Bug Summary

File:nnc/ccv_nnc_symbolic_graph_backward.c
Warning:line 724, column 4
Assigned value is garbage or undefined

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_symbolic_graph_backward.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc -resource-dir /usr/local/lib/clang/14.0.0 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D USE_DISPATCH -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/9/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -fdebug-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc -ferror-limit 19 -fblocks -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o 
/home/liu/buildslave/public_html/analyze/2022-06-22-151334-490440-1 -x c ccv_nnc_symbolic_graph_backward.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_nnc_symbolic_graph.h"
6
7// MARK - Level-3.5 API
8
9typedef struct { // Bookkeeping attached to a node while building the backward graph.
10 int f_wrt; // Check if both f_symbols and wrt_symbols flow through this node.
11 ccv_array_t* outgoings; // backward traverse nodes.
12 uint64_t* input_bitmasks; // NOTE(review): presumably a packed bitmask over the node's inputs - confirm against the users of this field.
13 int input_bitmask_size; // NOTE(review): presumably the number of uint64_t words in input_bitmasks - confirm.
14 uint64_t* output_bitmasks; // NOTE(review): presumably a packed bitmask over the node's outputs - confirm against the users of this field.
15 int output_bitmask_size; // NOTE(review): presumably the number of uint64_t words in output_bitmasks - confirm.
16} ccv_nnc_graph_backward_info_t;
17
18typedef struct { // A synthesized "sum" (or "set") exec created while accumulating gradient versions.
19 int input_size; // Number of entries in inputs.
20 int* inputs; // Heap-allocated indices into autograd_tensor_symbols (filled from ccv_nnc_tensor_ref_t.d).
21 int output; // Index into autograd_tensor_symbols of the tensor this exec produces.
22 ccv_array_t* outgoings; // Array of int exec indices that follow this exec; may be left unset when there is no consumer yet.
23 float value; // NOTE(review): presumably the fill value used by a "set" exec - not used in the code visible here, confirm.
24 ccv_nnc_graph_exec_symbol_t symbol; // NOTE(review): presumably the concrete exec symbol once materialized - confirm.
25} ccv_nnc_sum_or_set_graph_exec_symbol_t;
26
27typedef struct { // A backward exec tracked during autograd construction.
28 int input_size; // NOTE(review): presumably the number of entries in inputs - confirm.
29 int output_size; // NOTE(review): presumably the number of entries in outputs - confirm.
30 int* inputs; // NOTE(review): presumably indices into autograd_tensor_symbols - confirm.
31 int* outputs; // NOTE(review): presumably indices into autograd_tensor_symbols - confirm.
32 ccv_array_t* outgoings; // Array of int exec indices that follow this exec; created lazily by the code that links execs together.
33 ccv_nnc_cmd_t cmd; // NOTE(review): presumably the backward command to run - confirm.
34 ccv_nnc_graph_exec_symbol_t symbol; // NOTE(review): presumably the concrete exec symbol once materialized - confirm.
35} ccv_nnc_autograd_graph_exec_symbol_t;
36
37typedef struct { // A tensor symbol on the backward (autograd) side.
38 int d; // The pointer to the forward level object (used as an index into tensor_symbol_info).
39 int alias_ref; // The alias ref to itself (autograd_tensor_symbols array); 0 is asserted for non-alias symbols.
40 int flags; // Flags for this symbol, e.g. CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS when the tensor must be zeroed on allocation.
41 ccv_nnc_tensor_symbol_t symbol; // NOTE(review): presumably the concrete tensor symbol once materialized - confirm.
42} ccv_nnc_autograd_tensor_symbol_t;
43
44typedef struct { // One version of a gradient tensor together with the exec(s) that produced it.
45 int d; // The tensor symbol ref (index into autograd_tensor_symbols).
46 int x; // The exec symbol ref. Negative means an initialization tensor; values >= exec_symbol_info_size refer to sum_or_set_execs (offset by exec_symbol_info_size).
47 ccv_array_t* exec_registry; // Additional exec symbol refs, similar to x, only useful for aliasing.
48 ccv_array_t* alias_registry; // int point to all the alias (if this is not an alias). The alias is the object in autograd_tensor_symbols, you need another level of indirection to get the actual forward level alias.
49} ccv_nnc_tensor_ref_t;
50
51typedef struct { // All versions recorded for one gradient tensor.
52 int c; // The start non-accumulated version: entries of ref_version from index c onwards have not been summed yet.
53 ccv_array_t* ref_version; // tensor ref point to the reverse tensor symbol (array of ccv_nnc_tensor_ref_t).
54} ccv_nnc_autograd_tensor_version_t;
55
56typedef struct { // NOTE(review): not used in the portion of the file visible here; field meanings below are assumptions to confirm.
57 int d; // NOTE(review): presumably a tensor symbol index - confirm.
58 int alias_ref; // NOTE(review): presumably the alias reference of that tensor symbol - confirm.
59} ccv_nnc_sum_variable_t;
60
61// This method tries to figure out if a set of aliases can cover the whole tensor dim.
62// This is not a precise implementation though. The requirement is to answer this question
63// with a given memory constraint, therefore, only allow up to 65536 different tensor locations.
64// If you have more than that, it will assume that it doesn't have fully assigned aliases,
65// and will return 0.
66
/**
 * Insert `ins` into the ascending, duplicate-free array `md` holding `c` values.
 *
 * The position is located with a binary search. If the value is already
 * present nothing changes. The caller must make sure `md` has room for
 * `c + 1` elements.
 *
 * @param md  Sorted array to insert into.
 * @param ins Value to insert.
 * @param c   Number of values currently stored in `md`.
 * @return 1 if `ins` was inserted, 0 if it was already present.
 */
static inline int _ccv_nnc_try_mix(int* const md, const int ins, const int c)
{
	int lo = 0;
	int hi = c - 1;
	// With c == 0 the range is empty and we fall straight through to md[0] = ins.
	while (lo <= hi)
	{
		const int mid = lo + ((hi - lo) >> 1);
		const int probe = md[mid];
		if (probe == ins)
			return 0;
		if (ins < probe)
			hi = mid - 1;
		else
			lo = mid + 1;
	}
	// lo is now the insertion point; shift the tail up by one to make room.
	if (lo < c)
		memmove(md + lo + 1, md + lo, sizeof(int) * (c - lo));
	md[lo] = ins;
	return 1;
}
91
/**
 * Look up the position of `ins` in the ascending array `md` of `c` values.
 *
 * With zero or one stored value the answer is always 0 (no search is done).
 * Otherwise the value is expected to be present; not finding it trips an
 * assertion.
 *
 * @param md  Sorted array to search.
 * @param ins Value to look for.
 * @param c   Number of values stored in `md`.
 * @return Index of `ins` in `md`; -1 only if the value is missing and
 *         assertions are compiled out.
 */
static inline int _ccv_nnc_mix_idx(const int* const md, const int ins, const int c)
{
	if (c <= 1)
		return 0;
	int lo = 0;
	int hi = c - 1;
	while (lo <= hi)
	{
		const int mid = lo + ((hi - lo) >> 1);
		const int probe = md[mid];
		if (probe == ins)
			return mid;
		if (ins < probe)
			hi = mid - 1;
		else
			lo = mid + 1;
	}
	assert(0 && "Shouldn't reach here");
	return -1;
}
110
/**
 * Mark the cells covered by one alias along dimension 0 of the bit cube.
 *
 * The alias range [ofs[0], ofs[0] + dim[0]) is translated into cell indices
 * [s, d) using the sorted boundary list scmd[0], and every bit
 * `offset + cell` is set in `cube`.
 *
 * @param ofs        Alias offsets per dimension.
 * @param dim        Alias extents per dimension.
 * @param tensor_dim Extents of the full tensor per dimension.
 * @param scmd       Per-dimension sorted boundary lists.
 * @param cube_dim   Number of boundaries stored per dimension.
 * @param cube_step  Per-dimension stride in the cube (unused at this level).
 * @param cube       Bitmap, 32 cells per word.
 * @param offset     Bit index of cell 0 for this row.
 */
static inline void _ccv_nnc_try_set_pix_0(const int* const ofs, const int* const dim, const int* const tensor_dim, int* const* const scmd, const int* const cube_dim, const int* const cube_step, uint32_t* const cube, int offset)
{
	// First covered cell: 0 when the alias starts at the tensor edge.
	int s = 0;
	if (ofs[0] != 0)
		s = _ccv_nnc_mix_idx(scmd[0], ofs[0], cube_dim[0]) + 1;
	// One past the last covered cell: the full cell count when the alias reaches the far edge.
	int d = cube_dim[0] + 1;
	if (ofs[0] + dim[0] != tensor_dim[0])
		d = _ccv_nnc_mix_idx(scmd[0], ofs[0] + ccv_max(1, dim[0]), cube_dim[0]) + 1;
	assert(s >= 0 && d > s);
	int bit = offset + s;
	const int bit_end = offset + d;
	// Bits are set one at a time; the range is small by construction.
	while (bit < bit_end)
	{
		cube[bit >> 5] |= (1u << (bit & 0x1f));
		bit++;
	}
}
121
/**
 * Mark the cells covered by one alias across dimensions 0 and 1 of the bit cube.
 *
 * Cell ranges [s0, d0) and [s1, d1) are derived from the alias offset/extent
 * via the boundary lists scmd[0] and scmd[1]. When the dimension-0 range spans
 * a whole row (d0 - s0 equals the row stride), the covered rows form one
 * contiguous run of bits and are filled in a single loop; otherwise each row
 * is filled separately.
 *
 * @param ofs        Alias offsets per dimension.
 * @param dim        Alias extents per dimension.
 * @param tensor_dim Extents of the full tensor per dimension.
 * @param scmd       Per-dimension sorted boundary lists.
 * @param cube_dim   Number of boundaries stored per dimension.
 * @param cube_step  Per-dimension stride (in cells) in the cube.
 * @param cube       Bitmap, 32 cells per word.
 * @param offset     Bit index of cell (0, 0) for this plane.
 */
static inline void _ccv_nnc_try_set_pix_1(const int* const ofs, const int* const dim, const int* const tensor_dim, int* const* const scmd, const int* const cube_dim, const int* const cube_step, uint32_t* const cube, int offset)
{
	// Cell range along dimension 0.
	int s0 = 0;
	if (ofs[0] != 0)
		s0 = _ccv_nnc_mix_idx(scmd[0], ofs[0], cube_dim[0]) + 1;
	int d0 = cube_dim[0] + 1;
	if (ofs[0] + dim[0] != tensor_dim[0])
		d0 = _ccv_nnc_mix_idx(scmd[0], ofs[0] + ccv_max(1, dim[0]), cube_dim[0]) + 1;
	assert(s0 >= 0 && d0 > s0);
	// Cell range along dimension 1.
	int s1 = 0;
	if (ofs[1] != 0)
		s1 = _ccv_nnc_mix_idx(scmd[1], ofs[1], cube_dim[1]) + 1;
	int d1 = cube_dim[1] + 1;
	if (ofs[1] + dim[1] != tensor_dim[1])
		d1 = _ccv_nnc_mix_idx(scmd[1], ofs[1] + ccv_max(1, dim[1]), cube_dim[1]) + 1;
	assert(s1 >= 0 && d1 > s1);
	const int step1 = cube_step[1];
	if (step1 != d0 - s0)
	{
		// There are gaps between rows, fill each row on its own.
		int row, col;
		int base = offset + s1 * step1;
		for (row = s1; row < d1; row++, base += step1)
			for (col = s0; col < d0; col++)
				cube[(base + col) >> 5] |= (1u << ((base + col) & 0x1f));
		return;
	}
	// Whole rows are covered, so the bits form one contiguous run.
	int bit;
	const int bit_end = offset + d1 * step1;
	for (bit = offset + s1 * step1; bit < bit_end; bit++)
		cube[bit >> 5] |= (1u << (bit & 0x1f));
}
145
/**
 * Mark all cube cells covered by one alias, recursing from dimension
 * `dim_idx` down to dimension 0.
 *
 * Dimensions 1 and 0 are handled by the specialized helpers. For higher
 * dimensions the covered cell range [s, d) is computed for `dim_idx` and the
 * function recurses into `dim_idx - 1` once per covered cell, with the bit
 * offset advanced by that cell's stride.
 *
 * @param ofs        Alias offsets per dimension.
 * @param dim        Alias extents per dimension.
 * @param tensor_dim Extents of the full tensor per dimension.
 * @param scmd       Per-dimension sorted boundary lists.
 * @param cube_dim   Number of boundaries stored per dimension.
 * @param cube_step  Per-dimension stride (in cells) in the cube.
 * @param cube       Bitmap, 32 cells per word.
 * @param offset     Bit index accumulated from the outer dimensions.
 * @param dim_idx    Dimension to process at this level.
 */
static inline void _ccv_nnc_try_set_pix(const int* const ofs, const int* const dim, const int* const tensor_dim, int* const* const scmd, const int* const cube_dim, const int* const cube_step, uint32_t* const cube, int offset, const int dim_idx)
{
	if (dim_idx == 1)
	{
		_ccv_nnc_try_set_pix_1(ofs, dim, tensor_dim, scmd, cube_dim, cube_step, cube, offset);
		return;
	}
	if (dim_idx == 0)
	{
		_ccv_nnc_try_set_pix_0(ofs, dim, tensor_dim, scmd, cube_dim, cube_step, cube, offset);
		return;
	}
	int s = 0;
	if (ofs[dim_idx] != 0)
		s = _ccv_nnc_mix_idx(scmd[dim_idx], ofs[dim_idx], cube_dim[dim_idx]) + 1;
	int d = cube_dim[dim_idx] + 1;
	if (ofs[dim_idx] + dim[dim_idx] != tensor_dim[dim_idx])
		d = _ccv_nnc_mix_idx(scmd[dim_idx], ofs[dim_idx] + ccv_max(1, dim[dim_idx]), cube_dim[dim_idx]) + 1;
	assert(s >= 0 && d > s);
	const int stride = cube_step[dim_idx];
	int cell = s;
	// One recursive fill per covered cell of this dimension.
	while (cell < d)
	{
		_ccv_nnc_try_set_pix(ofs, dim, tensor_dim, scmd, cube_dim, cube_step, cube, offset + cell * stride, dim_idx - 1);
		cell++;
	}
}
164
165static int _ccv_nnc_tensor_ref_fully_assigned_with_aliases(const ccv_nnc_tensor_ref_t* const tensor_ref, const ccv_array_t* const autograd_tensor_symbols, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info)
166{
167 // Only work with tensor_ref of aliases.
168 assert(tensor_ref->alias_registry)((void) sizeof ((tensor_ref->alias_registry) ? 1 : 0), __extension__
({ if (tensor_ref->alias_registry) ; else __assert_fail (
"tensor_ref->alias_registry", "ccv_nnc_symbolic_graph_backward.c"
, 168, __extension__ __PRETTY_FUNCTION__); }))
;
169 const ccv_nnc_autograd_tensor_symbol_t* autograd = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_ref->d)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(tensor_ref->
d)))
;
170 assert(tensor_symbol_info[autograd->d].alias_ref == 0)((void) sizeof ((tensor_symbol_info[autograd->d].alias_ref
== 0) ? 1 : 0), __extension__ ({ if (tensor_symbol_info[autograd
->d].alias_ref == 0) ; else __assert_fail ("tensor_symbol_info[autograd->d].alias_ref == 0"
, "ccv_nnc_symbolic_graph_backward.c", 170, __extension__ __PRETTY_FUNCTION__
); }))
;
171 const int* tensor_dim = tensor_symbol_info[autograd->d].info.dim;
172 const int tensor_count = ccv_nnc_dimension_count(tensor_dim);
173 int i, j;
174 for (i = 0; i < tensor_ref->alias_registry->rnum; i++)
175 {
176 const int d = *(int*)ccv_array_get(tensor_ref->alias_registry, i)((void*)(((char*)((tensor_ref->alias_registry)->data)) +
(size_t)(tensor_ref->alias_registry)->rsize * (size_t)
(i)))
;
177 assert(d < autograd_tensor_symbols->rnum)((void) sizeof ((d < autograd_tensor_symbols->rnum) ? 1
: 0), __extension__ ({ if (d < autograd_tensor_symbols->
rnum) ; else __assert_fail ("d < autograd_tensor_symbols->rnum"
, "ccv_nnc_symbolic_graph_backward.c", 177, __extension__ __PRETTY_FUNCTION__
); }))
;
178 const ccv_nnc_autograd_tensor_symbol_t* autograd = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, d)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(d)))
;
179 assert(tensor_symbol_info[autograd->d].alias_ref)((void) sizeof ((tensor_symbol_info[autograd->d].alias_ref
) ? 1 : 0), __extension__ ({ if (tensor_symbol_info[autograd->
d].alias_ref) ; else __assert_fail ("tensor_symbol_info[autograd->d].alias_ref"
, "ccv_nnc_symbolic_graph_backward.c", 179, __extension__ __PRETTY_FUNCTION__
); }))
;
180 const int* inc = tensor_symbol_info[autograd->d].inc;
181 // If this is just reshaped (i.e., dimension is the same, and inc covers the whole). We have fully assigned.
182 if (memcmp(inc, tensor_symbol_info[autograd->d].info.dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)) == 0 &&
183 ccv_nnc_dimension_count(inc) == tensor_count)
184 return 1;
185 // Otherwise if inc doesn't match original dim, it is not covered.
186 if (memcmp(inc, tensor_dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)) != 0)
187 return 0;
188 }
189 /* We need a solid cube (potentially hyper dimensional) to compute if there are overlaps.
190 * To make this cube as small as possible, we need to map the actual tensor dimension
191 * (therefore, we don't actually allocate the whole tensor to compute overlaps) to a smaller
192 * cube given the ofs and dim size of its aliases.
193 *
194 * The following code generated the dimension mapping (using scratch space) with binary search + insertion
195 * and then we fill the cube with a given tensor alias's dimensional information (ofs, dim).
196 * Afterwards, we simply need to check if the cube is totally filled up to know if this tensor
197 * is fully assigned with its aliases (if that is the case, we can skip zeroing for this tensor).
198 *
199 * There are several restrictions though to make this faster: 1). I cannot handle any cube that all side
200 * lengths combined larger than 1023 (scm only have 1024 scratch space). 2). I cannot handle any cube
201 * that the total volume is larger than 2048 * 8 (I only allocate 2K on stack for this).
202 * */
203 int scm[1024]; // Having 1024 int scratch space for mapping dimensions. (Or sparse coordinate mapping).
204 int cube_dim[CCV_NNC_MAX_DIM_ALLOC(12)] = {}; // Mapping dimension size.
205 int cube_size = 1;
206 int* scmptr = scm;
207 for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC(12) && tensor_dim[i]; i++)
208 {
209 int head = 0, tail = 0; // Note that we touched both the head and tail (otherwise this dimension is not fully covered).
210 int len = 0;
211 for (j = 0; j < tensor_ref->alias_registry->rnum; j++)
212 {
213 const int d = *(int*)ccv_array_get(tensor_ref->alias_registry, j)((void*)(((char*)((tensor_ref->alias_registry)->data)) +
(size_t)(tensor_ref->alias_registry)->rsize * (size_t)
(j)))
;
214 assert(d < autograd_tensor_symbols->rnum)((void) sizeof ((d < autograd_tensor_symbols->rnum) ? 1
: 0), __extension__ ({ if (d < autograd_tensor_symbols->
rnum) ; else __assert_fail ("d < autograd_tensor_symbols->rnum"
, "ccv_nnc_symbolic_graph_backward.c", 214, __extension__ __PRETTY_FUNCTION__
); }))
;
215 const ccv_nnc_autograd_tensor_symbol_t* autograd = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, d)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(d)))
;
216 assert(tensor_symbol_info[autograd->d].alias_ref)((void) sizeof ((tensor_symbol_info[autograd->d].alias_ref
) ? 1 : 0), __extension__ ({ if (tensor_symbol_info[autograd->
d].alias_ref) ; else __assert_fail ("tensor_symbol_info[autograd->d].alias_ref"
, "ccv_nnc_symbolic_graph_backward.c", 216, __extension__ __PRETTY_FUNCTION__
); }))
;
217 const int* ofs = tensor_symbol_info[autograd->d].ofs;
218 const int* dim = tensor_symbol_info[autograd->d].info.dim;
219 head = head || (ofs[i] == 0);
220 tail = tail || (ofs[i] + ccv_max(1, dim[i])({ typeof (1) _a = (1); typeof (dim[i]) _b = (dim[i]); (_a >
_b) ? _a : _b; })
== tensor_dim[i]);
221 if (ofs[i] != 0)
222 len += _ccv_nnc_try_mix(scmptr, ofs[i], len);
223 if (scmptr - scm + len >= 1024) // Cannot handle that much, abort.
224 return 0;
225 if (ofs[i] + ccv_max(1, dim[i])({ typeof (1) _a = (1); typeof (dim[i]) _b = (dim[i]); (_a >
_b) ? _a : _b; })
< tensor_dim[i])
226 len += _ccv_nnc_try_mix(scmptr, ofs[i] + ccv_max(1, dim[i])({ typeof (1) _a = (1); typeof (dim[i]) _b = (dim[i]); (_a >
_b) ? _a : _b; })
, len);
227 if (scmptr - scm + len >= 1024) // Cannot handle that much, abort.
228 return 0;
229 }
230 if (!head || !tail)
231 return 0;
232 cube_size *= (len + 1);
233 cube_dim[i] = len;
234 scmptr += len; // Moving to next level.
235 }
236 // The cube map is too large, cannot do the computation, assume it is not fully assigned.
237 if (cube_size > 2048 * 8)
238 return 0;
239 // binary map to see if it fills up.
240 uint32_t cube[(cube_size + 31) >> 5];
241 memset(cube, 0, sizeof(uint32_t) * ((cube_size + 31) >> 5));
242 int* scmd[CCV_NNC_MAX_DIM_ALLOC(12)] = {}; // Sparse coordinate map at dimension x.
243 int cube_step[CCV_NNC_MAX_DIM_ALLOC(12)] = {};
244 for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC(12) && tensor_dim[i]; i++)
245 {
246 cube_step[i] = (i > 0) ? cube_step[i - 1] * (cube_dim[i - 1] + 1) : 1;
247 scmd[i] = (i > 0) ? scmd[i - 1] + cube_dim[i - 1] : scm;
248 }
249 const int max_dim = i;
250 for (i = 0; i < tensor_ref->alias_registry->rnum; i++)
251 {
252 const int d = *(int*)ccv_array_get(tensor_ref->alias_registry, i)((void*)(((char*)((tensor_ref->alias_registry)->data)) +
(size_t)(tensor_ref->alias_registry)->rsize * (size_t)
(i)))
;
253 assert(d < autograd_tensor_symbols->rnum)((void) sizeof ((d < autograd_tensor_symbols->rnum) ? 1
: 0), __extension__ ({ if (d < autograd_tensor_symbols->
rnum) ; else __assert_fail ("d < autograd_tensor_symbols->rnum"
, "ccv_nnc_symbolic_graph_backward.c", 253, __extension__ __PRETTY_FUNCTION__
); }))
;
254 const ccv_nnc_autograd_tensor_symbol_t* autograd = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, d)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(d)))
;
255 assert(tensor_symbol_info[autograd->d].alias_ref)((void) sizeof ((tensor_symbol_info[autograd->d].alias_ref
) ? 1 : 0), __extension__ ({ if (tensor_symbol_info[autograd->
d].alias_ref) ; else __assert_fail ("tensor_symbol_info[autograd->d].alias_ref"
, "ccv_nnc_symbolic_graph_backward.c", 255, __extension__ __PRETTY_FUNCTION__
); }))
;
256 const int* ofs = tensor_symbol_info[autograd->d].ofs;
257 const int* dim = tensor_symbol_info[autograd->d].info.dim;
258 _ccv_nnc_try_set_pix(ofs, dim, tensor_dim, scmd, cube_dim, cube_step, cube, 0, max_dim - 1);
259 }
260 // Compare to see now if the binary map filled up. If it filled up, we know it is fully assigned.
261 for (i = 0; i < (cube_size >> 5); i++)
262 if (cube[i] < 0xffffffff)
263 return 0;
264 if ((cube_size & 0x1f) > 0)
265 {
266 // Fetch the rest.
267 uint32_t r = 0;
268 for (i = 0; i < (cube_size & 0x1f); i++)
269 r |= (1u << i);
270 assert(cube[((cube_size + 31) >> 5) - 1] <= r)((void) sizeof ((cube[((cube_size + 31) >> 5) - 1] <=
r) ? 1 : 0), __extension__ ({ if (cube[((cube_size + 31) >>
5) - 1] <= r) ; else __assert_fail ("cube[((cube_size + 31) >> 5) - 1] <= r"
, "ccv_nnc_symbolic_graph_backward.c", 270, __extension__ __PRETTY_FUNCTION__
); }))
;
271 if (cube[((cube_size + 31) >> 5) - 1] < r)
272 return 0;
273 }
274 return 1;
275}
276
277static int _ccv_nnc_tensor_ref_version_find_init(const ccv_nnc_autograd_tensor_version_t* const tensor_ver)
278{
279 int i;
280 for (i = 0; i < tensor_ver->ref_version->rnum; i++)
281 if (((ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, i)((void*)(((char*)((tensor_ver->ref_version)->data)) + (
size_t)(tensor_ver->ref_version)->rsize * (size_t)(i)))
)->x < 0)
282 return i;
283 return -1;
284}
285
286static void _ccv_nnc_graph_sum_autograd_tensor_versions(const int idx, const int d, const int exec_symbol_info_size, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, ccv_nnc_autograd_tensor_version_t* const tensor_ver, ccv_nnc_autograd_graph_exec_symbol_t* const autograd_execs, ccv_array_t* const autograd_tensor_symbols, ccv_array_t* const sum_or_set_execs)
287{
288 int i, j;
289 assert(tensor_ver->c < tensor_ver->ref_version->rnum)((void) sizeof ((tensor_ver->c < tensor_ver->ref_version
->rnum) ? 1 : 0), __extension__ ({ if (tensor_ver->c <
tensor_ver->ref_version->rnum) ; else __assert_fail ("tensor_ver->c < tensor_ver->ref_version->rnum"
, "ccv_nnc_symbolic_graph_backward.c", 289, __extension__ __PRETTY_FUNCTION__
); }))
;
290 const int input_size = tensor_ver->ref_version->rnum - tensor_ver->c;
291 int* inputs = (int*)ccmallocmalloc(sizeof(int) * input_size);
292 for (i = tensor_ver->c; i < tensor_ver->ref_version->rnum; i++)
293 inputs[i] = ((ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, i)((void*)(((char*)((tensor_ver->ref_version)->data)) + (
size_t)(tensor_ver->ref_version)->rsize * (size_t)(i)))
)->d;
294 const ccv_nnc_autograd_tensor_symbol_t tensor_sym = {
295 .d = d
296 };
297 ccv_array_push(autograd_tensor_symbols, &tensor_sym);
298 ccv_nnc_sum_or_set_graph_exec_symbol_t sum_exec = {
299 .input_size = input_size,
300 .inputs = inputs,
301 .output = autograd_tensor_symbols->rnum - 1
302 };
303 if (idx >= 0)
304 {
305 sum_exec.outgoings = ccv_array_new(sizeof(int), 1, 0);
306 ccv_array_push(sum_exec.outgoings, &idx);
307 }
308 ccv_array_push(sum_or_set_execs, &sum_exec);
309 const int outgoing = exec_symbol_info_size + sum_or_set_execs->rnum - 1;
310 for (i = tensor_ver->c; i < tensor_ver->ref_version->rnum; i++)
311 {
312 const ccv_nnc_tensor_ref_t* tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, i)((void*)(((char*)((tensor_ver->ref_version)->data)) + (
size_t)(tensor_ver->ref_version)->rsize * (size_t)(i)))
;
313 const int x = tensor_ref->x;
314 if (x < 0) /* This is initialization tensor, it has to be occurred before the execution anyway. */
315 {
316 // No alias.
317 assert(!tensor_ref->alias_registry)((void) sizeof ((!tensor_ref->alias_registry) ? 1 : 0), __extension__
({ if (!tensor_ref->alias_registry) ; else __assert_fail (
"!tensor_ref->alias_registry", "ccv_nnc_symbolic_graph_backward.c"
, 317, __extension__ __PRETTY_FUNCTION__); }))
;
318 // No associated additional execs.
319 assert(!tensor_ref->exec_registry)((void) sizeof ((!tensor_ref->exec_registry) ? 1 : 0), __extension__
({ if (!tensor_ref->exec_registry) ; else __assert_fail (
"!tensor_ref->exec_registry", "ccv_nnc_symbolic_graph_backward.c"
, 319, __extension__ __PRETTY_FUNCTION__); }))
;
320 continue;
321 }
322 if (x < exec_symbol_info_size)
323 {
324 ccv_nnc_autograd_graph_exec_symbol_t* back_exec = autograd_execs + x;
325 if (!back_exec->outgoings)
326 back_exec->outgoings = ccv_array_new(sizeof(int), 1, 0);
327 ccv_array_replace_unique_int(back_exec->outgoings, idx, outgoing);
328 } else {
329 // This tensor_ref is generated by the sum operation.
330 ccv_nnc_sum_or_set_graph_exec_symbol_t* sum_or_set = (ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, x - exec_symbol_info_size)((void*)(((char*)((sum_or_set_execs)->data)) + (size_t)(sum_or_set_execs
)->rsize * (size_t)(x - exec_symbol_info_size)))
;
331 ccv_array_replace_unique_int(sum_or_set->outgoings, idx, outgoing);
332 }
333 // If this tensor have associated alias, we need to init it to zeros when it is allocated (we only need to set a flag here)
334 // it is handled at compilation phase.
335 if (tensor_ref->alias_registry &&
336 // Loop over to see if this tensor is fully occupied to avoid extra zero step.
337 !_ccv_nnc_tensor_ref_fully_assigned_with_aliases(tensor_ref, autograd_tensor_symbols, tensor_symbol_info))
338 {
339 ccv_nnc_autograd_tensor_symbol_t* tensor_sym = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_ref->d)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(tensor_ref->
d)))
;
340 // By having alias_registry, what this symbol represents must not by an alias.
341 assert(tensor_sym->alias_ref == 0)((void) sizeof ((tensor_sym->alias_ref == 0) ? 1 : 0), __extension__
({ if (tensor_sym->alias_ref == 0) ; else __assert_fail (
"tensor_sym->alias_ref == 0", "ccv_nnc_symbolic_graph_backward.c"
, 341, __extension__ __PRETTY_FUNCTION__); }))
;
342 tensor_sym->flags = CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS;
343 }
344 if (tensor_ref->exec_registry)
345 for (j = 0; j < tensor_ref->exec_registry->rnum; j++)
346 {
347 const int x = *(int*)ccv_array_get(tensor_ref->exec_registry, j)((void*)(((char*)((tensor_ref->exec_registry)->data)) +
(size_t)(tensor_ref->exec_registry)->rsize * (size_t)(
j)))
;
348 assert(x >= 0)((void) sizeof ((x >= 0) ? 1 : 0), __extension__ ({ if (x >=
0) ; else __assert_fail ("x >= 0", "ccv_nnc_symbolic_graph_backward.c"
, 348, __extension__ __PRETTY_FUNCTION__); }))
;
349 // The exec_registry can only be generated by alias registry, therefore, it cannot reference to a sum operation.
350 assert(x < exec_symbol_info_size)((void) sizeof ((x < exec_symbol_info_size) ? 1 : 0), __extension__
({ if (x < exec_symbol_info_size) ; else __assert_fail ("x < exec_symbol_info_size"
, "ccv_nnc_symbolic_graph_backward.c", 350, __extension__ __PRETTY_FUNCTION__
); }))
;
351 ccv_nnc_autograd_graph_exec_symbol_t* back_exec = autograd_execs + x;
352 if (!back_exec->outgoings)
353 back_exec->outgoings = ccv_array_new(sizeof(int), 1, 0);
354 ccv_array_replace_unique_int(back_exec->outgoings, idx, outgoing);
355 }
356 }
357 const ccv_nnc_tensor_ref_t tensor_ref = {
358 .d = autograd_tensor_symbols->rnum - 1,
359 .x = outgoing
360 };
361 ccv_array_push(tensor_ver->ref_version, &tensor_ref);
362 /* Move the c pointer up to the latest summed result. */
363 tensor_ver->c = tensor_ver->ref_version->rnum - 1;
364}
365
366static int _ccv_nnc_tensor_ref_version_involve_alias(const ccv_nnc_tensor_ref_t* const tensor_ref, const ccv_array_t* const autograd_tensor_symbols, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, const ccv_nnc_tensor_symbol_info_t* const alias)
367{
368 assert(alias->alias_ref > 0)((void) sizeof ((alias->alias_ref > 0) ? 1 : 0), __extension__
({ if (alias->alias_ref > 0) ; else __assert_fail ("alias->alias_ref > 0"
, "ccv_nnc_symbolic_graph_backward.c", 368, __extension__ __PRETTY_FUNCTION__
); }))
;
369 // No alias_registry, must conflict (owns the whole band).
370 if (!tensor_ref->alias_registry)
371 return 1;
372 int i;
373 for (i = 0; i < tensor_ref->alias_registry->rnum; i++)
374 {
375 const int d = *(int*)ccv_array_get(tensor_ref->alias_registry, i)((void*)(((char*)((tensor_ref->alias_registry)->data)) +
(size_t)(tensor_ref->alias_registry)->rsize * (size_t)
(i)))
;
376 assert(d < autograd_tensor_symbols->rnum)((void) sizeof ((d < autograd_tensor_symbols->rnum) ? 1
: 0), __extension__ ({ if (d < autograd_tensor_symbols->
rnum) ; else __assert_fail ("d < autograd_tensor_symbols->rnum"
, "ccv_nnc_symbolic_graph_backward.c", 376, __extension__ __PRETTY_FUNCTION__
); }))
;
377 ccv_nnc_autograd_tensor_symbol_t* autograd = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, d)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(d)))
;
378 if (ccv_nnc_over_tensor_symbol_aliases(tensor_symbol_info + autograd->d, alias))
379 return 1;
380 }
381 // All aliases referenced by this ref_version doesn't overlap with the provided one, thus, there is no conflict at all.
382 return 0;
383}
384
385static int _ccv_nnc_tensor_ref_version_find_alias(const ccv_nnc_tensor_ref_t* const tensor_ref, const ccv_array_t* const autograd_tensor_symbols, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, const ccv_nnc_tensor_symbol_info_t* const alias)
386{
387 assert(alias->alias_ref > 0)((void) sizeof ((alias->alias_ref > 0) ? 1 : 0), __extension__
({ if (alias->alias_ref > 0) ; else __assert_fail ("alias->alias_ref > 0"
, "ccv_nnc_symbolic_graph_backward.c", 387, __extension__ __PRETTY_FUNCTION__
); }))
;
388 // No alias_registry, thus, cannot find the exact matched alias.
389 if (!tensor_ref->alias_registry)
390 return -1;
391 int i;
392 for (i = 0; i < tensor_ref->alias_registry->rnum; i++)
393 {
394 const int d = *(int*)ccv_array_get(tensor_ref->alias_registry, i)((void*)(((char*)((tensor_ref->alias_registry)->data)) +
(size_t)(tensor_ref->alias_registry)->rsize * (size_t)
(i)))
;
395 assert(d < autograd_tensor_symbols->rnum)((void) sizeof ((d < autograd_tensor_symbols->rnum) ? 1
: 0), __extension__ ({ if (d < autograd_tensor_symbols->
rnum) ; else __assert_fail ("d < autograd_tensor_symbols->rnum"
, "ccv_nnc_symbolic_graph_backward.c", 395, __extension__ __PRETTY_FUNCTION__
); }))
;
396 ccv_nnc_autograd_tensor_symbol_t* autograd = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, d)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(d)))
;
397 // This must reference to an alias.
398 assert(tensor_symbol_info[autograd->d].alias_ref)((void) sizeof ((tensor_symbol_info[autograd->d].alias_ref
) ? 1 : 0), __extension__ ({ if (tensor_symbol_info[autograd->
d].alias_ref) ; else __assert_fail ("tensor_symbol_info[autograd->d].alias_ref"
, "ccv_nnc_symbolic_graph_backward.c", 398, __extension__ __PRETTY_FUNCTION__
); }))
;
399 const int* inc = tensor_symbol_info[autograd->d].inc;
400 const int* ofs = tensor_symbol_info[autograd->d].ofs;
401 const int* dim = tensor_symbol_info[autograd->d].info.dim;
402 // If everything matches, this is the required alias.
403 if (memcmp(inc, alias->inc, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)) == 0 &&
404 memcmp(ofs, alias->ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)) == 0 &&
405 memcmp(dim, alias->info.dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)) == 0)
406 return d;
407 }
408 return -1;
409}
410
// Check whether every alias recorded in tensor_ref->alias_registry describes the same view as `alias`.
//
// Each registry entry is an index into autograd_tensor_symbols; the .d field of that entry selects
// the tensor_symbol_info record whose inc, ofs and info.dim arrays are compared byte-wise
// (CCV_NNC_MAX_DIM_ALLOC ints each) against the ones of `alias`.
//
// Returns 1 when a registry exists and no entry differs from `alias` (an empty registry also
// yields 1, because the loop body never runs). Returns 0 when there is no registry, or on the
// first entry that differs. `alias` must itself be an alias (alias->alias_ref > 0 is asserted).
411static int _ccv_nnc_tensor_ref_version_has_this_alias_exclusively(const ccv_nnc_tensor_ref_t* const tensor_ref, const ccv_array_t* const autograd_tensor_symbols, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, const ccv_nnc_tensor_symbol_info_t* const alias)
412{
413 assert(alias->alias_ref > 0)((void) sizeof ((alias->alias_ref > 0) ? 1 : 0), __extension__
({ if (alias->alias_ref > 0) ; else __assert_fail ("alias->alias_ref > 0"
, "ccv_nnc_symbolic_graph_backward.c", 413, __extension__ __PRETTY_FUNCTION__
); }))
;
414 // Without an alias_registry there is nothing to compare against, so no exact match can be claimed.
415 if (!tensor_ref->alias_registry)
416 return 0;
417 int i;
418 for (i = 0; i < tensor_ref->alias_registry->rnum; i++)
419 {
// d is an index into autograd_tensor_symbols (bounds asserted below).
420 const int d = *(int*)ccv_array_get(tensor_ref->alias_registry, i)((void*)(((char*)((tensor_ref->alias_registry)->data)) +
(size_t)(tensor_ref->alias_registry)->rsize * (size_t)
(i)))
;
421 assert(d < autograd_tensor_symbols->rnum)((void) sizeof ((d < autograd_tensor_symbols->rnum) ? 1
: 0), __extension__ ({ if (d < autograd_tensor_symbols->
rnum) ; else __assert_fail ("d < autograd_tensor_symbols->rnum"
, "ccv_nnc_symbolic_graph_backward.c", 421, __extension__ __PRETTY_FUNCTION__
); }))
;
422 ccv_nnc_autograd_tensor_symbol_t* autograd = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, d)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(d)))
;
423 // Every registry entry must refer to an alias.
424 assert(tensor_symbol_info[autograd->d].alias_ref)((void) sizeof ((tensor_symbol_info[autograd->d].alias_ref
) ? 1 : 0), __extension__ ({ if (tensor_symbol_info[autograd->
d].alias_ref) ; else __assert_fail ("tensor_symbol_info[autograd->d].alias_ref"
, "ccv_nnc_symbolic_graph_backward.c", 424, __extension__ __PRETTY_FUNCTION__
); }))
;
425 const int* inc = tensor_symbol_info[autograd->d].inc;
426 const int* ofs = tensor_symbol_info[autograd->d].ofs;
427 const int* dim = tensor_symbol_info[autograd->d].info.dim;
// Any difference in inc, ofs or dim means this entry is a different view than `alias`.
428 if (memcmp(inc, alias->inc, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)) != 0 ||
429 memcmp(ofs, alias->ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)) != 0 ||
430 memcmp(dim, alias->info.dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)) != 0)
431 return 0;
432 }
433 // Every alias in the registry matches, so any one of them can be used directly.
434 return 1;
435}
436
// Resolve the not-yet-consumed versions of a tensor (tensor_ver->ref_version[c .. rnum - 1]) that
// touch `alias` into a single autograd tensor symbol, and return its index in autograd_tensor_symbols.
//
// Parameters, as used by the code below:
//  - idx: when >= 0, it becomes the only outgoing edge of the sum exec that may be created here.
//  - d: stored as the .d field of any alias autograd tensor symbol created here.
//  - tensor_symbol_info: forwarded to the alias lookup helpers.
//  - exec_symbol_info_size: exec indexes below it address autograd_execs; indexes at or above it
//    address sum_or_set_execs (after subtracting exec_symbol_info_size).
//  - alias: the alias to resolve; alias->alias_ref - 1 becomes the .d of the backing tensor symbol
//    created on the sum path.
//  - tensor_ver: its ref_version array may be reordered and appended to, and c is advanced.
//  - autograd_execs / autograd_tensor_symbols / sum_or_set_execs: updated in place.
//
// Outcomes:
//  - Exactly one ref touches the alias: return the alias symbol already found for it, or push a new
//    alias symbol on top of that ref and return the new index. tensor_ver->c is left unchanged.
//  - Otherwise: push a backing tensor symbol plus an alias of it, push a sum exec that takes all the
//    collected refs as inputs, wire the producers of those refs to the sum exec, append a new ref
//    for the result, advance tensor_ver->c past the consumed refs, and return the new alias index.
//
// The `inputs` array allocated on the sum path is stored in the pushed sum exec and is not freed here.
437static int _ccv_nnc_graph_sum_autograd_tensor_versions_alias(const int idx, const int d, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, const int exec_symbol_info_size, const ccv_nnc_tensor_symbol_info_t* const alias, ccv_nnc_autograd_tensor_version_t* const tensor_ver, ccv_nnc_autograd_graph_exec_symbol_t* const autograd_execs, ccv_array_t* const autograd_tensor_symbols, ccv_array_t* const sum_or_set_execs)
438{
439 assert(tensor_ver->c < tensor_ver->ref_version->rnum)((void) sizeof ((tensor_ver->c < tensor_ver->ref_version
->rnum) ? 1 : 0), __extension__ ({ if (tensor_ver->c <
tensor_ver->ref_version->rnum) ; else __assert_fail ("tensor_ver->c < tensor_ver->ref_version->rnum"
, "ccv_nnc_symbolic_graph_backward.c", 439, __extension__ __PRETTY_FUNCTION__
); }))
;
// kd records, for each ref in [c, rnum) that touches `alias`, the position of the ref (.i) and the
// non-negative value returned by the find_alias helper (.k), or .k = -1 when only the
// involve_alias helper reported a hit. The assert above guarantees the array has at least one slot.
440 int i, j = 0;
441 struct {
442 int k;
443 int i;
444 } kd[tensor_ver->ref_version->rnum - tensor_ver->c];
445 for (i = tensor_ver->c; i < tensor_ver->ref_version->rnum; i++)
446 {
447 ccv_nnc_tensor_ref_t* tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, i)((void*)(((char*)((tensor_ver->ref_version)->data)) + (
size_t)(tensor_ver->ref_version)->rsize * (size_t)(i)))
;
448 const int k = _ccv_nnc_tensor_ref_version_find_alias(tensor_ref, autograd_tensor_symbols, tensor_symbol_info, alias);
449 if (k >= 0)
450 kd[j++] = (typeof(kd[0])){
451 .k = k, .i = i
452 };
453 else if (_ccv_nnc_tensor_ref_version_involve_alias(tensor_ref, autograd_tensor_symbols, tensor_symbol_info, alias))
454 kd[j++] = (typeof(kd[0])) {
455 .k = -1, .i = i // It has dependency to the original tensor (non-alias) now, label this with highest bit.
456 };
457 }
458 // Can only find one. This is the easy case, we can simply return that symbol (or its alias).
459 if (j == 1)
460 {
461 if (kd[0].k >= 0)
462 return kd[0].k; // Only can find one alias, that is the one.
463 // Otherwise, need to create a new alias.
464 ccv_nnc_tensor_ref_t* tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, kd[0].i)((void*)(((char*)((tensor_ver->ref_version)->data)) + (
size_t)(tensor_ver->ref_version)->rsize * (size_t)(kd[0
].i)))
;
465 ccv_nnc_autograd_tensor_symbol_t* ref = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_ref->d)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(tensor_ref->
d)))
;
466 // Since we create new alias, we need to set the referenced one to be allocated with 0s.
467 if (ref->alias_ref) // If this is an alias, it has to be zero initialized.
468 {
// alias_ref is stored 1-based; follow it to the original (non-alias) symbol and flag that one.
469 ref = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, ref->alias_ref - 1)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(ref->alias_ref
- 1)))
;
470 assert(ref->alias_ref == 0)((void) sizeof ((ref->alias_ref == 0) ? 1 : 0), __extension__
({ if (ref->alias_ref == 0) ; else __assert_fail ("ref->alias_ref == 0"
, "ccv_nnc_symbolic_graph_backward.c", 470, __extension__ __PRETTY_FUNCTION__
); }))
; // This is original.
471 ref->flags = CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS;
472 } else if (tensor_ref->alias_registry && // Otherwise, to see if this symbol is fully occupied.
473 // Loop over to see if this tensor is fully occupied to avoid extra zero step.
474 !_ccv_nnc_tensor_ref_fully_assigned_with_aliases(tensor_ref, autograd_tensor_symbols, tensor_symbol_info)) {
475 ref->flags = CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS;
476 }
// The new alias symbol points (1-based) at the symbol the ref currently names.
477 ccv_nnc_autograd_tensor_symbol_t tensor_sym = {
478 .d = d,
479 .alias_ref = tensor_ref->d + 1
480 };
481 ccv_array_push(autograd_tensor_symbols, &tensor_sym);
482 const int ad = autograd_tensor_symbols->rnum - 1;
483 if (tensor_ref->alias_registry) // Only push this when it has an alias registry (otherwise it already conflict with everyone).
484 ccv_array_push(tensor_ref->alias_registry, &ad);
485 // The newly inserted tensor symbol.
486 return ad;
487 }
488 // Otherwise, we need to create the sum operation out of these.
// NOTE(review): j == 0 also reaches this point; inputs is then NULL and a sum exec with
// input_size 0 is still pushed below -- confirm callers never hit that case.
489 const int input_size = j;
490 int has_this_alias_exclusively = 1;
491 int* inputs = input_size > 0 ? (int*)ccmallocmalloc(sizeof(int) * input_size) : 0;
492 for (i = 0; i < input_size; i++)
493 {
494 ccv_nnc_tensor_ref_t* tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, kd[i].i)((void*)(((char*)((tensor_ver->ref_version)->data)) + (
size_t)(tensor_ver->ref_version)->rsize * (size_t)(kd[i
].i)))
;
495 // Can take a fast path if every ref involved has the same alias, our sum operation can be faster (using alias directly).
496 if (has_this_alias_exclusively && kd[i].k >= 0 && _ccv_nnc_tensor_ref_version_has_this_alias_exclusively(tensor_ref, autograd_tensor_symbols, tensor_symbol_info, alias))
497 inputs[i] = *(int*)ccv_array_get(tensor_ref->alias_registry, 0)((void*)(((char*)((tensor_ref->alias_registry)->data)) +
(size_t)(tensor_ref->alias_registry)->rsize * (size_t)
(0)))
; // Assigning the alias.
498 else {
499 if (has_this_alias_exclusively)
500 {
// First ref that fails the fast path: the inputs filled so far hold alias indexes, so
// rewrite them to name the symbols of the refs (ref->d) instead. j is reused as a scratch
// index here; input_size already captured its earlier value.
501 has_this_alias_exclusively = 0;
502 for (j = 0; j < i; j++)
503 inputs[j] = ((ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, kd[j].i)((void*)(((char*)((tensor_ver->ref_version)->data)) + (
size_t)(tensor_ver->ref_version)->rsize * (size_t)(kd[j
].i)))
)->d;
504 }
505 inputs[i] = tensor_ref->d;
506 }
507 }
// Push two new symbols: first the backing tensor (.d = alias->alias_ref - 1, no alias_ref),
// then an alias of it (.d = d, .alias_ref = index of the backing tensor + 1).
508 ccv_nnc_autograd_tensor_symbol_t tensor_sym = {
509 .d = alias->alias_ref - 1
510 };
511 ccv_array_push(autograd_tensor_symbols, &tensor_sym);
512 const int tensor_ref_d = autograd_tensor_symbols->rnum - 1;
513 tensor_sym.d = d;
514 tensor_sym.alias_ref = tensor_ref_d + 1;
515 ccv_array_push(autograd_tensor_symbols, &tensor_sym);
516 const int ad = autograd_tensor_symbols->rnum - 1;
517 ccv_nnc_sum_or_set_graph_exec_symbol_t sum_exec = {
518 .input_size = input_size,
519 .inputs = inputs,
520 .output = has_this_alias_exclusively ? ad : tensor_ref_d /* If has this alias exclusively, the output should be alias as well. Otherwise the output is the real tensor. */
521 };
// When idx < 0 the outgoings field keeps the zero value from the initializer above.
522 if (idx >= 0)
523 {
524 sum_exec.outgoings = ccv_array_new(sizeof(int), 1, 0);
525 ccv_array_push(sum_exec.outgoings, &idx);
526 }
527 ccv_array_push(sum_or_set_execs, &sum_exec);
// Sum execs are numbered after the regular execs, hence the exec_symbol_info_size offset.
528 const int outgoing = exec_symbol_info_size + sum_or_set_execs->rnum - 1;
529 int no_alias_registry = 0;
530 for (i = 0; i < input_size; i++)
531 {
532 ccv_nnc_tensor_ref_t* tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, kd[i].i)((void*)(((char*)((tensor_ver->ref_version)->data)) + (
size_t)(tensor_ver->ref_version)->rsize * (size_t)(kd[i
].i)))
;
533 if (!has_this_alias_exclusively)
534 {
535 // If the sum operation is not operating on one alias. I need to zero this tensor out when it is first
536 // allocated (see discussions around the flags I use).
537 ccv_nnc_autograd_tensor_symbol_t* tensor_sym = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_ref->d)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(tensor_ref->
d)))
;
538 if (tensor_sym->alias_ref)
539 {
540 // Find the original tensor_sym and set its flags (I prefer to set flags on its original).
541 ccv_nnc_autograd_tensor_symbol_t* ref = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_sym->alias_ref - 1)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(tensor_sym->
alias_ref - 1)))
;
542 assert(ref->alias_ref == 0)((void) sizeof ((ref->alias_ref == 0) ? 1 : 0), __extension__
({ if (ref->alias_ref == 0) ; else __assert_fail ("ref->alias_ref == 0"
, "ccv_nnc_symbolic_graph_backward.c", 542, __extension__ __PRETTY_FUNCTION__
); }))
; // This is original.
543 ref->flags = CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS;
544 } else if (tensor_ref->alias_registry && // Otherwise, to see if this symbol is fully occupied.
545 // Loop over to see if this tensor is fully occupied to avoid extra zero step.
546 !_ccv_nnc_tensor_ref_fully_assigned_with_aliases(tensor_ref, autograd_tensor_symbols, tensor_symbol_info)) {
547 tensor_sym->flags = CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS;
548 }
549 }
550 // Check to see if any of these tensors doesn't have alias.
551 no_alias_registry |= (!tensor_ref->alias_registry);
// x is the exec that produced this ref; add an edge from it to the new sum exec.
552 const int x = tensor_ref->x;
553 assert(x >= 0)((void) sizeof ((x >= 0) ? 1 : 0), __extension__ ({ if (x >=
0) ; else __assert_fail ("x >= 0", "ccv_nnc_symbolic_graph_backward.c"
, 553, __extension__ __PRETTY_FUNCTION__); }))
; /* Otherwise, this is initialization tensor, which is impossible to be summed up by. */
554 if (x < exec_symbol_info_size)
555 {
556 ccv_nnc_autograd_graph_exec_symbol_t* back_exec = autograd_execs + x;
557 if (!back_exec->outgoings)
558 back_exec->outgoings = ccv_array_new(sizeof(int), 1, 0);
559 ccv_array_push(back_exec->outgoings, &outgoing);
560 } else {
561 ccv_nnc_sum_or_set_graph_exec_symbol_t* sum_or_set = (ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, x - exec_symbol_info_size)((void*)(((char*)((sum_or_set_execs)->data)) + (size_t)(sum_or_set_execs
)->rsize * (size_t)(x - exec_symbol_info_size)))
;
// NOTE(review): outgoings is not checked for NULL here, unlike back_exec->outgoings above.
// A sum exec created in this function with idx < 0 has no outgoings array -- confirm such
// an exec can never be the producer reached on this path.
562 ccv_array_push(sum_or_set->outgoings, &outgoing);
563 }
// Execs listed in the exec_registry of the ref also get an edge to the sum exec.
564 if (tensor_ref->exec_registry)
565 for (j = 0; j < tensor_ref->exec_registry->rnum; j++)
566 {
567 const int x = *(int*)ccv_array_get(tensor_ref->exec_registry, j)((void*)(((char*)((tensor_ref->exec_registry)->data)) +
(size_t)(tensor_ref->exec_registry)->rsize * (size_t)(
j)))
;
568 assert(x >= 0)((void) sizeof ((x >= 0) ? 1 : 0), __extension__ ({ if (x >=
0) ; else __assert_fail ("x >= 0", "ccv_nnc_symbolic_graph_backward.c"
, 568, __extension__ __PRETTY_FUNCTION__); }))
; /* Otherwise, this is initialization tensor, which is impossible to be summed up by. */
569 assert(x < exec_symbol_info_size)((void) sizeof ((x < exec_symbol_info_size) ? 1 : 0), __extension__
({ if (x < exec_symbol_info_size) ; else __assert_fail ("x < exec_symbol_info_size"
, "ccv_nnc_symbolic_graph_backward.c", 569, __extension__ __PRETTY_FUNCTION__
); }))
; // exec_registry is only used by alias_registry, it simply cannot reference to a sum operation.
570 ccv_nnc_autograd_graph_exec_symbol_t* back_exec = autograd_execs + x;
571 if (!back_exec->outgoings)
572 back_exec->outgoings = ccv_array_new(sizeof(int), 1, 0);
573 ccv_array_push(back_exec->outgoings, &outgoing);
574 }
575 }
// The ref describing the result of the sum: it names the backing tensor and is produced by the
// sum exec. It gets an alias registry unless some input lacked one and the fast path was not taken.
576 const ccv_nnc_tensor_ref_t tensor_ref = {
577 .d = tensor_ref_d,
578 .x = outgoing,
579 .exec_registry = 0, // I don't need to take execution dependencies because this tensor is generated by sum, therefore, we already take that dependency.
580 .alias_registry = !no_alias_registry || has_this_alias_exclusively ? ccv_array_new(sizeof(int), 1, 0) : 0
581 };
582 // If there is no alias registry, then we take the whole tensor ref as one.
583 if (!no_alias_registry || has_this_alias_exclusively)
584 {
585 // If this tensor ref contains multiple different types of alias, have to add them together (otherwise
586 // the computation for if there is an empty slot in this tensor ref is not correct without all the
587 // occupancy availability information).
588 if (!has_this_alias_exclusively)
589 for (i = 0; i < input_size; i++)
590 {
591 ccv_nnc_tensor_ref_t* ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, kd[i].i)((void*)(((char*)((tensor_ver->ref_version)->data)) + (
size_t)(tensor_ver->ref_version)->rsize * (size_t)(kd[i
].i)))
;
592 assert(ref->alias_registry)((void) sizeof ((ref->alias_registry) ? 1 : 0), __extension__
({ if (ref->alias_registry) ; else __assert_fail ("ref->alias_registry"
, "ccv_nnc_symbolic_graph_backward.c", 592, __extension__ __PRETTY_FUNCTION__
); }))
;
593 // It may get duplicates. But whatever, won't matter the computation.
594 for (j = 0; j < ref->alias_registry->rnum; j++)
595 ccv_array_push(tensor_ref.alias_registry, ccv_array_get(ref->alias_registry, j)((void*)(((char*)((ref->alias_registry)->data)) + (size_t
)(ref->alias_registry)->rsize * (size_t)(j)))
);
596 }
597 ccv_array_push(tensor_ref.alias_registry, &ad);
598 }
599 assert(input_size <= tensor_ver->ref_version->rnum - tensor_ver->c)((void) sizeof ((input_size <= tensor_ver->ref_version->
rnum - tensor_ver->c) ? 1 : 0), __extension__ ({ if (input_size
<= tensor_ver->ref_version->rnum - tensor_ver->c
) ; else __assert_fail ("input_size <= tensor_ver->ref_version->rnum - tensor_ver->c"
, "ccv_nnc_symbolic_graph_backward.c", 599, __extension__ __PRETTY_FUNCTION__
); }))
;
// Swap the i-th consumed ref into slot c + i (x is the swap temporary), so that advancing c by
// input_size below skips exactly the refs that fed the sum.
600 ccv_nnc_tensor_ref_t x;
601 for (i = 0; i < input_size; i++)
602 // If the current one (i + tensor_ver->c) is smaller than the one referenced to, exchange.
603 if (kd[i].i > i + tensor_ver->c)
604 CCV_SWAP(*(ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, i + tensor_ver->c), *(ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, kd[i].i), x)((x) = (*(ccv_nnc_tensor_ref_t*)((void*)(((char*)((tensor_ver
->ref_version)->data)) + (size_t)(tensor_ver->ref_version
)->rsize * (size_t)(i + tensor_ver->c)))), (*(ccv_nnc_tensor_ref_t
*)((void*)(((char*)((tensor_ver->ref_version)->data)) +
(size_t)(tensor_ver->ref_version)->rsize * (size_t)(i +
tensor_ver->c)))) = (*(ccv_nnc_tensor_ref_t*)((void*)(((char
*)((tensor_ver->ref_version)->data)) + (size_t)(tensor_ver
->ref_version)->rsize * (size_t)(kd[i].i)))), (*(ccv_nnc_tensor_ref_t
*)((void*)(((char*)((tensor_ver->ref_version)->data)) +
(size_t)(tensor_ver->ref_version)->rsize * (size_t)(kd
[i].i)))) = (x))
;
605 ccv_array_push(tensor_ver->ref_version, &tensor_ref);
606 // We've consumed input_size tensor refs, now move c up to the pointer of non-consumed tensors.
607 tensor_ver->c += input_size;
608 return ad;
609}
610
// Working state gathered for one symbolic graph before its backward (autograd) graph is built.
// The struct is self-referential: sub_preps points to further instances of the same type, one
// level down for sub-graphs.
611typedef struct ccv_nnc_symbolic_graph_backward_prep_s {
612 int exec_symbol_info_size; // Number of graph exec symbols before adding any new symbols related to automatic differentiation.
613 int tensor_symbol_info_size; // Number of tensor symbols before adding anything new.
614 int sub_prep_size; // Presumably the number of entries in sub_preps -- TODO confirm against the code that fills it.
615 ccv_nnc_graph_exec_symbol_info_t* exec_symbol_info;
616 ccv_nnc_tensor_symbol_info_t* tensor_symbol_info;
617 ccv_nnc_graph_backward_info_t* backward_info; // Corresponding to forward graph exec symbol info, it is exactly in reverse.
618 ccv_nnc_graph_visit_t* forward_visit; // The visitor structure (top sorted index) when doing traversal.
619 ccv_nnc_graph_visit_t* backward_visit; // The visitor structure (top sorted index) when doing reverse traversal.
620 ccv_nnc_autograd_graph_exec_symbol_t* autograd_execs; // The graph exec symbols we need for automatic differentiation. This is a 1:1 mapping for forward graph exec symbols, however, unlike backward_info, its outgoings may be more complex (may contain outgoing flows to sum nodes).
621 ccv_nnc_autograd_tensor_version_t* autograd_tensor_versions; // Corresponding to forward tensor symbols, each may contain multiple versions (due to multi-write).
622 ccv_array_t* autograd_tensor_symbols; // The tensor symbols we need for automatic differentiation (it may not be 1:1 mapping).
623 ccv_array_t* sum_or_set_execs; // The sum nodes, because in reverse mode, a tensor could have multiple versions, we need to sum them up before use.
624 struct ccv_nnc_symbolic_graph_backward_prep_s* sub_preps; // The preps of its sub-graphs.
625 // Pointers not managed by this struct
626 ccv_nnc_symbolic_graph_t* graph;
627} ccv_nnc_symbolic_graph_backward_prep_t;
628
629static ccv_nnc_symbolic_graph_backward_prep_t _ccv_nnc_symbolic_graph_backward_prep(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
630{
631 const int exec_symbol_info_size = graph->exec_symbol_info->rnum;
632 assert(exec_symbol_info_size > 0)((void) sizeof ((exec_symbol_info_size > 0) ? 1 : 0), __extension__
({ if (exec_symbol_info_size > 0) ; else __assert_fail ("exec_symbol_info_size > 0"
, "ccv_nnc_symbolic_graph_backward.c", 632, __extension__ __PRETTY_FUNCTION__
); }))
;
633 const int tensor_symbol_info_size = graph->tensor_symbol_info->rnum;
634 assert(tensor_symbol_info_size > 0)((void) sizeof ((tensor_symbol_info_size > 0) ? 1 : 0), __extension__
({ if (tensor_symbol_info_size > 0) ; else __assert_fail (
"tensor_symbol_info_size > 0", "ccv_nnc_symbolic_graph_backward.c"
, 634, __extension__ __PRETTY_FUNCTION__); }))
;
635 ccv_nnc_graph_exec_symbol_info_t* exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_info_t) * exec_symbol_info_size);
636 ccv_nnc_tensor_symbol_info_t* tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_info_t) * tensor_symbol_info_size);
637 ccv_nnc_graph_visit_t* forward_visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0), exec_symbol_info_size, sources, source_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_symbol_info_size) - 1)); _visit_->size = 0; do { typedef
struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_symbol_info_size); _i_++) _incoming_edges_ += (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_i_].outgoings) ? ((ccv_nnc_graph_exec_symbol_info_t*)((void
*)(((char*)((graph->exec_symbol_info)->data)) + (size_t
)(graph->exec_symbol_info)->rsize * (size_t)(0))))[_i_]
.outgoings->rnum : 0; const int _heap_mem_ = (exec_symbol_info_size
+ _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (exec_symbol_info_size) + sizeof(int32_t
) * ((exec_symbol_info_size) * 2 + _incoming_edges_)); else _incomings_
= (ccv_nnc_incoming_t*)__builtin_alloca (sizeof(ccv_nnc_incoming_t
) * (exec_symbol_info_size) + sizeof(int32_t) * ((exec_symbol_info_size
) * 2 + _incoming_edges_)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t
) * (exec_symbol_info_size)); int32_t* _exists_[2] = { (int32_t
*)(_incomings_ + (exec_symbol_info_size)), (int32_t*)(_incomings_
+ (exec_symbol_info_size)) + (exec_symbol_info_size), }; int32_t
* const _edges_ = _exists_[1] + (exec_symbol_info_size); for (
_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources
)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources
)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_backward.c", 637, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (sources)[_i_].d; } int _exist_size_
[2] = { (source_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r == 1) continue; _incomings_[
_idx_].r = 1; if (((ccv_nnc_graph_exec_symbol_info_t*)((void*
)(((char*)((graph->exec_symbol_info)->data)) + (size_t)
(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_
].outgoings) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)
((void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void
*)(((char*)((graph->exec_symbol_info)->data)) + (size_t
)(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_
].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings)->rsize * (size_t)(_j_))); ++_incomings_
[d].c; _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources
)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources
)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_backward.c", 637, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (sources)[_i_].d; } _exist_size_[0
] = (source_size); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int
_bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[
_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const
int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r
== 2) continue; _incomings_[_idx_].r = 2; if (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)
((void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void
*)(((char*)((graph->exec_symbol_info)->data)) + (size_t
)(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_
].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings)->rsize * (size_t)(_j_))); if (_incomings_
[d].edges == 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_
[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges -
1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; _exists_[
_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) =
(_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(destination_size); _i_++) { ((void) sizeof (((destinations)
[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations
)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_backward.c", 637, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (destinations)[_i_].d; } _exist_size_
[0] = (destination_size); _exist_size_[1] = 0; _p_ = 0, _q_ =
1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0;
for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 2) continue
; _incomings_[_idx_].r = 3; if (_incomings_[_idx_].edges >
0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(destination_size); _i_++) { ((void) sizeof (((destinations)
[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations
)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_backward.c", 637, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ =
0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources
)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources
)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_backward.c", 637, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ =
1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int
_d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_
] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t
_idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size
].index = ((_idx_)); _visit_->node[_visit_->size].term =
((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_
[_idx_].d) { ++_d_; _incomings_[_idx_].r = 4; } if (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings) { if (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings->rnum == 1) { const int d = *(int*)((
void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void*)
(((char*)((graph->exec_symbol_info)->data)) + (size_t)(
graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_]
.outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings)->rsize * (size_t)(0))); --_incomings_
[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r ==
3 && _d_ < (destination_size)) { _exists_[_p_][_i_
] = d; continue; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)
((void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void
*)(((char*)((graph->exec_symbol_info)->data)) + (size_t
)(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_
].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings)->rsize * (size_t)(_j_))); --_incomings_
[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r ==
3 && _d_ < (destination_size)) { _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } } ++_i_; } ((_i_) = (_p_
), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size
); _i_++) { ((void) sizeof (((destinations)[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph ==
graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_backward.c", 637, __extension__ __PRETTY_FUNCTION__
); })); if (_incomings_[(destinations)[_i_].d].r == 4) continue
; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_
].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations
)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0"
, "ccv_nnc_symbolic_graph_backward.c", 637, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(destinations)[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = (((destinations
)[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_
[(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_
) free(_incomings_); } while (0);; ((void) sizeof ((_visit_->
size <= (exec_symbol_info_size)) ? 1 : 0), __extension__ (
{ if (_visit_->size <= (exec_symbol_info_size)) ; else __assert_fail
("_visit_->size <= (exec_symbol_info_size)", "ccv_nnc_symbolic_graph_backward.c"
, 637, __extension__ __PRETTY_FUNCTION__); })); _visit_; })
;
638 ccv_nnc_symbolic_graph_symbol_infer(graph, forward_visit, sources, source_size, destinations, destination_size, 0, 0, tensor_symbol_info, exec_symbol_info);
639 int i;
640 // Now, for each one of these, find a reverse graph.
641 ccv_nnc_graph_backward_info_t* backward_info = (ccv_nnc_graph_backward_info_t*)cccalloccalloc(exec_symbol_info_size, sizeof(ccv_nnc_graph_backward_info_t));
642 ccv_nnc_graph_visit_for(forward_visit, exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (forward_visit)->size; _i_
++) { const int idx __attribute__((unused)) = (forward_visit)
->node[_i_].index; const int _node_unused_ __attribute__((
unused)) = (forward_visit)->node[_i_].term; typeof ((exec_symbol_info
)) const node __attribute__((unused)) = (exec_symbol_info) + idx
;
{
643 assert(ccv_nnc_cmd_is_forward(node->cmd) || node->cmd.cmd == CCV_NNC_NOOP)((void) sizeof ((ccv_nnc_cmd_is_forward(node->cmd) || node
->cmd.cmd == CCV_NNC_NOOP) ? 1 : 0), __extension__ ({ if (
ccv_nnc_cmd_is_forward(node->cmd) || node->cmd.cmd == CCV_NNC_NOOP
) ; else __assert_fail ("ccv_nnc_cmd_is_forward(node->cmd) || node->cmd.cmd == CCV_NNC_NOOP"
, "ccv_nnc_symbolic_graph_backward.c", 643, __extension__ __PRETTY_FUNCTION__
); }))
;
644 if (node->outgoings)
645 for (i = 0; i < node->outgoings->rnum; i++)
646 {
647 int d = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
648 if (backward_info[d].outgoings == 0)
649 backward_info[d].outgoings = ccv_array_new(sizeof(int32_t), 1, 0);
650 ccv_array_push(backward_info[d].outgoings, &idx);
651 }
652 } ccv_nnc_graph_visit_endfor} }
653 // Also mark only the output bits that we use.
654 for (i = 0; i < exec_symbol_info_size; i++)
655 {
656 backward_info[i].input_bitmask_size = ((exec_symbol_info[i].output_size * 2 + exec_symbol_info[i].input_size + 63) >> 6);
657 backward_info[i].output_bitmask_size = ((exec_symbol_info[i].input_size + 63) >> 6);
658 // Allocate input / output bitmasks
659 if (backward_info[i].input_bitmask_size + backward_info[i].output_bitmask_size > 0)
660 {
661 backward_info[i].input_bitmasks = (uint64_t*)cccalloccalloc(backward_info[i].input_bitmask_size + backward_info[i].output_bitmask_size, sizeof(uint64_t));
662 if (backward_info[i].output_bitmask_size)
663 backward_info[i].output_bitmasks = backward_info[i].input_bitmasks + backward_info[i].input_bitmask_size;
664 }
665 }
666 ccv_nnc_graph_visit_t* backward_visit = ccv_nnc_graph_visit_new(graph, backward_info, exec_symbol_info_size, destinations, destination_size, sources, source_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((exec_symbol_info_size) - 1)); _visit_->size = 0; do { typedef
struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t
; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ <
(exec_symbol_info_size); _i_++) _incoming_edges_ += ((backward_info
)[_i_].outgoings) ? (backward_info)[_i_].outgoings->rnum :
0; const int _heap_mem_ = (exec_symbol_info_size + _incoming_edges_
> 1024); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_)
_incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t
) * (exec_symbol_info_size) + sizeof(int32_t) * ((exec_symbol_info_size
) * 2 + _incoming_edges_)); else _incomings_ = (ccv_nnc_incoming_t
*)__builtin_alloca (sizeof(ccv_nnc_incoming_t) * (exec_symbol_info_size
) + sizeof(int32_t) * ((exec_symbol_info_size) * 2 + _incoming_edges_
)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (exec_symbol_info_size
)); int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (exec_symbol_info_size
)), (int32_t*)(_incomings_ + (exec_symbol_info_size)) + (exec_symbol_info_size
), }; int32_t* const _edges_ = _exists_[1] + (exec_symbol_info_size
); for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void
) sizeof (((destinations)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((destinations)[_i_].graph == graph) ; else __assert_fail
("(destinations)[_i_].graph == graph", "ccv_nnc_symbolic_graph_backward.c"
, 666, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_
] = (destinations)[_i_].d; } int _exist_size_[2] = { (destination_size
), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0
) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r == 1) continue; _incomings_[_idx_].r = 1
; if ((backward_info)[_idx_].outgoings) for (_j_ = 0; _j_ <
(backward_info)[_idx_].outgoings->rnum; _j_++) { const int
d = *(int*)((void*)(((char*)(((backward_info)[_idx_].outgoings
)->data)) + (size_t)((backward_info)[_idx_].outgoings)->
rsize * (size_t)(_j_))); ++_incomings_[d].c; _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (
_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size
); _i_++) { ((void) sizeof (((destinations)[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph ==
graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_backward.c", 666, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (destinations)[_i_].d; } _exist_size_
[0] = (destination_size); _exist_size_[1] = 0; _p_ = 0, _q_ =
1; int _bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) {
const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_
].r == 2) continue; _incomings_[_idx_].r = 2; if ((backward_info
)[_idx_].outgoings) for (_j_ = 0; _j_ < (backward_info)[_idx_
].outgoings->rnum; _j_++) { const int d = *(int*)((void*)(
((char*)(((backward_info)[_idx_].outgoings)->data)) + (size_t
)((backward_info)[_idx_].outgoings)->rsize * (size_t)(_j_)
)); if (_incomings_[d].edges == 0) { _incomings_[d].edges = _bump_
; _bump_ += _incomings_[d].c; _incomings_[d].c = 0; } _edges_
[_incomings_[d].edges - 1 + _incomings_[d].c] = _idx_; ++_incomings_
[d].c; _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources
)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources
)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_backward.c", 666, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (sources)[_i_].d; } _exist_size_[0
] = (source_size); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while
(_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_
= 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_
= _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 2) continue
; _incomings_[_idx_].r = 3; if (_incomings_[_idx_].edges >
0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(source_size); _i_++) { ((void) sizeof (((sources)[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if ((sources)[_i_].graph
== graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_backward.c", 666, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].d = 1; } for (_i_ = 0; _i_
< (destination_size); _i_++) { ((void) sizeof (((destinations
)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations
)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_backward.c", 666, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (destinations)[_i_].d; } _p_ = 0; _q_
= 1; _exist_size_[0] = (destination_size); _exist_size_[1] =
0; int _d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_
[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const
int32_t _idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_
->size].index = ((_idx_)); _visit_->node[_visit_->size
].term = ((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_
[_idx_].d) { ++_d_; _incomings_[_idx_].r = 4; } if ((backward_info
)[_idx_].outgoings) { if ((backward_info)[_idx_].outgoings->
rnum == 1) { const int d = *(int*)((void*)(((char*)(((backward_info
)[_idx_].outgoings)->data)) + (size_t)((backward_info)[_idx_
].outgoings)->rsize * (size_t)(0))); --_incomings_[d].c; if
(_incomings_[d].c == 0 && _incomings_[d].r == 3 &&
_d_ < (source_size)) { _exists_[_p_][_i_] = d; continue; }
} else for (_j_ = 0; _j_ < (backward_info)[_idx_].outgoings
->rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((
backward_info)[_idx_].outgoings)->data)) + (size_t)((backward_info
)[_idx_].outgoings)->rsize * (size_t)(_j_))); --_incomings_
[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r ==
3 && _d_ < (source_size)) { _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } } ++_i_; } ((_i_) = (_p_
), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (source_size
); _i_++) { ((void) sizeof (((sources)[_i_].graph == graph) ?
1 : 0), __extension__ ({ if ((sources)[_i_].graph == graph) ;
else __assert_fail ("(sources)[_i_].graph == graph", "ccv_nnc_symbolic_graph_backward.c"
, 666, __extension__ __PRETTY_FUNCTION__); })); if (_incomings_
[(sources)[_i_].d].r == 4) continue; if (!(0)) { ((void) sizeof
((_incomings_[(sources)[_i_].d].c == 0) ? 1 : 0), __extension__
({ if (_incomings_[(sources)[_i_].d].c == 0) ; else __assert_fail
("_incomings_[(sources)[_i_].d].c == 0", "ccv_nnc_symbolic_graph_backward.c"
, 666, __extension__ __PRETTY_FUNCTION__); })); } else if (_incomings_
[(sources)[_i_].d].c > 0) continue; _visit_->node[_visit_
->size].index = (((sources)[_i_].d)); _visit_->node[_visit_
->size].term = ((_incomings_[(sources)[_i_].d].d)); ++_visit_
->size;; } if (_heap_mem_) free(_incomings_); } while (0);
; ((void) sizeof ((_visit_->size <= (exec_symbol_info_size
)) ? 1 : 0), __extension__ ({ if (_visit_->size <= (exec_symbol_info_size
)) ; else __assert_fail ("_visit_->size <= (exec_symbol_info_size)"
, "ccv_nnc_symbolic_graph_backward.c", 666, __extension__ __PRETTY_FUNCTION__
); })); _visit_; })
;
667 const int sub_prep_size = graph->sub_graphs ? graph->sub_graphs->rnum : 0;
668 ccv_nnc_symbolic_graph_backward_prep_t* sub_preps = sub_prep_size > 0 ? (ccv_nnc_symbolic_graph_backward_prep_t*)cccalloccalloc(sub_prep_size, sizeof(ccv_nnc_symbolic_graph_backward_prep_t)) : 0;
669 for (i = 0; i < sub_prep_size; i++)
670 {
671 const ccv_nnc_symbolic_graph_t* const sub_graph = *(ccv_nnc_symbolic_graph_t**)ccv_array_get(graph->sub_graphs, i)((void*)(((char*)((graph->sub_graphs)->data)) + (size_t
)(graph->sub_graphs)->rsize * (size_t)(i)))
;
672 sub_preps[i] = _ccv_nnc_symbolic_graph_backward_prep(sub_graph, ccv_nnc_symbolic_graph_sources(sub_graph), ccv_nnc_symbolic_graph_source_size(sub_graph), ccv_nnc_symbolic_graph_destinations(sub_graph), ccv_nnc_symbolic_graph_destination_size(sub_graph));
673 }
674 return (ccv_nnc_symbolic_graph_backward_prep_t){
675 .exec_symbol_info_size = exec_symbol_info_size,
676 .tensor_symbol_info_size = tensor_symbol_info_size,
677 .sub_prep_size = sub_prep_size,
678 .exec_symbol_info = exec_symbol_info,
679 .tensor_symbol_info = tensor_symbol_info,
680 .backward_info = backward_info,
681 .forward_visit = forward_visit,
682 .backward_visit = backward_visit,
683 .sub_preps = sub_preps,
684 .graph = (ccv_nnc_symbolic_graph_t*)graph,
685 };
686}
687
// Derives the backward-pass I/O index maps for one forward exec node.
// The backward inputs are always the forward outputs: *back_input_map is
// pointed at node->outputs and *back_input_size is set to node->output_size.
// The backward outputs are derived from the forward inputs in one of two ways:
//  - CASE_OF flag set: *back_output_map is NOT re-pointed. The inputs outside
//    the range [argument.offset, argument.offset + argument.size) are copied
//    into the buffer the caller already stored in *back_output_map, and
//    *back_output_size becomes input_size - argument.size. The caller must
//    therefore supply a writable buffer with at least that many ints.
//  - otherwise: *back_output_map is pointed directly at node->inputs and
//    *back_output_size is node->input_size (nothing is copied).
// NOTE(review): the analyzer path recorded below (steps 26-29) skips both copy
// loops, which needs offset <= 0 and offset + size >= input_size. That gives
// input_size - size <= offset <= 0, so *back_output_size would not be positive
// and the later read loop at line 721 should not iterate (barring int
// overflow). The report at line 724 looks like an uncorrelated-constraint
// false positive -- confirm before changing code.
688static void _ccv_nnc_symbolic_graph_backward_exec_io(const ccv_nnc_graph_exec_symbol_info_t* const node, int** const back_input_map, int** const back_output_map, int* const back_input_size, int* const back_output_size)
689{
690 int i;
691 if (node->flags & CCV_NNC_GRAPH_EXEC_CASE_OF)
24
Assuming the condition is true
25
Taking true branch
692 {
693 *back_input_map = node->outputs;
694 *back_input_size = node->output_size;
// Copy the inputs that precede the argument range, keeping their positions.
695 for (i = 0; i < node->case_of.argument.offset; i++)
26
Assuming 'i' is >= field 'offset'
27
Loop condition is false. Execution continues on line 697
696 (*back_output_map)[i] = node->inputs[i];
697 const int argument_offset = node->case_of.argument.offset;
698 const int argument_size = node->case_of.argument.size;
699 // Skip the argument range.
// Inputs after the argument range are shifted down by argument_size so the
// destination buffer ends up densely packed.
700 for (i = argument_offset + argument_size; i < node->input_size; i++)
28
Assuming 'i' is >= field 'input_size'
29
Loop condition is false. Execution continues on line 702
701 (*back_output_map)[i - argument_size] = node->inputs[i];
702 *back_output_size = node->input_size - node->case_of.argument.size;
703 } else { // if (node->flags & CCV_NNC_GRAPH_EXEC_P_WHILE) {
// Non-CASE_OF nodes: alias the node's own arrays, no copy into the caller buffer.
704 *back_input_map = node->outputs;
705 *back_input_size = node->output_size;
706 *back_output_map = node->inputs;
707 *back_output_size = node->input_size;
708 }
709}
30
Returning without writing to '**back_output_map'
710
// Rebuilds the f / wrt tensor symbol lists of one sub-graph from the parent
// forward node's pruned bitmasks. Both output arrays are cleared and refilled:
//  1. sub_wrt_symbols: one entry per backward output i whose bit is set in
//     output_bitmasks. The parent tensor's s_ref entry for graph_ref (stored
//     1-based, hence the "- 1") becomes the sub-graph symbol; when there is no
//     such entry a NO_TENSOR_SYMBOL placeholder is pushed instead.
//  2. sub_f_symbols: one entry per backward input i whose bit is set in
//     input_bitmasks, mapped through s_ref in the same way.
//  3. Every sub-graph tensor with a non-zero assign_ref contributes itself to
//     sub_wrt_symbols and its assign_ref - 1 target to sub_f_symbols, unless
//     a linear scan finds it is already present.
// forw_exec is the parent node; tensor_symbol_info is the parent graph's table
// (indexed by the ids in forw_exec); graph_ref is the 0-based index into s_ref.
711static void _ccv_nnc_symbolic_graph_backward_prep_sub_f_wrt_symbols(const ccv_nnc_graph_exec_symbol_info_t* const forw_exec, const ccv_nnc_symbolic_graph_t* const sub_graph, const int graph_ref, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, const uint64_t* const input_bitmasks, const uint64_t* const output_bitmasks, ccv_array_t* const sub_f_symbols, ccv_array_t* const sub_wrt_symbols)
712{
713 int i, j;
714 ccv_array_clear(sub_wrt_symbols);
// Scratch buffers sized to at least one element so the variable-length arrays
// are never declared with a zero size. They are left uninitialized; the
// exec_io helper either writes into forw_inputs or redirects the map pointers
// away from them.
715 int forw_outputs[ccv_max(1, forw_exec->output_size)({ typeof (1) _a = (1); typeof (forw_exec->output_size) _b
= (forw_exec->output_size); (_a > _b) ? _a : _b; })
];
19
Assuming '_a' is <= '_b'
20
'?' condition is false
716 int forw_inputs[ccv_max(1, forw_exec->input_size)({ typeof (1) _a = (1); typeof (forw_exec->input_size) _b =
(forw_exec->input_size); (_a > _b) ? _a : _b; })
];
21
Assuming '_a' is <= '_b'
22
'?' condition is false
717 int* back_input_map = forw_outputs;
718 int* back_output_map = forw_inputs;
719 int back_input_size, back_output_size;
720 _ccv_nnc_symbolic_graph_backward_exec_io(forw_exec, &back_input_map, &back_output_map, &back_input_size, &back_output_size);
23
Calling '_ccv_nnc_symbolic_graph_backward_exec_io'
31
Returning from '_ccv_nnc_symbolic_graph_backward_exec_io'
// Phase 1: wrt symbols, selected by the output bitmask (bit i of word i >> 6).
721 for (i = 0; i < back_output_size; i++)
32
The value 0 is assigned to 'i'
33
Assuming 'i' is < 'back_output_size'
34
Loop condition is true. Entering loop body
722 if (output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))
35
Assuming the condition is true
36
Taking true branch
723 {
// The analyzer flags this read: on its path back_output_map still points at
// the uninitialized forw_inputs buffer and no element was written.
724 const int d = back_output_map[i];
37
Assigned value is garbage or undefined
725 const ccv_array_t* const s_refs = tensor_symbol_info[d].s_ref;
// s_ref entries are stored offset by one; a missing array, a too-short array
// or a stored 0 all yield a negative s_ref here.
726 const int s_ref = s_refs && s_refs->rnum > graph_ref ? *(int*)ccv_array_get(s_refs, graph_ref)((void*)(((char*)((s_refs)->data)) + (size_t)(s_refs)->
rsize * (size_t)(graph_ref)))
- 1 : -1;
727 if (s_ref >= 0)
728 {
729 ccv_nnc_tensor_symbol_t sub_wrt_symbol = {
730 .d = s_ref,
731 .graph = sub_graph,
732 };
733 ccv_array_push(sub_wrt_symbols, &sub_wrt_symbol);
734 } else
735 ccv_array_push(sub_wrt_symbols, &NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
736 }
// Phase 2: f symbols, selected by the input bitmask.
737 ccv_array_clear(sub_f_symbols);
738 for (i = 0; i < back_input_size; i++)
739 if (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))
740 {
741 const int d = back_input_map[i];
// NOTE(review): unlike line 726, s_ref is dereferenced here without a null or
// rnum check -- presumably every selected forward output is guaranteed to be
// mapped into the sub-graph; confirm.
742 ccv_nnc_tensor_symbol_t sub_f_symbol = {
743 .d = *(int*)ccv_array_get(tensor_symbol_info[d].s_ref, graph_ref)((void*)(((char*)((tensor_symbol_info[d].s_ref)->data)) + (
size_t)(tensor_symbol_info[d].s_ref)->rsize * (size_t)(graph_ref
)))
- 1,
744 .graph = sub_graph,
745 };
746 ccv_array_push(sub_f_symbols, &sub_f_symbol);
747 }
748 // Go through all its assignments (parameterized loop), making them either wrt or f.
749 // The reason is these must flow through the graph, otherwise we cannot form a full
750 // enclosed loop. Also because they are the additional f / wrt symbols, there is
751 // no case that we cannot find their corresponding gradients in the backward sub graphs
752 // (these gradients have to be parameterized to form an enclosed loop as well).
753 for (i = 0; i < sub_graph->tensor_symbol_info->rnum; i++)
754 {
// Note: this local shadows the tensor_symbol_info parameter and refers to the
// sub-graph's own symbol table entry i, not the parent's table.
755 const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(sub_graph->tensor_symbol_info, i)((void*)(((char*)((sub_graph->tensor_symbol_info)->data
)) + (size_t)(sub_graph->tensor_symbol_info)->rsize * (
size_t)(i)))
;
756 if (tensor_symbol_info->assign_ref)
757 {
758 const int assign_ref = tensor_symbol_info->assign_ref - 1;
759 // i is the wrt, assign_ref is the f.
// Linear de-duplication: only append i if no existing wrt entry has d == i.
760 int flag = 0;
761 for (j = 0; !flag && j < sub_wrt_symbols->rnum; j++)
762 flag = (((ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, j)((void*)(((char*)((sub_wrt_symbols)->data)) + (size_t)(sub_wrt_symbols
)->rsize * (size_t)(j)))
)->d == i);
763 if (!flag)
764 {
765 ccv_nnc_tensor_symbol_t sub_wrt_symbol = {
766 .d = i,
767 .graph = sub_graph,
768 };
769 ccv_array_push(sub_wrt_symbols, &sub_wrt_symbol);
770 }
// Same de-duplicated append for the assignment target on the f side.
771 flag = 0;
772 for (j = 0; !flag && j < sub_f_symbols->rnum; j++)
773 flag = (((ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_f_symbols, j)((void*)(((char*)((sub_f_symbols)->data)) + (size_t)(sub_f_symbols
)->rsize * (size_t)(j)))
)->d == assign_ref);
774 if (!flag)
775 {
776 ccv_nnc_tensor_symbol_t sub_f_symbol = {
777 .d = assign_ref,
778 .graph = sub_graph,
779 };
780 ccv_array_push(sub_f_symbols, &sub_f_symbol);
781 }
782 }
783 }
784}
785
786// Check whether for a given f_symbol, we can compute wrt_symbols at all, if we can, tag the minimal io and ops (some ops can be replaced with noop) required to do so.
// Works in place on backward_prep->backward_info (f_wrt flags and the per-node
// input/output bitmasks) and recurses into backward_prep->sub_preps for nodes
// that reference sub-graphs.
// Returns non-zero only if, for every f_symbol, the used_grad entry of its
// alias root ended up with WRT_SYMBOL_USE set; returns 1 when f_symbol_size
// is 0 because the final loop then never runs.
// NOTE(review): the sources / source_size / destinations / destination_size
// parameters are not referenced anywhere in this function body as shown.
787static int _ccv_nnc_symbolic_graph_backward_prep_prune_ops(const ccv_nnc_symbolic_graph_backward_prep_t* const backward_prep, const ccv_nnc_tensor_symbol_t* const f_symbols, const int f_symbol_size, const ccv_nnc_tensor_symbol_t* const wrt_symbols, const int wrt_symbol_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
788{
789 int i, j, p;
790 const int tensor_symbol_info_size = backward_prep->tensor_symbol_info_size;
791 const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = backward_prep->exec_symbol_info;
792 const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info =backward_prep->tensor_symbol_info;
793 const ccv_nnc_graph_visit_t* const forward_visit = backward_prep->forward_visit;
794 // Now, for each one of these, find a reverse graph.
795 ccv_nnc_graph_backward_info_t* const backward_info = backward_prep->backward_info;
796 const ccv_nnc_graph_visit_t* const backward_visit = backward_prep->backward_visit;
797 // Find the f_symbols, and tag its flows.
// Bit 0x1 of f_wrt marks "on the f flow". A node gets it when one of its
// outputs (followed to the alias root) equals an f_symbol, or when a node
// visited earlier in backward_visit already pushed the bit onto it through
// its backward_info outgoings.
798 ccv_nnc_graph_visit_for(backward_visit, backward_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (backward_visit)->size; _i_
++) { const int idx __attribute__((unused)) = (backward_visit
)->node[_i_].index; const int _node_unused_ __attribute__(
(unused)) = (backward_visit)->node[_i_].term; typeof ((backward_info
)) const node __attribute__((unused)) = (backward_info) + idx
;
{
799 int f = node->f_wrt & 0x1;
800 for (i = 0; i < exec_symbol_info[idx].output_size && !f; i++)
801 {
802 int d = exec_symbol_info[idx].outputs[i];
// Negative ids are skipped (treated as absent outputs).
803 if (d < 0)
804 continue;
805 while (tensor_symbol_info[d].alias_ref)
806 d = tensor_symbol_info[d].alias_ref - 1;
807 for (j = 0; j < f_symbol_size && !f; j++)
808 if (d == f_symbols[j].d)
809 f = 1;
810 }
811 if (f)
812 {
813 node->f_wrt |= f;
814 if (node->outgoings)
815 for (i = 0; i < node->outgoings->rnum; i++)
816 {
817 int d = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
818 backward_info[d].f_wrt |= f;
819 }
820 }
821 } ccv_nnc_graph_visit_endfor} }
822 // Find the wrt_symbols, and tag its flows.
// Bit 0x2 of f_wrt marks "on the wrt flow". It is propagated along the forward
// node's own outgoings (node here is an exec_symbol_info entry). Note the
// input id is followed through alias_ref with a loop, while the wrt symbol is
// resolved through a single alias_ref step.
823 ccv_nnc_graph_visit_for(forward_visit, exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (forward_visit)->size; _i_
++) { const int idx __attribute__((unused)) = (forward_visit)
->node[_i_].index; const int _node_unused_ __attribute__((
unused)) = (forward_visit)->node[_i_].term; typeof ((exec_symbol_info
)) const node __attribute__((unused)) = (exec_symbol_info) + idx
;
{
824 int wrt = backward_info[idx].f_wrt & 0x2;
825 for (i = 0; i < node->input_size && !wrt; i++)
826 {
827 int d = node->inputs[i];
828 if (d < 0)
829 continue;
830 while (tensor_symbol_info[d].alias_ref)
831 d = tensor_symbol_info[d].alias_ref - 1;
832 for (j = 0; j < wrt_symbol_size && !wrt; j++)
833 {
834 int wrt_d = wrt_symbols[j].d;
835 // Find the root of this tensor alias.
836 if (tensor_symbol_info[wrt_d].alias_ref)
837 wrt_d = tensor_symbol_info[wrt_d].alias_ref - 1;
838 if (d == wrt_d)
839 wrt = 0x2;
840 }
841 }
842 if (wrt)
843 {
844 backward_info[idx].f_wrt |= wrt;
845 if (node->outgoings)
846 for (i = 0; i < node->outgoings->rnum; i++)
847 {
848 int d = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
849 backward_info[d].f_wrt |= wrt;
850 }
851 }
852 } ccv_nnc_graph_visit_endfor} }
// Per-tensor usage flags stored in used_grad, indexed by alias-root tensor id.
853 enum {
854 WRT_SYMBOL_USE = 1,
855 F_SYMBOL_USE = 2
856 };
// Zero-initialized scratch array; released at line 1056 before returning.
857 uint8_t* used_grad = (uint8_t*)cccalloccalloc(tensor_symbol_info_size, sizeof(uint8_t));
858 // First, all f_symbols and wrt_symbols are used.
859 for (i = 0; i < f_symbol_size; i++)
860 if (f_symbols[i].d >= 0)
861 used_grad[tensor_symbol_info[f_symbols[i].d].alias_ref ? tensor_symbol_info[f_symbols[i].d].alias_ref - 1 : f_symbols[i].d] |= F_SYMBOL_USE;
862 for (i = 0; i < wrt_symbol_size; i++)
863 if (wrt_symbols[i].d >= 0)
864 used_grad[tensor_symbol_info[wrt_symbols[i].d].alias_ref ? tensor_symbol_info[wrt_symbols[i].d].alias_ref - 1 : wrt_symbols[i].d] |= WRT_SYMBOL_USE;
865 // Do optimistic assumption, and then compute used_grad
866 ccv_nnc_graph_visit_for(forward_visit, exec_symbol_info, _, idx){ int _i_; for (_i_ = 0; _i_ < (forward_visit)->size; _i_
++) { const int idx __attribute__((unused)) = (forward_visit)
->node[_i_].index; const int _node_unused_ __attribute__((
unused)) = (forward_visit)->node[_i_].term; typeof ((exec_symbol_info
)) const _ __attribute__((unused)) = (exec_symbol_info) + idx
;
{
867 ccv_nnc_graph_backward_info_t* node = backward_info + idx;
868 /* Only interested in the ones on the f / wrt flow */
869 if ((node->f_wrt & 0x3) == 0x3)
870 {
871 const ccv_nnc_graph_exec_symbol_info_t* forw_exec = exec_symbol_info + idx;
872 ccv_nnc_cmd_t cmd = forw_exec->cmd;
873 if (cmd.cmd != CCV_NNC_NOOP)
874 cmd.cmd += 1; /* Backward command is the one after forward command. */
875 assert(ccv_nnc_cmd_is_backward(cmd) || cmd.cmd == CCV_NNC_NOOP)((void) sizeof ((ccv_nnc_cmd_is_backward(cmd) || cmd.cmd == CCV_NNC_NOOP
) ? 1 : 0), __extension__ ({ if (ccv_nnc_cmd_is_backward(cmd)
|| cmd.cmd == CCV_NNC_NOOP) ; else __assert_fail ("ccv_nnc_cmd_is_backward(cmd) || cmd.cmd == CCV_NNC_NOOP"
, "ccv_nnc_symbolic_graph_backward.c", 875, __extension__ __PRETTY_FUNCTION__
); }))
;
// Backward input slot layout implied by the index tests below:
//   [0, output_size)                                        gradients of the forward outputs
//   [output_size, output_size + input_size)                 the forward inputs
//   [output_size + input_size, 2 * output_size + input_size) the forward outputs
// Start optimistic: set every slot whose underlying tensor id is not negative.
876 for (i = 0; i < forw_exec->output_size * 2 + forw_exec->input_size; i++)
877 if (!(i >= forw_exec->output_size && i < forw_exec->output_size + forw_exec->input_size &&
878 forw_exec->inputs[i - forw_exec->output_size] < 0) && // If the input is empty, no need.
879 !(i >= forw_exec->output_size + forw_exec->input_size && i < forw_exec->output_size * 2 + forw_exec->input_size &&
880 forw_exec->outputs[i - forw_exec->output_size - forw_exec->input_size] < 0) && // If the output is empty, no need.
881 !(i < forw_exec->output_size && forw_exec->outputs[i] < 0)) // If the output is empty for gradient, no need.
882 node->input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
// Backward outputs correspond one-to-one with the forward inputs.
883 for (i = 0; i < forw_exec->input_size; i++)
884 if (!(forw_exec->inputs[i] < 0)) // If the inputs is empty, no need.
885 node->output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
886 int maybe_noop = 1;
887 for (i = 0; i < forw_exec->input_size; i++)
888 /* See if it is used as wrt, if not, no need to run this node at all. */
889 if (forw_exec->inputs[i] >= 0 && used_grad[tensor_symbol_info[forw_exec->inputs[i]].alias_ref ? tensor_symbol_info[forw_exec->inputs[i]].alias_ref - 1 : forw_exec->inputs[i]] & WRT_SYMBOL_USE)
890 {
891 maybe_noop = 0;
892 break;
893 }
894 if (maybe_noop)
895 {
// No input needs a gradient: wipe both masks and zero output_bitmask_size
// (input_bitmask_size is left unchanged).
896 for (i = 0; i < node->input_bitmask_size; i++)
897 node->input_bitmasks[i] = 0;
898 for (i = 0; i < node->output_bitmask_size; i++)
899 node->output_bitmasks[i] = 0;
900 node->output_bitmask_size = 0;
901 } else if (cmd.cmd == CCV_NNC_GRAPH_FORWARD || cmd.cmd == CCV_NNC_GRAPH_BACKWARD) {
902 // Clear out all potential outputs if we think it is not a wrt symbols.
903 for (i = 0; i < forw_exec->input_size; i++)
904 if ((node->output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) &&
905 !(used_grad[tensor_symbol_info[forw_exec->inputs[i]].alias_ref ? tensor_symbol_info[forw_exec->inputs[i]].alias_ref - 1 : forw_exec->inputs[i]] & WRT_SYMBOL_USE))
906 node->output_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63));
907 // But for now, assuming we need all input gradients.
908 // Clear out all inputs / outputs from forward op.
909 for (i = forw_exec->output_size; i < forw_exec->output_size * 2 + forw_exec->input_size; i++)
910 node->input_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63));
911 } else if (ccv_nnc_cmd_bitmask(cmd, forw_exec->output_size * 2 + forw_exec->input_size, forw_exec->input_size, node->input_bitmasks, node->input_bitmask_size, node->output_bitmasks, node->output_bitmask_size)) {
// Greedy minimization: tentatively clear one bit, keep the removal only if
// ccv_nnc_cmd_bitmask still returns non-zero for the reduced masks, and
// repeat the whole sweep until a pass makes no change.
912 int flag; /* Only continue if it changed */
913 do {
914 flag = 0;
915 /* Check if the output first */
916 for (i = 0; i < forw_exec->input_size; i++)
917 /* Only try to eliminate the one that is not used. */
918 if ((node->output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) &&
919 !(used_grad[tensor_symbol_info[forw_exec->inputs[i]].alias_ref ? tensor_symbol_info[forw_exec->inputs[i]].alias_ref - 1 : forw_exec->inputs[i]] & WRT_SYMBOL_USE))
920 {
921 node->output_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63));
922 /* If it worked, mark it as flagged. */
923 if (ccv_nnc_cmd_bitmask(cmd, forw_exec->output_size * 2 + forw_exec->input_size, forw_exec->input_size, node->input_bitmasks, node->input_bitmask_size, node->output_bitmasks, node->output_bitmask_size))
924 flag = 1;
925 else /* Refit this with the bit back again. */
926 node->output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
927 }
// Input side: forward-input / forward-output slots (i >= output_size) are
// always candidates; gradient slots only when the output is not F_SYMBOL_USE.
928 for (i = 0; i < forw_exec->output_size * 2 + forw_exec->input_size; i++)
929 if ((node->input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) &&
930 (i >= forw_exec->output_size ||
931 !(used_grad[tensor_symbol_info[forw_exec->outputs[i]].alias_ref ? tensor_symbol_info[forw_exec->outputs[i]].alias_ref - 1 : forw_exec->outputs[i]] & F_SYMBOL_USE)))
932 { /* Try to eliminate one of the input. */
933 node->input_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63));
934 /* If it worked, mark it as flagged. */
935 if (ccv_nnc_cmd_bitmask(cmd, forw_exec->output_size * 2 + forw_exec->input_size, forw_exec->input_size, node->input_bitmasks, node->input_bitmask_size, node->output_bitmasks, node->output_bitmask_size))
936 flag = 1;
937 else /* Refit this with the bit back again. */
938 node->input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
939 }
940 } while (flag);
941 }
// Publish what this node still needs so that nodes visited later in
// forward_visit see it: outputs whose gradient slot survived get
// WRT_SYMBOL_USE, inputs whose backward output survived get F_SYMBOL_USE.
942 for (i = 0; i < forw_exec->output_size; i++)
943 if (node->input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))
944 /* Mark it is used as wrt. */
945 used_grad[tensor_symbol_info[forw_exec->outputs[i]].alias_ref ? tensor_symbol_info[forw_exec->outputs[i]].alias_ref - 1 : forw_exec->outputs[i]] |= WRT_SYMBOL_USE;
946 for (i = 0; i < forw_exec->input_size; i++)
947 /* Mark it is used as f. */
948 if (node->output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))
949 used_grad[tensor_symbol_info[forw_exec->inputs[i]].alias_ref ? tensor_symbol_info[forw_exec->inputs[i]].alias_ref - 1 : forw_exec->inputs[i]] |= F_SYMBOL_USE;
950 }
951 } ccv_nnc_graph_visit_endfor} }
// Second pass: nodes that reference sub-graphs. The two arrays are created
// lazily on first use, reused (cleared) afterwards, and freed after the loop.
952 ccv_array_t* sub_f_symbols = 0;
953 ccv_array_t* sub_wrt_symbols = 0;
954 ccv_nnc_graph_visit_for(forward_visit, exec_symbol_info, _, idx){ int _i_; for (_i_ = 0; _i_ < (forward_visit)->size; _i_
++) { const int idx __attribute__((unused)) = (forward_visit)
->node[_i_].index; const int _node_unused_ __attribute__((
unused)) = (forward_visit)->node[_i_].term; typeof ((exec_symbol_info
)) const _ __attribute__((unused)) = (exec_symbol_info) + idx
;
{
955 ccv_nnc_graph_backward_info_t* node = backward_info + idx;
956 const ccv_nnc_graph_exec_symbol_info_t* forw_exec = exec_symbol_info + idx;
957 /* Only interested in the ones on the f / wrt flow */
958 if ((node->f_wrt & 0x3) == 0x3 && forw_exec->graph_ref_size > 0)
959 {
// With more than one sub-graph, each sub-graph prunes a private copy
// (stack_input_bitmasks1) of the node mask and the results are OR-ed into
// stack_input_bitmasks2; with exactly one, node->input_bitmasks is edited
// directly.
960 uint64_t stack_input_bitmasks1[node->input_bitmask_size];
961 uint64_t stack_input_bitmasks2[node->input_bitmask_size];
962 uint64_t* const input_bitmasks = forw_exec->graph_ref_size > 1 ? stack_input_bitmasks1 : node->input_bitmasks;
963 // We collect input masks into this location.
964 if (forw_exec->graph_ref_size > 1)
965 memset(stack_input_bitmasks2, 0, sizeof(uint64_t) * node->input_bitmask_size);
966 for (p = 0; p < forw_exec->graph_ref_size; p++)
967 {
968 // Reset the stack input bitmasks.
969 if (forw_exec->graph_ref_size > 1)
970 memcpy(stack_input_bitmasks1, node->input_bitmasks, sizeof(uint64_t) * node->input_bitmask_size);
971 // Now calling it recursively until we are sure no f_symbols can be removed.
// Graph references are stored 1-based; graph_ref indexes sub_preps and s_ref.
972 const int graph_ref = CCV_NNC_GRAPH_REF(forw_exec)((forw_exec)->_heap_graph_ref ? (forw_exec)->_heap_graph_ref
: (forw_exec)->_inline_graph_ref)
[p] - 1;
973 ccv_nnc_symbolic_graph_backward_prep_t* const sub_prep = backward_prep->sub_preps + graph_ref;
974 if (!sub_wrt_symbols)
975 sub_wrt_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
976 else
977 ccv_array_clear(sub_wrt_symbols);
// wrt symbols for the sub-graph: inputs still selected in output_bitmasks that
// have a mapping in this sub-graph; unmapped ones are simply omitted.
978 for (i = 0; i < forw_exec->input_size; i++)
979 if (node->output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))
980 {
981 const ccv_array_t* const s_refs = tensor_symbol_info[forw_exec->inputs[i]].s_ref;
982 const int s_ref = s_refs && s_refs->rnum > graph_ref ? *(int*)ccv_array_get(s_refs, graph_ref)((void*)(((char*)((s_refs)->data)) + (size_t)(s_refs)->
rsize * (size_t)(graph_ref)))
- 1 : -1;
983 if (s_ref >= 0)
984 {
985 ccv_nnc_tensor_symbol_t sub_wrt_symbol = {
986 .d = s_ref,
987 .graph = sub_prep->graph,
988 };
989 ccv_array_push(sub_wrt_symbols, &sub_wrt_symbol);
990 }
991 }
992 int flag; // Only continue if it changed */
993 do {
994 flag = 0;
995 for (i = 0; i < forw_exec->output_size; i++)
996 // Try to reduce number of inputs for the backward graph. If it is not tagged as F_SYMBOL_USE, we can reduce it.
997 // It is reducible because this sub graph may have multiple computation paths, therefore, some of these may not
998 // involve our wrt symbols at all.
// NOTE(review): outputs[i] is used as an index before the bitmask test on the
// next line; if outputs[i] can be negative here (line 803 treats negative ids
// as absent) this would index out of range -- confirm.
999 if (!(used_grad[tensor_symbol_info[forw_exec->outputs[i]].alias_ref ? tensor_symbol_info[forw_exec->outputs[i]].alias_ref - 1 : forw_exec->outputs[i]] & F_SYMBOL_USE) &&
1000 input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))
1001 { /* Try to eliminate one of the input. */
1002 input_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63));
1003 if (!sub_f_symbols)
1004 sub_f_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
1005 else
1006 ccv_array_clear(sub_f_symbols);
// NOTE(review): this trial list is built from node->input_bitmasks, whereas
// the bit was just cleared in the local input_bitmasks and the final rebuild
// at line 1030 reads input_bitmasks. The two are the same array only when
// graph_ref_size == 1 -- confirm this is intended for the multi-graph case.
1007 for (j = 0; j < forw_exec->output_size; j++)
1008 if (node->input_bitmasks[j >> 6] & ((uint64_t)1 << (j & 63)))
1009 {
1010 const int s_ref = *(int*)ccv_array_get(tensor_symbol_info[forw_exec->outputs[j]].s_ref, graph_ref)((void*)(((char*)((tensor_symbol_info[forw_exec->outputs[j
]].s_ref)->data)) + (size_t)(tensor_symbol_info[forw_exec->
outputs[j]].s_ref)->rsize * (size_t)(graph_ref)))
- 1;
1011 assert(s_ref >= 0)((void) sizeof ((s_ref >= 0) ? 1 : 0), __extension__ ({ if
(s_ref >= 0) ; else __assert_fail ("s_ref >= 0", "ccv_nnc_symbolic_graph_backward.c"
, 1011, __extension__ __PRETTY_FUNCTION__); }))
;
1012 ccv_nnc_tensor_symbol_t sub_f_symbol = {
1013 .d = s_ref,
1014 .graph = sub_prep->graph,
1015 };
1016 ccv_array_push(sub_f_symbols, &sub_f_symbol);
1017 }
// Recursive feasibility probe on the sub-graph; a zero result restores the bit.
1018 if (_ccv_nnc_symbolic_graph_backward_prep_prune_ops(sub_prep, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_f_symbols, 0)((void*)(((char*)((sub_f_symbols)->data)) + (size_t)(sub_f_symbols
)->rsize * (size_t)(0)))
, sub_f_symbols->rnum, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, 0)((void*)(((char*)((sub_wrt_symbols)->data)) + (size_t)(sub_wrt_symbols
)->rsize * (size_t)(0)))
, sub_wrt_symbols->rnum, ccv_nnc_symbolic_graph_sources(sub_prep->graph), ccv_nnc_symbolic_graph_source_size(sub_prep->graph), ccv_nnc_symbolic_graph_destinations(sub_prep->graph), ccv_nnc_symbolic_graph_destination_size(sub_prep->graph)))
1019 flag = 1;
1020 else /* Refit this with the bit back again. */
1021 input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
1022 }
1023 } while (flag);
1024 // I am done, need to redo above for sub_prep, and it has to be successful now.
1025 if (!sub_f_symbols)
1026 sub_f_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
1027 else
1028 ccv_array_clear(sub_f_symbols);
1029 for (i = 0; i < forw_exec->output_size; i++)
1030 if (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))
1031 {
1032 const int s_ref = *(int*)ccv_array_get(tensor_symbol_info[forw_exec->outputs[i]].s_ref, graph_ref)((void*)(((char*)((tensor_symbol_info[forw_exec->outputs[i
]].s_ref)->data)) + (size_t)(tensor_symbol_info[forw_exec->
outputs[i]].s_ref)->rsize * (size_t)(graph_ref)))
- 1;
1033 assert(s_ref >= 0)((void) sizeof ((s_ref >= 0) ? 1 : 0), __extension__ ({ if
(s_ref >= 0) ; else __assert_fail ("s_ref >= 0", "ccv_nnc_symbolic_graph_backward.c"
, 1033, __extension__ __PRETTY_FUNCTION__); }))
;
1034 ccv_nnc_tensor_symbol_t sub_f_symbol = {
1035 .d = s_ref,
1036 .graph = sub_prep->graph,
1037 };
1038 ccv_array_push(sub_f_symbols, &sub_f_symbol);
1039 }
// Final run with the settled f list; the return value is ignored here and the
// call is made for its effect on sub_prep's state.
1040 _ccv_nnc_symbolic_graph_backward_prep_prune_ops(sub_prep, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_f_symbols, 0)((void*)(((char*)((sub_f_symbols)->data)) + (size_t)(sub_f_symbols
)->rsize * (size_t)(0)))
, sub_f_symbols->rnum, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, 0)((void*)(((char*)((sub_wrt_symbols)->data)) + (size_t)(sub_wrt_symbols
)->rsize * (size_t)(0)))
, sub_wrt_symbols->rnum, ccv_nnc_symbolic_graph_sources(sub_prep->graph), ccv_nnc_symbolic_graph_source_size(sub_prep->graph), ccv_nnc_symbolic_graph_destinations(sub_prep->graph), ccv_nnc_symbolic_graph_destination_size(sub_prep->graph));
// Accumulate the union of what each sub-graph still requires.
1041 if (forw_exec->graph_ref_size > 1)
1042 for (i = 0; i < node->input_bitmask_size; i++)
1043 stack_input_bitmasks2[i] |= input_bitmasks[i];
1044 }
1045 if (forw_exec->graph_ref_size > 1)
1046 memcpy(node->input_bitmasks, stack_input_bitmasks2, sizeof(uint64_t) * node->input_bitmask_size);
1047 }
1048 } ccv_nnc_graph_visit_endfor} }
1049 if (sub_f_symbols)
1050 ccv_array_free(sub_f_symbols);
1051 if (sub_wrt_symbols)
1052 ccv_array_free(sub_wrt_symbols);
// Success test: every f_symbol's alias root must carry WRT_SYMBOL_USE.
// NOTE(review): unlike line 860 there is no f_symbols[i].d >= 0 guard here --
// confirm callers never pass a negative d.
1053 int flag = 1;
1054 for (i = 0; i < f_symbol_size && flag; i++)
1055 flag = (used_grad[tensor_symbol_info[f_symbols[i].d].alias_ref ? tensor_symbol_info[f_symbols[i].d].alias_ref - 1 : f_symbols[i].d] & WRT_SYMBOL_USE);
1056 ccfreefree(used_grad);
1057 return flag;
1058}
1059
1060static void _ccv_nnc_symbolic_graph_backward_prep_gen(ccv_nnc_symbolic_graph_backward_prep_t* const backward_prep, const ccv_nnc_tensor_symbol_t* const f_symbols, const int f_symbol_size, const ccv_nnc_tensor_symbol_t* const wrt_symbols, const int wrt_symbol_size, const int is_while, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
1061{
1062 const int exec_symbol_info_size = backward_prep->exec_symbol_info_size;
1063 const int tensor_symbol_info_size = backward_prep->tensor_symbol_info_size;
1064 const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = backward_prep->exec_symbol_info;
1065 const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info =backward_prep->tensor_symbol_info;
1066 const ccv_nnc_graph_visit_t* const forward_visit = backward_prep->forward_visit;
1067 // Now, for each one of these, find a reverse graph.
1068 ccv_nnc_graph_backward_info_t* const backward_info = backward_prep->backward_info;
1069 const ccv_nnc_graph_visit_t* const backward_visit = backward_prep->backward_visit;
1070 int i, j;
1071 // Now, only the flow from f_symbols back to wrt_symbols are interested to us.
1072 // Visit the graph in reverse order, build the AD nodes.
1073 ccv_nnc_autograd_graph_exec_symbol_t* const autograd_execs = (ccv_nnc_autograd_graph_exec_symbol_t*)cccalloccalloc(exec_symbol_info_size, sizeof(ccv_nnc_autograd_graph_exec_symbol_t));
1074 int max_forw_input_size = 0, max_forw_output_size = 0;
1075 for (i = 0; i < exec_symbol_info_size; i++)
1076 if ((backward_info[i].f_wrt & 0x3) == 0x3)
1077 {
1078 max_forw_input_size = ccv_max(max_forw_input_size, exec_symbol_info[i].input_size)({ typeof (max_forw_input_size) _a = (max_forw_input_size); typeof
(exec_symbol_info[i].input_size) _b = (exec_symbol_info[i].input_size
); (_a > _b) ? _a : _b; })
;
1079 max_forw_output_size = ccv_max(max_forw_output_size, exec_symbol_info[i].output_size)({ typeof (max_forw_output_size) _a = (max_forw_output_size);
typeof (exec_symbol_info[i].output_size) _b = (exec_symbol_info
[i].output_size); (_a > _b) ? _a : _b; })
;
1080 if (backward_info[i].outgoings)
1081 {
1082 // Copy over the outgoing bits.
1083 autograd_execs[i].outgoings = ccv_array_new(sizeof(int), backward_info[i].outgoings->rnum, 0);
1084 for (j = 0; j < backward_info[i].outgoings->rnum; j++)
1085 {
1086 const int d = *(int*)ccv_array_get(backward_info[i].outgoings, j)((void*)(((char*)((backward_info[i].outgoings)->data)) + (
size_t)(backward_info[i].outgoings)->rsize * (size_t)(j)))
;
1087 // Only push the outgoing node if it is in the f_wrt path.
1088 if ((backward_info[d].f_wrt & 0x3) == 0x3)
1089 ccv_array_push(autograd_execs[i].outgoings, &d);
1090 }
1091 }
1092 }
1093 int max_forw_inputs[ccv_max(1, max_forw_input_size)({ typeof (1) _a = (1); typeof (max_forw_input_size) _b = (max_forw_input_size
); (_a > _b) ? _a : _b; })
];
1094 int max_forw_outputs[ccv_max(1, max_forw_output_size)({ typeof (1) _a = (1); typeof (max_forw_output_size) _b = (max_forw_output_size
); (_a > _b) ? _a : _b; })
];
1095 ccv_nnc_autograd_tensor_version_t* const autograd_tensor_versions = (ccv_nnc_autograd_tensor_version_t*)cccalloccalloc(tensor_symbol_info_size, sizeof(ccv_nnc_autograd_tensor_version_t));
1096 ccv_array_t* autograd_tensor_symbols = ccv_array_new(sizeof(ccv_nnc_autograd_tensor_symbol_t), tensor_symbol_info_size, 0);
1097 ccv_array_t* sum_or_set_execs = ccv_array_new(sizeof(ccv_nnc_sum_or_set_graph_exec_symbol_t), 0, 0);
1098 ccv_nnc_graph_visit_for(backward_visit, backward_info, back_info_node, idx){ int _i_; for (_i_ = 0; _i_ < (backward_visit)->size; _i_
++) { const int idx __attribute__((unused)) = (backward_visit
)->node[_i_].index; const int _node_unused_ __attribute__(
(unused)) = (backward_visit)->node[_i_].term; typeof ((backward_info
)) const back_info_node __attribute__((unused)) = (backward_info
) + idx;
{
1099 /* This is required by both f flow and wrt flow, therefore, an interest to us */
1100 if ((back_info_node->f_wrt & 0x3) == 0x3)
1101 {
1102 const ccv_nnc_graph_exec_symbol_info_t* forw_exec = exec_symbol_info + idx;
1103 ccv_nnc_autograd_graph_exec_symbol_t* back_exec = autograd_execs + idx;
1104 back_exec->cmd = forw_exec->cmd;
1105 if (back_exec->cmd.cmd != CCV_NNC_NOOP)
1106 back_exec->cmd.cmd += 1; /* Backward command is the one after forward command. */
1107 assert(ccv_nnc_cmd_is_backward(back_exec->cmd) || back_exec->cmd.cmd == CCV_NNC_NOOP)((void) sizeof ((ccv_nnc_cmd_is_backward(back_exec->cmd) ||
back_exec->cmd.cmd == CCV_NNC_NOOP) ? 1 : 0), __extension__
({ if (ccv_nnc_cmd_is_backward(back_exec->cmd) || back_exec
->cmd.cmd == CCV_NNC_NOOP) ; else __assert_fail ("ccv_nnc_cmd_is_backward(back_exec->cmd) || back_exec->cmd.cmd == CCV_NNC_NOOP"
, "ccv_nnc_symbolic_graph_backward.c", 1107, __extension__ __PRETTY_FUNCTION__
); }))
;
1108 if (!back_info_node->output_bitmask_size) /* This has no output, can be a noop. */
1109 back_exec->cmd.cmd = CCV_NNC_NOOP;
1110 else {
1111 int* back_input_map = max_forw_outputs;
1112 int* back_output_map = max_forw_inputs;
1113 _ccv_nnc_symbolic_graph_backward_exec_io(forw_exec, &back_input_map, &back_output_map, &back_exec->input_size, &back_exec->output_size);
1114 back_exec->inputs = ccmallocmalloc(sizeof(int) * (back_exec->input_size + back_exec->output_size));
1115 back_exec->outputs = back_exec->inputs + back_exec->input_size;
1116 /* Need to compute input before we compute output */
1117 for (i = 0; i < back_exec->input_size; i++)
1118 {
1119 /* If we can skip this input, do that. */
1120 if (!(back_info_node->input_bitmasks[i >> 6] & ((uint64_t)1 << i)))
1121 continue;
1122 const int d = back_input_map[i];
1123 const int alias_ref = tensor_symbol_info[d].alias_ref;
1124 ccv_nnc_autograd_tensor_version_t* tensor_ver = alias_ref ? autograd_tensor_versions + (alias_ref - 1) : autograd_tensor_versions + d;
1125 /* Initialization tensor, should corresponding to f symbols */
1126 if (!tensor_ver->ref_version)
1127 {
1128 ccv_nnc_autograd_tensor_symbol_t tensor_sym = {};
1129 if (!alias_ref)
1130 {
1131 tensor_sym.d = d;
1132 ccv_array_push(autograd_tensor_symbols, &tensor_sym);
1133 const ccv_nnc_tensor_ref_t tensor_ref = {
1134 .d = autograd_tensor_symbols->rnum - 1,
1135 .x = idx,
1136 .alias_registry = 0
1137 };
1138 tensor_ver->ref_version = ccv_array_new(sizeof(ccv_nnc_tensor_ref_t), 1, 0);
1139 ccv_array_push(tensor_ver->ref_version, &tensor_ref);
1140 } else {
1141 tensor_sym.d = alias_ref - 1;
1142 ccv_array_push(autograd_tensor_symbols, &tensor_sym);
1143 const ccv_nnc_tensor_ref_t tensor_ref = {
1144 .d = autograd_tensor_symbols->rnum - 1,
1145 .x = idx,
1146 .alias_registry = ccv_array_new(sizeof(int), 1, 0)
1147 };
1148 tensor_ver->ref_version = ccv_array_new(sizeof(ccv_nnc_tensor_ref_t), 1, 0);
1149 ccv_array_push(tensor_ver->ref_version, &tensor_ref);
1150 tensor_sym.d = d; /* set back */
1151 tensor_sym.alias_ref = tensor_ref.d + 1;
1152 ccv_array_push(autograd_tensor_symbols, &tensor_sym);
1153 const int ad = autograd_tensor_symbols->rnum - 1;
1154 ccv_array_push(tensor_ref.alias_registry, &ad);
1155 }
1156 }
1157 /* The simplest case (most common), it is not an alias. */
1158 if (!alias_ref)
1159 {
1160 /* Even simpler, this only have one reference tensor, thus, pass this as input. */
1161 if (tensor_ver->c == tensor_ver->ref_version->rnum - 1)
1162 {
1163 ccv_nnc_tensor_ref_t* tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, tensor_ver->c)((void*)(((char*)((tensor_ver->ref_version)->data)) + (
size_t)(tensor_ver->ref_version)->rsize * (size_t)(tensor_ver
->c)))
;
1164 /* There are alias associated with this tensor ref, zero it out when this tensor is allocated. */
1165 /* This is is required. Consider the case that we have an alias of this tensor used somehwere */
1166 /* on forward pass, when we compute backward, we have that alias computed first, however, its */
1167 /* underlying tensor is not zero initialized, and we will end up with garbage values here. */
1168 if (tensor_ref->alias_registry &&
1169 /* Loop over to see if this tensor is fully occupied to avoid extra zero step. */
1170 !_ccv_nnc_tensor_ref_fully_assigned_with_aliases(tensor_ref, autograd_tensor_symbols, tensor_symbol_info))
1171 {
1172 ccv_nnc_autograd_tensor_symbol_t* tensor_sym = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_ref->d)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(tensor_ref->
d)))
;
1173 assert(tensor_sym->alias_ref == 0)((void) sizeof ((tensor_sym->alias_ref == 0) ? 1 : 0), __extension__
({ if (tensor_sym->alias_ref == 0) ; else __assert_fail (
"tensor_sym->alias_ref == 0", "ccv_nnc_symbolic_graph_backward.c"
, 1173, __extension__ __PRETTY_FUNCTION__); }))
;
1174 tensor_sym->flags = CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS;
1175 }
1176 back_exec->inputs[i] = tensor_ref->d;
1177 } else {
1178 /* Otherwise, we need to sum them up, and then pass the summed result to the computation. */
1179 _ccv_nnc_graph_sum_autograd_tensor_versions(idx, d, exec_symbol_info_size, tensor_symbol_info, tensor_ver, autograd_execs, autograd_tensor_symbols, sum_or_set_execs);
1180 ccv_nnc_tensor_ref_t* tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, tensor_ver->c)((void*)(((char*)((tensor_ver->ref_version)->data)) + (
size_t)(tensor_ver->ref_version)->rsize * (size_t)(tensor_ver
->c)))
;
1181 back_exec->inputs[i] = tensor_ref->d;
1182 }
1183 } else
1184 /* If this is an alias, go through all available tensor ref versions */
1185 back_exec->inputs[i] = _ccv_nnc_graph_sum_autograd_tensor_versions_alias(idx, d, tensor_symbol_info, exec_symbol_info_size, tensor_symbol_info + d, tensor_ver, autograd_execs, autograd_tensor_symbols, sum_or_set_execs);
1186 }
1187 for (i = 0; i < back_exec->output_size; i++)
1188 {
1189 /* If we can skip this output, do that. */
1190 if (!(back_info_node->output_bitmasks[i >> 6] & ((uint64_t)1 << i)))
1191 continue;
1192 const int d = back_output_map[i];
1193 const int alias_ref = tensor_symbol_info[d].alias_ref;
1194 ccv_nnc_autograd_tensor_symbol_t tensor_sym = {
1195 .d = d
1196 };
1197 /* The simplest case (most common), it is not an alias. */
1198 if (!alias_ref)
1199 {
1200 ccv_array_push(autograd_tensor_symbols, &tensor_sym);
1201 const ccv_nnc_tensor_ref_t tensor_ref = {
1202 .d = autograd_tensor_symbols->rnum - 1,
1203 .x = idx,
1204 .exec_registry = 0,
1205 .alias_registry = 0
1206 };
1207 ccv_nnc_autograd_tensor_version_t* tensor_ver = autograd_tensor_versions + d;
1208 if (!tensor_ver->ref_version)
1209 tensor_ver->ref_version = ccv_array_new(sizeof(ccv_nnc_tensor_ref_t), 1, 0);
1210 ccv_array_push(tensor_ver->ref_version, &tensor_ref);
1211 back_exec->outputs[i] = tensor_ref.d;
1212 } else {
1213 /* Otherwise, in case that this is an alias, we try to find the existing one (in tensor_ver
1214 * see if can meet the need (thus, for the tensor info / ofs, it fits). */
1215 ccv_nnc_autograd_tensor_version_t* tensor_ver = autograd_tensor_versions + (alias_ref - 1);
1216 if (!tensor_ver->ref_version)
1217 tensor_ver->ref_version = ccv_array_new(sizeof(ccv_nnc_tensor_ref_t), 1, 0);
1218 /* If already exists a ref version, check if any of these not-sealed tensors have free space. */
1219 int found = 0;
1220 for (j = tensor_ver->c; !found && j < tensor_ver->ref_version->rnum; j++)
1221 {
1222 ccv_nnc_tensor_ref_t* tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, j)((void*)(((char*)((tensor_ver->ref_version)->data)) + (
size_t)(tensor_ver->ref_version)->rsize * (size_t)(j)))
;
1223 if (!_ccv_nnc_tensor_ref_version_involve_alias(tensor_ref, autograd_tensor_symbols, tensor_symbol_info, tensor_symbol_info + d))
1224 {
1225 tensor_sym.alias_ref = tensor_ref->d + 1;
1226 ccv_array_push(autograd_tensor_symbols, &tensor_sym);
1227 const int ad = autograd_tensor_symbols->rnum - 1;
1228 ccv_array_push(tensor_ref->alias_registry, &ad);
1229 if (!tensor_ref->exec_registry)
1230 tensor_ref->exec_registry = ccv_array_new(sizeof(int), 1, 0);
1231 ccv_array_push(tensor_ref->exec_registry, &idx);
1232 back_exec->outputs[i] = ad;
1233 found = 1;
1234 }
1235 }
1236 if (!found) /* Cannot find an tensor ref to insert, create one first */
1237 {
1238 tensor_sym.d = alias_ref - 1; /* Reference back to the non-alias. */
1239 ccv_array_push(autograd_tensor_symbols, &tensor_sym);
1240 const ccv_nnc_tensor_ref_t tensor_ref = {
1241 .d = autograd_tensor_symbols->rnum - 1,
1242 .x = idx,
1243 .exec_registry = 0,
1244 .alias_registry = ccv_array_new(sizeof(int), 1, 0)
1245 };
1246 ccv_array_push(tensor_ver->ref_version, &tensor_ref);
1247 tensor_sym.d = d; /* set back */
1248 tensor_sym.alias_ref = tensor_ref.d + 1;
1249 ccv_array_push(autograd_tensor_symbols, &tensor_sym);
1250 const int ad = autograd_tensor_symbols->rnum - 1;
1251 ccv_array_push(tensor_ref.alias_registry, &ad);
1252 back_exec->outputs[i] = ad;
1253 }
1254 }
1255 }
1256 }
1257 }
1258 } ccv_nnc_graph_visit_endfor} }
1259 // Find all relevant wrt symbols, generate sum for them if needed.
1260 for (i = 0; i < wrt_symbol_size; i++)
1261 {
1262 const int d = wrt_symbols[i].d;
1263 if (d < 0)
1264 continue;
1265 const int ref_d = (!tensor_symbol_info[d].alias_ref) ? d : tensor_symbol_info[d].alias_ref - 1;
1266 ccv_nnc_autograd_tensor_version_t* tensor_ver = autograd_tensor_versions + ref_d;
1267 if (!tensor_ver->ref_version)
1268 {
1269 // This wrt symbol is not available at all, for this case, we set its flag to init zero.
1270 const ccv_nnc_autograd_tensor_symbol_t tensor_sym = {
1271 .d = ref_d
1272 };
1273 ccv_array_push(autograd_tensor_symbols, &tensor_sym);
1274 ccv_nnc_sum_or_set_graph_exec_symbol_t set_exec = {
1275 .value = 0,
1276 .output = autograd_tensor_symbols->rnum - 1,
1277 };
1278 ccv_array_push(sum_or_set_execs, &set_exec);
1279 // Insert the one to be set to zero.
1280 const ccv_nnc_tensor_ref_t tensor_ref = {
1281 .d = autograd_tensor_symbols->rnum - 1,
1282 .x = exec_symbol_info_size + sum_or_set_execs->rnum - 1,
1283 };
1284 tensor_ver->ref_version = ccv_array_new(sizeof(ccv_nnc_tensor_ref_t), 1, 0);
1285 ccv_array_push(tensor_ver->ref_version, &tensor_ref);
1286 continue;
1287 }
1288 // If it is a while loop, we need to insert an accumulator to the graph (this is expressed as a initialization tensor summed with existing results).
1289 // First, insert the initialization tensor if this wrt results is not used directly in next while loop (thus, it participates the computation, therefore, no need to accumulate).
1290 if (is_while && !tensor_symbol_info[ref_d].assign_ref &&
1291 _ccv_nnc_tensor_ref_version_find_init(tensor_ver) < 0) // If the initialization tensor is not inserted yet.
1292 {
1293 const ccv_nnc_autograd_tensor_symbol_t tensor_sym = {
1294 .d = ref_d
1295 };
1296 ccv_array_push(autograd_tensor_symbols, &tensor_sym);
1297 // Insert the one to be summed.
1298 const ccv_nnc_tensor_ref_t tensor_ref = {
1299 .d = autograd_tensor_symbols->rnum - 1,
1300 .x = -1, // This denotes it is an initialization vector.
1301 };
1302 ccv_array_push(tensor_ver->ref_version, &tensor_ref);
1303 }
1304 // If there are more than one tensor in the list, it is possible to sum them up.
1305 if (tensor_ver->c < tensor_ver->ref_version->rnum - 1)
1306 _ccv_nnc_graph_sum_autograd_tensor_versions(-1, ref_d, exec_symbol_info_size, tensor_symbol_info, tensor_ver, autograd_execs, autograd_tensor_symbols, sum_or_set_execs);
1307 // The tensor version should have ref_version, and only one now (after sum up).
1308 assert(tensor_ver->c == tensor_ver->ref_version->rnum - 1)((void) sizeof ((tensor_ver->c == tensor_ver->ref_version
->rnum - 1) ? 1 : 0), __extension__ ({ if (tensor_ver->
c == tensor_ver->ref_version->rnum - 1) ; else __assert_fail
("tensor_ver->c == tensor_ver->ref_version->rnum - 1"
, "ccv_nnc_symbolic_graph_backward.c", 1308, __extension__ __PRETTY_FUNCTION__
); }))
;
1309 }
1310 // Adding additional fields to backward_prep now.
1311 backward_prep->autograd_execs = autograd_execs;
1312 backward_prep->autograd_tensor_versions = autograd_tensor_versions;
1313 backward_prep->autograd_tensor_symbols = autograd_tensor_symbols;
1314 backward_prep->sum_or_set_execs = sum_or_set_execs;
1315 ccv_array_t* sub_f_symbols = 0;
1316 ccv_array_t* sub_wrt_symbols = 0;
1317 ccv_nnc_graph_visit_for(forward_visit, exec_symbol_info, _, idx){ int _i_; for (_i_ = 0; _i_ < (forward_visit)->size; _i_
++) { const int idx __attribute__((unused)) = (forward_visit)
->node[_i_].index; const int _node_unused_ __attribute__((
unused)) = (forward_visit)->node[_i_].term; typeof ((exec_symbol_info
)) const _ __attribute__((unused)) = (exec_symbol_info) + idx
;
{
1318 ccv_nnc_graph_backward_info_t* node = backward_info + idx;
1319 const ccv_nnc_graph_exec_symbol_info_t* forw_exec = exec_symbol_info + idx;
1320 /* Only interested in the ones on the f / wrt flow */
1321 if ((node->f_wrt & 0x3) == 0x3)
1322 {
1323 const int is_while = (forw_exec->flags & CCV_NNC_GRAPH_EXEC_P_WHILE);
1324 for (i = 0; i < forw_exec->graph_ref_size; i++)
1325 {
1326 // Now calling it recursively until we are sure no f_symbols can be removed.
1327 const int graph_ref = CCV_NNC_GRAPH_REF(forw_exec)((forw_exec)->_heap_graph_ref ? (forw_exec)->_heap_graph_ref
: (forw_exec)->_inline_graph_ref)
[i] - 1;
1328 ccv_nnc_symbolic_graph_backward_prep_t* const sub_prep = backward_prep->sub_preps + graph_ref;
1329 if (!sub_wrt_symbols)
1330 sub_wrt_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
1331 if (!sub_f_symbols)
1332 sub_f_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
1333 _ccv_nnc_symbolic_graph_backward_prep_sub_f_wrt_symbols(forw_exec, sub_prep->graph, graph_ref, tensor_symbol_info, node->input_bitmasks, node->output_bitmasks, sub_f_symbols, sub_wrt_symbols);
1334 _ccv_nnc_symbolic_graph_backward_prep_gen(sub_prep, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_f_symbols, 0)((void*)(((char*)((sub_f_symbols)->data)) + (size_t)(sub_f_symbols
)->rsize * (size_t)(0)))
, sub_f_symbols->rnum, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, 0)((void*)(((char*)((sub_wrt_symbols)->data)) + (size_t)(sub_wrt_symbols
)->rsize * (size_t)(0)))
, sub_wrt_symbols->rnum, is_while, ccv_nnc_symbolic_graph_sources(sub_prep->graph), ccv_nnc_symbolic_graph_source_size(sub_prep->graph), ccv_nnc_symbolic_graph_destinations(sub_prep->graph), ccv_nnc_symbolic_graph_destination_size(sub_prep->graph));
1335 }
1336 }
1337 } ccv_nnc_graph_visit_endfor} }
1338 if (sub_f_symbols)
1339 ccv_array_free(sub_f_symbols);
1340 if (sub_wrt_symbols)
1341 ccv_array_free(sub_wrt_symbols);
1342}
1343
1344static void _ccv_nnc_symbolic_graph_backward_prep_free(const ccv_nnc_symbolic_graph_backward_prep_t backward_prep)
1345{
1346 int i, j;
1347 const int exec_symbol_info_size = backward_prep.exec_symbol_info_size;
1348 const int tensor_symbol_info_size = backward_prep.tensor_symbol_info_size;
1349 ccv_nnc_autograd_graph_exec_symbol_t* const autograd_execs = backward_prep.autograd_execs;
1350 if (autograd_execs)
1351 {
1352 for (i = 0; i < exec_symbol_info_size; i++)
1353 {
1354 if (autograd_execs[i].inputs)
1355 ccfreefree(autograd_execs[i].inputs);
1356 if (autograd_execs[i].outgoings)
1357 ccv_array_free(autograd_execs[i].outgoings);
1358 }
1359 ccfreefree(autograd_execs);
1360 }
1361 ccv_nnc_autograd_tensor_version_t* const autograd_tensor_versions = backward_prep.autograd_tensor_versions;
1362 if (autograd_tensor_versions)
1363 {
1364 for (i = 0; i < tensor_symbol_info_size; i++)
1365 {
1366 if (autograd_tensor_versions[i].ref_version)
1367 {
1368 for (j = 0; j < autograd_tensor_versions[i].ref_version->rnum; j++)
1369 {
1370 ccv_nnc_tensor_ref_t* ref_version = (ccv_nnc_tensor_ref_t*)ccv_array_get(autograd_tensor_versions[i].ref_version, j)((void*)(((char*)((autograd_tensor_versions[i].ref_version)->
data)) + (size_t)(autograd_tensor_versions[i].ref_version)->
rsize * (size_t)(j)))
;
1371 if (ref_version->exec_registry)
1372 ccv_array_free(ref_version->exec_registry);
1373 if (ref_version->alias_registry)
1374 ccv_array_free(ref_version->alias_registry);
1375 }
1376 ccv_array_free(autograd_tensor_versions[i].ref_version);
1377 }
1378 }
1379 ccfreefree(autograd_tensor_versions);
1380 }
1381 if (backward_prep.autograd_tensor_symbols)
1382 ccv_array_free(backward_prep.autograd_tensor_symbols);
1383 ccv_array_t* const sum_or_set_execs = backward_prep.sum_or_set_execs;
1384 if (sum_or_set_execs)
1385 {
1386 for (i = 0; i < sum_or_set_execs->rnum; i++)
1387 {
1388 ccv_nnc_sum_or_set_graph_exec_symbol_t* sum_or_set = (ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, i)((void*)(((char*)((sum_or_set_execs)->data)) + (size_t)(sum_or_set_execs
)->rsize * (size_t)(i)))
;
1389 if (sum_or_set->inputs)
1390 ccfreefree(sum_or_set->inputs);
1391 if (sum_or_set->outgoings)
1392 ccv_array_free(sum_or_set->outgoings);
1393 }
1394 ccv_array_free(sum_or_set_execs);
1395 }
1396 // Now afterwards, these are mandatory.
1397 ccv_nnc_graph_backward_info_t* const backward_info = backward_prep.backward_info;
1398 for (i = 0; i < exec_symbol_info_size; i++)
1399 {
1400 if (backward_info[i].outgoings)
1401 ccv_array_free(backward_info[i].outgoings);
1402 if (backward_info[i].input_bitmasks)
1403 ccfreefree(backward_info[i].input_bitmasks);
1404 }
1405 ccfreefree(backward_info);
1406 ccv_nnc_graph_visit_free(backward_prep.backward_visit);
1407 ccv_nnc_graph_visit_free(backward_prep.forward_visit);
1408 ccfreefree(backward_prep.exec_symbol_info);
1409 ccfreefree(backward_prep.tensor_symbol_info);
1410 for (i = 0; i < backward_prep.sub_prep_size; i++)
1411 _ccv_nnc_symbolic_graph_backward_prep_free(backward_prep.sub_preps[i]);
1412 if (backward_prep.sub_preps)
1413 ccfreefree(backward_prep.sub_preps);
1414}
1415
1416static void _ccv_nnc_add_backward_breakpoint_for_symbol(const ccv_nnc_symbolic_graph_backward_prep_t* const backward_prep, const ccv_nnc_graph_exec_symbol_t breakpoint, ccv_nnc_symbolic_graph_t* const graph, ccv_array_t* const sub_breakpoints)
1417{
1418 const ccv_nnc_graph_exec_symbol_t noop = ccv_nnc_graph_exec_symbol_new(graph, ccv_nnc_cmd(CCV_NNC_NOOP, 0, CMD_GENERIC()((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}}}), 0), 0, 0, 0, 0, 0);
1419 ccv_array_push(sub_breakpoints, &noop);
1420 // Now need to hook this up to the graph.
1421 const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = backward_prep->exec_symbol_info;
1422 const ccv_nnc_graph_visit_t* const forward_visit = backward_prep->forward_visit;
1423 // Now, for each one of these, find a reverse graph.
1424 ccv_nnc_graph_backward_info_t* const backward_info = backward_prep->backward_info;
1425 int i;
1426 // Clean up the high bit.
1427 for (i = 0; i < backward_prep->exec_symbol_info_size; i++)
1428 backward_info[i].f_wrt &= ~0x4;
1429 assert((backward_info[breakpoint.d].f_wrt & 0x3) != 0x3)((void) sizeof (((backward_info[breakpoint.d].f_wrt & 0x3
) != 0x3) ? 1 : 0), __extension__ ({ if ((backward_info[breakpoint
.d].f_wrt & 0x3) != 0x3) ; else __assert_fail ("(backward_info[breakpoint.d].f_wrt & 0x3) != 0x3"
, "ccv_nnc_symbolic_graph_backward.c", 1429, __extension__ __PRETTY_FUNCTION__
); }))
;
1430 backward_info[breakpoint.d].f_wrt |= 0x4;
1431 const ccv_nnc_graph_visit_t* const backward_visit = backward_prep->backward_visit;
1432 const ccv_nnc_autograd_graph_exec_symbol_t* const autograd_execs = backward_prep->autograd_execs;
1433 // Going forward to find whether this breakpoint is a source node to some f_wrt nodes.
1434 ccv_nnc_graph_visit_for(forward_visit, exec_symbol_info, forw_exec, idx){ int _i_; for (_i_ = 0; _i_ < (forward_visit)->size; _i_
++) { const int idx __attribute__((unused)) = (forward_visit)
->node[_i_].index; const int _node_unused_ __attribute__((
unused)) = (forward_visit)->node[_i_].term; typeof ((exec_symbol_info
)) const forw_exec __attribute__((unused)) = (exec_symbol_info
) + idx;
{
1435 ccv_nnc_graph_backward_info_t* const node = backward_info + idx;
1436 // If it is tagged on breakpoint flow, but not as both f or wrt, flow through it.
1437 if ((node->f_wrt & 0x4) && (node->f_wrt & 0x3) != 0x3)
1438 for (i = 0; forw_exec->outgoings && i < forw_exec->outgoings->rnum; i++)
1439 {
1440 const int outgoing_idx = *(int*)ccv_array_get(forw_exec->outgoings, i)((void*)(((char*)((forw_exec->outgoings)->data)) + (size_t
)(forw_exec->outgoings)->rsize * (size_t)(i)))
;
1441 ccv_nnc_graph_backward_info_t* const outgoing_node = backward_info + outgoing_idx;
1442 // If this is a f_wrt node. Concatenate.
1443 if (!(outgoing_node->f_wrt & 0x4) && (outgoing_node->f_wrt & 0x3) == 0x3)
1444 ccv_nnc_graph_exec_symbol_concat(graph, autograd_execs[outgoing_idx].symbol, noop);
1445 outgoing_node->f_wrt |= 0x4;
1446 }
1447 } ccv_nnc_graph_visit_endfor} }
1448 // Going backward to find whether this breakpoint is a destination node for some f_wrt_nodes.
1449 ccv_nnc_graph_visit_for(backward_visit, backward_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (backward_visit)->size; _i_
++) { const int idx __attribute__((unused)) = (backward_visit
)->node[_i_].index; const int _node_unused_ __attribute__(
(unused)) = (backward_visit)->node[_i_].term; typeof ((backward_info
)) const node __attribute__((unused)) = (backward_info) + idx
;
{
1450 if ((node->f_wrt & 0x4) && (node->f_wrt & 0x3) != 0x3)
1451 for (i = 0; node->outgoings && i < node->outgoings->rnum; i++)
1452 {
1453 const int outgoing_idx = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
1454 ccv_nnc_graph_backward_info_t* const outgoing_node = backward_info + outgoing_idx;
1455 // If this is a f_wrt node. Concatenate.
1456 if (!(outgoing_node->f_wrt & 0x4) && (outgoing_node->f_wrt & 0x3) == 0x3)
1457 ccv_nnc_graph_exec_symbol_concat(graph, noop, autograd_execs[outgoing_idx].symbol);
1458 outgoing_node->f_wrt |= 0x4;
1459 }
1460 } ccv_nnc_graph_visit_endfor} }
1461}
1462
1463static ccv_nnc_autograd_tensor_symbol_t* _ccv_nnc_autograd_tensor_symbol_from_tensor_version(ccv_array_t* const autograd_tensor_symbols, const ccv_nnc_autograd_tensor_version_t* const tensor_ver)
1464{
1465 assert(tensor_ver->ref_version)((void) sizeof ((tensor_ver->ref_version) ? 1 : 0), __extension__
({ if (tensor_ver->ref_version) ; else __assert_fail ("tensor_ver->ref_version"
, "ccv_nnc_symbolic_graph_backward.c", 1465, __extension__ __PRETTY_FUNCTION__
); }))
;
1466 const ccv_nnc_tensor_ref_t* const tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, tensor_ver->c)((void*)(((char*)((tensor_ver->ref_version)->data)) + (
size_t)(tensor_ver->ref_version)->rsize * (size_t)(tensor_ver
->c)))
;
1467 return (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_ref->d)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(tensor_ref->
d)))
;
1468}
1469
1470static void _ccv_nnc_symbolic_graph_set_backward_carry_overs(const ccv_nnc_symbolic_graph_backward_prep_t* const backward_prep, const ccv_nnc_tensor_symbol_t* const wrt_symbols, const int wrt_symbol_size, ccv_nnc_symbolic_graph_t* const graph)
1471{
1472 int i;
1473 for (i = 0; i < backward_prep->graph->tensor_symbol_info->rnum; i++)
1474 {
1475 const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = backward_prep->tensor_symbol_info + i;
1476 if (tensor_symbol_info->assign_ref)
1477 {
1478 const int assign_ref = tensor_symbol_info->assign_ref - 1;
1479 ccv_nnc_autograd_tensor_symbol_t* const destination_autograd_symbol = _ccv_nnc_autograd_tensor_symbol_from_tensor_version(backward_prep->autograd_tensor_symbols, backward_prep->autograd_tensor_versions + assign_ref);
1480 ccv_nnc_autograd_tensor_symbol_t* const source_autograd_symbol = _ccv_nnc_autograd_tensor_symbol_from_tensor_version(backward_prep->autograd_tensor_symbols, backward_prep->autograd_tensor_versions + i);
1481 ccv_nnc_symbolic_graph_set_carry_overs(graph, (ccv_nnc_tensor_symbol_map_t []){
1482 { .source = source_autograd_symbol->symbol, .destination = destination_autograd_symbol->symbol }
1483 }, 1);
1484 }
1485 }
1486 for (i = 0; i < wrt_symbol_size; i++)
1487 {
1488 const int d = wrt_symbols[i].d;
1489 if (d < 0)
1490 continue;
1491 const int ref_d = (!backward_prep->tensor_symbol_info[d].alias_ref) ? d : backward_prep->tensor_symbol_info[d].alias_ref - 1;
1492 const ccv_nnc_autograd_tensor_version_t* const tensor_ver = backward_prep->autograd_tensor_versions + ref_d;
1493 const int init_ref_ver = _ccv_nnc_tensor_ref_version_find_init(tensor_ver);
1494 if (init_ref_ver >= 0)
1495 {
1496 const int init_d = ((ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, init_ref_ver)((void*)(((char*)((tensor_ver->ref_version)->data)) + (
size_t)(tensor_ver->ref_version)->rsize * (size_t)(init_ref_ver
)))
)->d;
1497 ccv_nnc_autograd_tensor_symbol_t* const destination_autograd_symbol = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(backward_prep->autograd_tensor_symbols, init_d)((void*)(((char*)((backward_prep->autograd_tensor_symbols)
->data)) + (size_t)(backward_prep->autograd_tensor_symbols
)->rsize * (size_t)(init_d)))
;
1498 ccv_nnc_autograd_tensor_symbol_t* const source_autograd_symbol = _ccv_nnc_autograd_tensor_symbol_from_tensor_version(backward_prep->autograd_tensor_symbols, backward_prep->autograd_tensor_versions + ref_d);
1499 ccv_nnc_symbolic_graph_set_carry_overs(graph, (ccv_nnc_tensor_symbol_map_t []){
1500 { .source = source_autograd_symbol->symbol, .destination = destination_autograd_symbol->symbol }
1501 }, 1);
1502 }
1503 }
1504}
1505
1506static void _ccv_nnc_symbolic_graph_add_init_zeros(const ccv_nnc_symbolic_graph_backward_prep_t* const sub_prep, const ccv_nnc_tensor_symbol_t* const wrt_symbols, const int wrt_symbol_size, ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_symbolic_graph_t* const sub_graph, ccv_array_t* const symbols)
1507{
1508 int i;
1509 for (i = 0; i < wrt_symbol_size; i++)
1510 {
1511 const int d = wrt_symbols[i].d;
1512 if (d < 0)
1513 continue;
1514 const int ref_d = (!sub_prep->tensor_symbol_info[d].alias_ref) ? d : sub_prep->tensor_symbol_info[d].alias_ref - 1;
1515 const ccv_nnc_autograd_tensor_version_t* const tensor_ver = sub_prep->autograd_tensor_versions + ref_d;
1516 const int init_ref_ver = _ccv_nnc_tensor_ref_version_find_init(tensor_ver);
1517 if (init_ref_ver >= 0)
1518 {
1519 // Need de-dup logic.
1520 const int init_d = ((ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, init_ref_ver)((void*)(((char*)((tensor_ver->ref_version)->data)) + (
size_t)(tensor_ver->ref_version)->rsize * (size_t)(init_ref_ver
)))
)->d;
1521 ccv_nnc_autograd_tensor_symbol_t* const init_autograd_symbol = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(sub_prep->autograd_tensor_symbols, init_d)((void*)(((char*)((sub_prep->autograd_tensor_symbols)->
data)) + (size_t)(sub_prep->autograd_tensor_symbols)->rsize
* (size_t)(init_d)))
;
1522 const ccv_nnc_tensor_symbol_info_t* const sub_init_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(sub_graph->tensor_symbol_info, init_autograd_symbol->symbol.d)((void*)(((char*)((sub_graph->tensor_symbol_info)->data
)) + (size_t)(sub_graph->tensor_symbol_info)->rsize * (
size_t)(init_autograd_symbol->symbol.d)))
;
1523 // If it doesn't have a parent ref yet, create one.
1524 if (!sub_init_symbol_info->p_ref)
1525 {
1526 ccv_nnc_tensor_symbol_t new_symbol = ccv_nnc_tensor_symbol_new(graph, sub_prep->tensor_symbol_info[ref_d].info, 0);
1527 ccv_nnc_tensor_symbol_set_flags(graph, new_symbol, CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS);
1528 ccv_array_push(symbols, &new_symbol);
1529 ccv_nnc_tensor_symbol_hookup(graph, sub_graph, new_symbol, init_autograd_symbol->symbol);
1530 }
1531 }
1532 }
1533}
1534
1535static void _ccv_nnc_symbolic_graph_add_tape_vars(const ccv_nnc_symbolic_graph_backward_prep_t* const sub_prep, ccv_nnc_symbolic_graph_t* const root, ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_symbolic_graph_t* const sub_graph, ccv_array_t* const symbols)
1536{
1537 int i;
1538 for (i = 0; i < sub_graph->tensor_symbol_info->rnum; i++)
1539 {
1540 const ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(sub_graph->tensor_symbol_info, i)((void*)(((char*)((sub_graph->tensor_symbol_info)->data
)) + (size_t)(sub_graph->tensor_symbol_info)->rsize * (
size_t)(i)))
;
1541 if ((symbol_info->flags & CCV_NNC_TENSOR_SYMBOL_TAPE_VAR) && symbol_info->pair_ref)
1542 {
1543 const int pair_ref = symbol_info->pair_ref - 1;
1544 const ccv_nnc_tensor_symbol_t root_symbol = ccv_nnc_tensor_symbol_resolve(root, (ccv_nnc_tensor_symbol_t){
1545 .d = pair_ref,
1546 .graph = sub_prep->graph,
1547 });
1548 if (root_symbol.d >= 0)
1549 {
1550 ccv_nnc_tensor_symbol_hookup(root, sub_graph, root_symbol, (ccv_nnc_tensor_symbol_t){
1551 .d = i,
1552 .graph = sub_graph,
1553 });
1554 if (symbols)
1555 {
1556 const ccv_nnc_tensor_symbol_t p_symbol = ccv_nnc_tensor_symbol_resolve(graph, (ccv_nnc_tensor_symbol_t){
1557 .d = i,
1558 .graph = sub_graph,
1559 });
1560 ccv_array_push(symbols, &p_symbol);
1561 }
1562 }
1563 }
1564 }
1565}
1566
1567static void _ccv_nnc_symbolic_graph_backward_gen(const ccv_nnc_symbolic_graph_backward_prep_t* const backward_prep, const ccv_nnc_tensor_symbol_t* const f_symbols, const int f_symbol_size, const ccv_nnc_tensor_symbol_t* const wrt_symbols, const int wrt_symbol_size, ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_symbolic_graph_t* const root)
1568{
1569 assert(graph == backward_prep->graph || graph->pair == backward_prep->graph)((void) sizeof ((graph == backward_prep->graph || graph->
pair == backward_prep->graph) ? 1 : 0), __extension__ ({ if
(graph == backward_prep->graph || graph->pair == backward_prep
->graph) ; else __assert_fail ("graph == backward_prep->graph || graph->pair == backward_prep->graph"
, "ccv_nnc_symbolic_graph_backward.c", 1569, __extension__ __PRETTY_FUNCTION__
); }))
;
1
Assuming 'graph' is not equal to field 'graph'
2
Assuming field 'pair' is equal to field 'graph'
3
Taking true branch
1570 const int exec_symbol_info_size = backward_prep->exec_symbol_info_size;
1571 const int tensor_symbol_info_size = backward_prep->tensor_symbol_info_size;
1572 const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = backward_prep->exec_symbol_info;
1573 const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = backward_prep->tensor_symbol_info;
1574 int i, j, k, p;
1575 ccv_array_t* const autograd_tensor_symbols = backward_prep->autograd_tensor_symbols;
1576 // Generate required symbols based on the information gathered above.
1577 for (i = 0; i < autograd_tensor_symbols->rnum; i++)
4
Assuming 'i' is >= field 'rnum'
5
Loop condition is false. Execution continues on line 1596
1578 {
1579 ccv_nnc_autograd_tensor_symbol_t* symbol = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, i)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(i)))
;
1580 assert(symbol->d >= 0)((void) sizeof ((symbol->d >= 0) ? 1 : 0), __extension__
({ if (symbol->d >= 0) ; else __assert_fail ("symbol->d >= 0"
, "ccv_nnc_symbolic_graph_backward.c", 1580, __extension__ __PRETTY_FUNCTION__
); }))
;
1581 assert(symbol->d < tensor_symbol_info_size)((void) sizeof ((symbol->d < tensor_symbol_info_size) ?
1 : 0), __extension__ ({ if (symbol->d < tensor_symbol_info_size
) ; else __assert_fail ("symbol->d < tensor_symbol_info_size"
, "ccv_nnc_symbolic_graph_backward.c", 1581, __extension__ __PRETTY_FUNCTION__
); }))
;
1582 const ccv_nnc_tensor_symbol_info_t* const forw_symbol = tensor_symbol_info + symbol->d;
1583 if (!symbol->alias_ref)
1584 {
1585 assert(!forw_symbol->alias_ref)((void) sizeof ((!forw_symbol->alias_ref) ? 1 : 0), __extension__
({ if (!forw_symbol->alias_ref) ; else __assert_fail ("!forw_symbol->alias_ref"
, "ccv_nnc_symbolic_graph_backward.c", 1585, __extension__ __PRETTY_FUNCTION__
); }))
;
1586 symbol->symbol = ccv_nnc_tensor_symbol_new(graph, forw_symbol->info, 0);
1587 ccv_nnc_tensor_symbol_set_flags(graph, symbol->symbol, symbol->flags);
1588 } else {
1589 assert(forw_symbol->alias_ref)((void) sizeof ((forw_symbol->alias_ref) ? 1 : 0), __extension__
({ if (forw_symbol->alias_ref) ; else __assert_fail ("forw_symbol->alias_ref"
, "ccv_nnc_symbolic_graph_backward.c", 1589, __extension__ __PRETTY_FUNCTION__
); }))
;
1590 assert(symbol->flags == 0)((void) sizeof ((symbol->flags == 0) ? 1 : 0), __extension__
({ if (symbol->flags == 0) ; else __assert_fail ("symbol->flags == 0"
, "ccv_nnc_symbolic_graph_backward.c", 1590, __extension__ __PRETTY_FUNCTION__
); }))
; // We don't set flags on alias.
1591 // Due to our generation order, this must be after the original symbol is created.
1592 ccv_nnc_autograd_tensor_symbol_t* ref = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, symbol->alias_ref - 1)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(symbol->alias_ref
- 1)))
;
1593 symbol->symbol = ccv_nnc_tensor_symbol_alias_new(graph, ref->symbol, forw_symbol->ofs, forw_symbol->inc, forw_symbol->info, 0);
1594 }
1595 }
1596 ccv_nnc_graph_backward_info_t* const backward_info = backward_prep->backward_info;
1597 ccv_nnc_autograd_graph_exec_symbol_t* const autograd_execs = backward_prep->autograd_execs;
1598 ccv_array_t* symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
1599 ccv_array_t* symbol_map = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_map_t), 0, 0);
1600 ccv_array_t* sub_f_symbols = 0;
1601 ccv_array_t* sub_wrt_symbols = 0;
1602 ccv_array_t* sub_execs = 0;
1603 for (i = 0; i < exec_symbol_info_size; i++)
6
Assuming 'i' is < 'exec_symbol_info_size'
7
Loop condition is true. Entering loop body
1604 {
1605 // This is not going to be an interesting node. Skip.
1606 if ((backward_info[i].f_wrt & 0x3) != 0x3)
8
Assuming the condition is false
9
Taking false branch
1607 continue;
1608 ccv_nnc_graph_backward_info_t* const back_info = backward_info + i;
1609 ccv_nnc_autograd_graph_exec_symbol_t* const back_exec = autograd_execs + i;
1610 if (back_exec->cmd.cmd == CCV_NNC_NOOP)
10
Assuming field 'cmd' is not equal to CCV_NNC_NOOP
11
Taking false branch
1611 {
1612 back_exec->symbol = ccv_nnc_graph_exec_symbol_new(graph, back_exec->cmd, 0, 0, 0, 0, 0);
1613 continue;
1614 }
1615 const ccv_nnc_graph_exec_symbol_info_t* const forw_exec = exec_symbol_info + i;
1616 if (forw_exec->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)
12
Assuming the condition is true
13
Taking true branch
1617 {
1618 ccv_array_clear(symbols);
1619 const int graph_ref = CCV_NNC_GRAPH_REF(forw_exec)((forw_exec)->_heap_graph_ref ? (forw_exec)->_heap_graph_ref
: (forw_exec)->_inline_graph_ref)
[0] - 1;
14
Assuming field '_heap_graph_ref' is null
15
'?' condition is false
1620 ccv_nnc_symbolic_graph_backward_prep_t* sub_prep = backward_prep->sub_preps + graph_ref;
1621 ccv_nnc_symbolic_graph_t* sub_graph = ccv_nnc_symbolic_graph_new();
1622 sub_graph->pair = sub_prep->graph;
1623 if (!sub_wrt_symbols
15.1
'sub_wrt_symbols' is null
)
16
Taking true branch
1624 sub_wrt_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
1625 // I am done, need to redo above for sub_prep, and it has to be successful now.
1626 if (!sub_f_symbols
16.1
'sub_f_symbols' is null
)
17
Taking true branch
1627 sub_f_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
1628 _ccv_nnc_symbolic_graph_backward_prep_sub_f_wrt_symbols(forw_exec, sub_prep->graph, graph_ref, tensor_symbol_info, back_info->input_bitmasks, back_info->output_bitmasks, sub_f_symbols, sub_wrt_symbols);
18
Calling '_ccv_nnc_symbolic_graph_backward_prep_sub_f_wrt_symbols'
1629 _ccv_nnc_symbolic_graph_backward_gen(sub_prep, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_f_symbols, 0)((void*)(((char*)((sub_f_symbols)->data)) + (size_t)(sub_f_symbols
)->rsize * (size_t)(0)))
, sub_f_symbols->rnum, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, 0)((void*)(((char*)((sub_wrt_symbols)->data)) + (size_t)(sub_wrt_symbols
)->rsize * (size_t)(0)))
, sub_wrt_symbols->rnum, sub_graph, root);
1630 back_exec->symbol = ccv_nnc_symbolic_graph_while(graph, back_exec->cmd.cmd, sub_graph, forw_exec->name);
1631 if (!sub_execs)
1632 sub_execs = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
1633 ccv_array_clear(sub_execs);
1634 // Find the breakpoints in forward graph, creating the reverse one.
1635 for (j = 0; j < sub_prep->graph->breakpoint_size; j++)
1636 {
1637 const int d = sub_prep->graph->breakpoints[j].d;
1638 if (sub_prep->autograd_execs[d].symbol.graph)
1639 ccv_array_push(sub_execs, &sub_prep->autograd_execs[d].symbol);
1640 else
1641 _ccv_nnc_add_backward_breakpoint_for_symbol(sub_prep, sub_prep->graph->breakpoints[j], sub_graph, sub_execs);
1642 }
1643 ccv_nnc_symbolic_graph_set_while_expr(sub_graph, NOOP_GRAPH_WHILE_EXPR(ccv_nnc_graph_while_f)(1), 0, 0, 0, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sub_execs, 0)((void*)(((char*)((sub_execs)->data)) + (size_t)(sub_execs
)->rsize * (size_t)(0)))
, sub_execs->rnum);
1644 ccv_nnc_graph_exec_symbol_autogen(sub_graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1645 _ccv_nnc_symbolic_graph_set_backward_carry_overs(sub_prep, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, 0)((void*)(((char*)((sub_wrt_symbols)->data)) + (size_t)(sub_wrt_symbols
)->rsize * (size_t)(0)))
, sub_wrt_symbols->rnum, sub_graph);
1646 for (j = 0; j < back_exec->input_size; j++)
1647 if (back_info->input_bitmasks[j >> 6] & ((uint64_t)1 << j))
1648 ccv_array_push(symbols, &(((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->inputs[j])((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(back_exec->
inputs[j])))
)->symbol));
1649 // Check whether any of the wrt symbols need to be initialized to zero; if there are, they need to be inputs here too.
1650 _ccv_nnc_symbolic_graph_add_init_zeros(sub_prep, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, 0)((void*)(((char*)((sub_wrt_symbols)->data)) + (size_t)(sub_wrt_symbols
)->rsize * (size_t)(0)))
, sub_wrt_symbols->rnum, graph, sub_graph, symbols);
1651 _ccv_nnc_symbolic_graph_add_tape_vars(sub_prep, root, graph, sub_graph, symbols);
1652 // input_size at this point may differ from back_exec->input_size, because we may have added zeroing tensors as input tensors.
1653 const int input_size = symbols->rnum;
1654 for (j = 0; j < back_exec->output_size; j++)
1655 if (back_info->output_bitmasks[j >> 6] & ((uint64_t)1 << j))
1656 ccv_array_push(symbols, &(((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->outputs[j])((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(back_exec->
outputs[j])))
)->symbol));
1657 const int output_size = symbols->rnum - input_size;
1658 const int p_idx = sub_prep->graph->p_idx - 1;
1659 assert(back_exec->input_size == forw_exec->output_size)((void) sizeof ((back_exec->input_size == forw_exec->output_size
) ? 1 : 0), __extension__ ({ if (back_exec->input_size == forw_exec
->output_size) ; else __assert_fail ("back_exec->input_size == forw_exec->output_size"
, "ccv_nnc_symbolic_graph_backward.c", 1659, __extension__ __PRETTY_FUNCTION__
); }))
;
1660 k = 0;
1661 for (j = 0; j < back_exec->input_size; j++)
1662 if (back_info->input_bitmasks[j >> 6] & ((uint64_t)1 << j))
1663 {
1664 const ccv_nnc_tensor_symbol_info_t* const info = tensor_symbol_info + forw_exec->outputs[j];
1665 const int s_idx = *(int*)ccv_array_get(info->s_ref, p_idx)((void*)(((char*)((info->s_ref)->data)) + (size_t)(info
->s_ref)->rsize * (size_t)(p_idx)))
- 1;
1666 assert(s_idx >= 0)((void) sizeof ((s_idx >= 0) ? 1 : 0), __extension__ ({ if
(s_idx >= 0) ; else __assert_fail ("s_idx >= 0", "ccv_nnc_symbolic_graph_backward.c"
, 1666, __extension__ __PRETTY_FUNCTION__); }))
;
1667 const ccv_nnc_autograd_tensor_symbol_t* const autograd_symbol = _ccv_nnc_autograd_tensor_symbol_from_tensor_version(sub_prep->autograd_tensor_symbols, sub_prep->autograd_tensor_versions + s_idx);
1668 ccv_nnc_tensor_symbol_hookup(graph, sub_graph, *(ccv_nnc_tensor_symbol_t*)ccv_array_get(symbols, k)((void*)(((char*)((symbols)->data)) + (size_t)(symbols)->
rsize * (size_t)(k)))
, autograd_symbol->symbol);
1669 ++k;
1670 }
1671 k = input_size; // Reset k, the symbol pass already set up by add_init_zeros.
1672 assert(back_exec->output_size == forw_exec->input_size)((void) sizeof ((back_exec->output_size == forw_exec->input_size
) ? 1 : 0), __extension__ ({ if (back_exec->output_size ==
forw_exec->input_size) ; else __assert_fail ("back_exec->output_size == forw_exec->input_size"
, "ccv_nnc_symbolic_graph_backward.c", 1672, __extension__ __PRETTY_FUNCTION__
); }))
;
1673 for (j = 0; j < back_exec->output_size; j++)
1674 if (back_info->output_bitmasks[j >> 6] & ((uint64_t)1 << j))
1675 {
1676 const ccv_nnc_tensor_symbol_info_t* const info = tensor_symbol_info + forw_exec->inputs[j];
1677 const int s_idx = *(int*)ccv_array_get(info->s_ref, p_idx)((void*)(((char*)((info->s_ref)->data)) + (size_t)(info
->s_ref)->rsize * (size_t)(p_idx)))
- 1;
1678 assert(s_idx >= 0)((void) sizeof ((s_idx >= 0) ? 1 : 0), __extension__ ({ if
(s_idx >= 0) ; else __assert_fail ("s_idx >= 0", "ccv_nnc_symbolic_graph_backward.c"
, 1678, __extension__ __PRETTY_FUNCTION__); }))
;
1679 const ccv_nnc_autograd_tensor_symbol_t* const autograd_symbol = _ccv_nnc_autograd_tensor_symbol_from_tensor_version(sub_prep->autograd_tensor_symbols, sub_prep->autograd_tensor_versions + s_idx);
1680 ccv_nnc_tensor_symbol_hookup(graph, sub_graph, *(ccv_nnc_tensor_symbol_t*)ccv_array_get(symbols, k)((void*)(((char*)((symbols)->data)) + (size_t)(symbols)->
rsize * (size_t)(k)))
, autograd_symbol->symbol);
1681 ++k;
1682 }
1683 ccv_nnc_graph_exec_symbol_set_io(graph, back_exec->symbol, ccv_array_get(symbols, 0)((void*)(((char*)((symbols)->data)) + (size_t)(symbols)->
rsize * (size_t)(0)))
, input_size, ccv_array_get(symbols, input_size)((void*)(((char*)((symbols)->data)) + (size_t)(symbols)->
rsize * (size_t)(input_size)))
, output_size);
1684 } else if (forw_exec->flags & CCV_NNC_GRAPH_EXEC_CASE_OF) {
1685 ccv_array_clear(symbol_map);
1686 for (j = 0; j < back_exec->output_size; j++)
1687 if (back_info->output_bitmasks[j >> 6] & ((uint64_t)1 << j))
1688 {
1689 ccv_nnc_tensor_symbol_map_t symbol = {
1690 .source = ((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->inputs[j])((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(back_exec->
inputs[j])))
)->symbol,
1691 .destination = ((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->outputs[j])((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(back_exec->
outputs[j])))
)->symbol,
1692 };
1693 ccv_array_push(symbol_map, &symbol);
1694 }
1695 const int symbol_map_size = symbol_map->rnum;
1696 back_exec->symbol = ccv_nnc_symbolic_graph_case_of_new(graph, back_exec->cmd.cmd, 0, 0, ccv_array_get(symbol_map, 0)((void*)(((char*)((symbol_map)->data)) + (size_t)(symbol_map
)->rsize * (size_t)(0)))
, symbol_map_size, forw_exec->name);
1697 ccv_nnc_symbolic_graph_set_case_of_expr(graph, back_exec->symbol, NOOP_GRAPH_CASE_OF_EXPR(ccv_nnc_graph_case_of_f)(1), 0);
1698 for (p = 0; p < forw_exec->graph_ref_size; p++)
1699 {
1700 const int graph_ref = CCV_NNC_GRAPH_REF(forw_exec)((forw_exec)->_heap_graph_ref ? (forw_exec)->_heap_graph_ref
: (forw_exec)->_inline_graph_ref)
[p] - 1;
1701 ccv_nnc_symbolic_graph_backward_prep_t* sub_prep = backward_prep->sub_preps + graph_ref;
1702 ccv_nnc_symbolic_graph_t* sub_graph = ccv_nnc_symbolic_graph_new();
1703 sub_graph->pair = sub_prep->graph;
1704 if (!sub_wrt_symbols)
1705 sub_wrt_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
1706 // I am done, need to redo above for sub_prep, and it has to be successful now.
1707 if (!sub_f_symbols)
1708 sub_f_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
1709 _ccv_nnc_symbolic_graph_backward_prep_sub_f_wrt_symbols(forw_exec, sub_prep->graph, graph_ref, tensor_symbol_info, back_info->input_bitmasks, back_info->output_bitmasks, sub_f_symbols, sub_wrt_symbols);
1710 _ccv_nnc_symbolic_graph_backward_gen(sub_prep, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_f_symbols, 0)((void*)(((char*)((sub_f_symbols)->data)) + (size_t)(sub_f_symbols
)->rsize * (size_t)(0)))
, sub_f_symbols->rnum, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, 0)((void*)(((char*)((sub_wrt_symbols)->data)) + (size_t)(sub_wrt_symbols
)->rsize * (size_t)(0)))
, sub_wrt_symbols->rnum, sub_graph, root);
1711 ccv_array_clear(symbol_map);
1712 k = 0;
1713 for (j = 0; j < back_exec->output_size; j++)
1714 if (back_info->output_bitmasks[j >> 6] & ((uint64_t)1 << j))
1715 {
1716 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, k)((void*)(((char*)((sub_wrt_symbols)->data)) + (size_t)(sub_wrt_symbols
)->rsize * (size_t)(k)))
)->d;
1717 if (d >= 0)
1718 {
1719 const ccv_nnc_autograd_tensor_symbol_t* const autograd_symbol = _ccv_nnc_autograd_tensor_symbol_from_tensor_version(sub_prep->autograd_tensor_symbols, sub_prep->autograd_tensor_versions + d);
1720 ccv_nnc_tensor_symbol_map_t symbol = {
1721 .source = autograd_symbol->symbol,
1722 .destination = ((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->outputs[j])((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(back_exec->
outputs[j])))
)->symbol,
1723 };
1724 ccv_array_push(symbol_map, &symbol);
1725 } else {
1726 // Create a new tensor in sub-graph and set it to be 0.
1727 const ccv_nnc_autograd_tensor_symbol_t* const autograd_symbol = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->outputs[j])((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(back_exec->
outputs[j])))
;
1728 // autograd_symbol->d points to the corresponding forward tensor.
1729 ccv_nnc_tensor_symbol_t zero_symbol = ccv_nnc_tensor_symbol_new(sub_graph, tensor_symbol_info[autograd_symbol->d].info, 0);
1730 ccv_nnc_graph_exec_symbol_new(sub_graph, CMD_SET_FORWARD(0)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={0,}}}, 0)
, 0, 0, &zero_symbol, 1, 0);
1731 ccv_nnc_tensor_symbol_map_t symbol = {
1732 .source = zero_symbol,
1733 .destination = autograd_symbol->symbol,
1734 };
1735 ccv_array_push(symbol_map, &symbol);
1736 }
1737 ++k;
1738 }
1739 ccv_nnc_graph_exec_symbol_autogen(sub_graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
1740 const int symbol_map_size = symbol_map->rnum;
1741 ccv_nnc_symbolic_graph_set_case_of(graph, back_exec->symbol, sub_graph, p, ccv_array_get(symbol_map, 0)((void*)(((char*)((symbol_map)->data)) + (size_t)(symbol_map
)->rsize * (size_t)(0)))
, symbol_map_size);
1742 // Hookup input only after this becomes a sub graph of the graph.
1743 k = 0;
1744 for (j = 0; j < back_exec->input_size; j++)
1745 if (back_info->input_bitmasks[j >> 6] & ((uint64_t)1 << j))
1746 {
1747 const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_f_symbols, k)((void*)(((char*)((sub_f_symbols)->data)) + (size_t)(sub_f_symbols
)->rsize * (size_t)(k)))
)->d;
1748 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_nnc_symbolic_graph_backward.c"
, 1748, __extension__ __PRETTY_FUNCTION__); }))
;
1749 // No corresponding sub tensors allocated. Skip.
1750 if (!sub_prep->autograd_tensor_versions[d].ref_version ||
1751 !sub_prep->autograd_tensor_versions[d].ref_version->rnum)
1752 continue;
1753 const ccv_nnc_autograd_tensor_symbol_t* const autograd_symbol = _ccv_nnc_autograd_tensor_symbol_from_tensor_version(sub_prep->autograd_tensor_symbols, sub_prep->autograd_tensor_versions + d);
1754 ccv_nnc_tensor_symbol_hookup(graph, sub_graph, ((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->inputs[j])((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(back_exec->
inputs[j])))
)->symbol, autograd_symbol->symbol);
1755 ++k;
1756 }
1757 // Need to make sure tape vars are hooked up.
1758 _ccv_nnc_symbolic_graph_add_tape_vars(sub_prep, root, graph, sub_graph, 0);
1759 }
1760 } else {
1761 ccv_array_clear(symbols);
1762 // Gradient inputs.
1763 for (j = 0; j < back_exec->input_size; j++)
1764 if (back_info->input_bitmasks[j >> 6] & ((uint64_t)1 << j))
1765 ccv_array_push(symbols, &(((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->inputs[j])((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(back_exec->
inputs[j])))
)->symbol));
1766 else
1767 ccv_array_push(symbols, &NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
1768 // Inputs from forward function.
1769 for (j = 0; j < forw_exec->input_size; j++)
1770 if (!(back_info->input_bitmasks[(j + back_exec->input_size) >> 6] & ((uint64_t)1 << (j + back_exec->input_size))))
1771 ccv_array_push(symbols, &NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
1772 else {
1773 const ccv_nnc_tensor_symbol_t symbol = {
1774 .d = forw_exec->inputs[j],
1775 .graph = backward_prep->graph
1776 };
1777 if (graph == backward_prep->graph)
1778 ccv_array_push(symbols, &symbol);
1779 else { // Otherwise, create a new symbol, and set its pair to the old symbol.
1780 const ccv_nnc_tensor_symbol_t new_symbol = ccv_nnc_tensor_symbol_new(graph, tensor_symbol_info[forw_exec->inputs[j]].info, tensor_symbol_info[forw_exec->inputs[j]].name);
1781 ccv_nnc_tensor_symbol_pair_with(graph, new_symbol, symbol);
1782 const int flags = ccv_nnc_tensor_symbol_flags(backward_prep->graph, symbol) | CCV_NNC_TENSOR_SYMBOL_TAPE_VAR;
1783 ccv_nnc_tensor_symbol_set_flags(graph, new_symbol, flags);
1784 ccv_nnc_tensor_symbol_set_flags(backward_prep->graph, symbol, flags);
1785 ccv_array_push(symbols, &new_symbol);
1786 }
1787 }
1788 // Outputs from forward function.
1789 for (j = 0; j < forw_exec->output_size; j++)
1790 if (!(back_info->input_bitmasks[(j + back_exec->input_size + forw_exec->input_size) >> 6] & ((uint64_t)1 << (j + back_exec->input_size + forw_exec->input_size))))
1791 ccv_array_push(symbols, &NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
1792 else {
1793 const ccv_nnc_tensor_symbol_t symbol = {
1794 .d = forw_exec->outputs[j],
1795 .graph = backward_prep->graph
1796 };
1797 if (graph == backward_prep->graph)
1798 ccv_array_push(symbols, &symbol);
1799 else { // Otherwise, create a new symbol, and set its pair to the old symbol.
1800 const ccv_nnc_tensor_symbol_t new_symbol = ccv_nnc_tensor_symbol_new(graph, tensor_symbol_info[forw_exec->outputs[j]].info, tensor_symbol_info[forw_exec->outputs[j]].name);
1801 ccv_nnc_tensor_symbol_pair_with(graph, new_symbol, symbol);
1802 const int flags = ccv_nnc_tensor_symbol_flags(backward_prep->graph, symbol) | CCV_NNC_TENSOR_SYMBOL_TAPE_VAR;
1803 ccv_nnc_tensor_symbol_set_flags(graph, new_symbol, flags);
1804 ccv_nnc_tensor_symbol_set_flags(backward_prep->graph, symbol, flags);
1805 ccv_array_push(symbols, &new_symbol);
1806 }
1807 }
1808 for (j = 0; j < back_exec->output_size; j++)
1809 if (back_info->output_bitmasks[j >> 6] & ((uint64_t)1 << j))
1810 ccv_array_push(symbols, &(((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->outputs[j])((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(back_exec->
outputs[j])))
)->symbol));
1811 else
1812 ccv_array_push(symbols, &NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
);
1813 back_exec->symbol = ccv_nnc_graph_exec_symbol_new(graph, back_exec->cmd, ccv_array_get(symbols, 0)((void*)(((char*)((symbols)->data)) + (size_t)(symbols)->
rsize * (size_t)(0)))
, back_exec->input_size + forw_exec->input_size + forw_exec->output_size, ccv_array_get(symbols, back_exec->input_size + forw_exec->input_size + forw_exec->output_size)((void*)(((char*)((symbols)->data)) + (size_t)(symbols)->
rsize * (size_t)(back_exec->input_size + forw_exec->input_size
+ forw_exec->output_size)))
, back_exec->output_size, 0);
1814 ccv_nnc_graph_exec_symbol_set_hint(graph, back_exec->symbol, exec_symbol_info[i].hint);
1815 ccv_nnc_graph_exec_symbol_pair_with(graph, back_exec->symbol, (ccv_nnc_graph_exec_symbol_t){
1816 .d = i,
1817 .graph = backward_prep->graph,
1818 });
1819 }
1820 }
1821 if (sub_f_symbols)
1822 ccv_array_free(sub_f_symbols);
1823 if (sub_wrt_symbols)
1824 ccv_array_free(sub_wrt_symbols);
1825 if (sub_execs)
1826 ccv_array_free(sub_execs);
1827 ccv_array_t* const sum_or_set_execs = backward_prep->sum_or_set_execs;
1828 for (i = 0; i < sum_or_set_execs->rnum; i++)
1829 {
1830 ccv_nnc_sum_or_set_graph_exec_symbol_t* sum_or_set_exec = (ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, i)((void*)(((char*)((sum_or_set_execs)->data)) + (size_t)(sum_or_set_execs
)->rsize * (size_t)(i)))
;
1831 // It is a sum if it has inputs; a set doesn't have inputs.
1832 if (sum_or_set_exec->input_size)
1833 {
1834 ccv_array_clear(symbols);
1835 // This is to sum.
1836 for (j = 0; j < sum_or_set_exec->input_size; j++)
1837 ccv_array_push(symbols, &(((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, sum_or_set_exec->inputs[j])((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(sum_or_set_exec
->inputs[j])))
)->symbol));
1838 ccv_nnc_cmd_t cmd = ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, CMD_GENERIC()((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}}}), 0);
1839 sum_or_set_exec->symbol = ccv_nnc_graph_exec_symbol_new(graph, cmd, ccv_array_get(symbols, 0)((void*)(((char*)((symbols)->data)) + (size_t)(symbols)->
rsize * (size_t)(0)))
, sum_or_set_exec->input_size, &(((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, sum_or_set_exec->output)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(sum_or_set_exec
->output)))
)->symbol), 1, 0);
1840 } else
1841 sum_or_set_exec->symbol = ccv_nnc_graph_exec_symbol_new(graph, CMD_SET_FORWARD(sum_or_set_exec->value)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size
={.dim={1,1,1}},.blas={.a={sum_or_set_exec->value,}}}, 0)
, 0, 0, &(((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, sum_or_set_exec->output)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(sum_or_set_exec
->output)))
)->symbol), 1, 0);
1842 }
1843 ccv_array_free(symbol_map);
1844 ccv_array_free(symbols);
1845 for (i = 0; i < exec_symbol_info_size; i++)
1846 {
1847 // This is not going to be an interesting node. Skip.
1848 if ((backward_info[i].f_wrt & 0x3) != 0x3)
1849 continue;
1850 ccv_nnc_autograd_graph_exec_symbol_t* const back_exec = autograd_execs + i;
1851 // If on the same graph, we cannot decide whether it is before or after the forw_exec, so enforce that it is after forw_exec.
1852 if (graph == backward_prep->graph)
1853 ccv_nnc_graph_exec_symbol_concat(graph, (ccv_nnc_graph_exec_symbol_t){
1854 .d = i,
1855 .graph = graph
1856 }, back_exec->symbol);
1857 if (back_exec->outgoings)
1858 for (j = 0; j < back_exec->outgoings->rnum; j++)
1859 {
1860 int d = *(int*)ccv_array_get(back_exec->outgoings, j)((void*)(((char*)((back_exec->outgoings)->data)) + (size_t
)(back_exec->outgoings)->rsize * (size_t)(j)))
;
1861 if (d < exec_symbol_info_size)
1862 ccv_nnc_graph_exec_symbol_concat(graph, back_exec->symbol, autograd_execs[d].symbol);
1863 else
1864 ccv_nnc_graph_exec_symbol_concat(graph, back_exec->symbol, ((ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, d - exec_symbol_info_size)((void*)(((char*)((sum_or_set_execs)->data)) + (size_t)(sum_or_set_execs
)->rsize * (size_t)(d - exec_symbol_info_size)))
)->symbol);
1865 }
1866 }
1867 for (i = 0; i < sum_or_set_execs->rnum; i++)
1868 {
1869 ccv_nnc_sum_or_set_graph_exec_symbol_t* exec = (ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, i)((void*)(((char*)((sum_or_set_execs)->data)) + (size_t)(sum_or_set_execs
)->rsize * (size_t)(i)))
;
1870 if (exec->outgoings)
1871 for (j = 0; j < exec->outgoings->rnum; j++)
1872 {
1873 int d = *(int*)ccv_array_get(exec->outgoings, j)((void*)(((char*)((exec->outgoings)->data)) + (size_t)(
exec->outgoings)->rsize * (size_t)(j)))
;
1874 if (d < exec_symbol_info_size)
1875 ccv_nnc_graph_exec_symbol_concat(graph, exec->symbol, autograd_execs[d].symbol);
1876 else
1877 ccv_nnc_graph_exec_symbol_concat(graph, exec->symbol, ((ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, d - exec_symbol_info_size)((void*)(((char*)((sum_or_set_execs)->data)) + (size_t)(sum_or_set_execs
)->rsize * (size_t)(d - exec_symbol_info_size)))
)->symbol);
1878 }
1879 }
1880 // Now that everything is done, set the metadata on the graph so that we can look up the backward symbols later.
1881 if (graph->backward.tensor_symbol_idx)
1882 graph->backward.tensor_symbol_idx = (int*)ccreallocrealloc(graph->backward.tensor_symbol_idx, sizeof(int) * (graph->tensor_symbol_info->rnum + tensor_symbol_info_size));
1883 else
1884 graph->backward.tensor_symbol_idx = (int*)ccmallocmalloc(sizeof(int) * (graph->tensor_symbol_info->rnum + tensor_symbol_info_size));
1885 graph->backward.tensor_symbol_size = tensor_symbol_info_size;
1886 graph->backward.exec_symbol_idx = graph->backward.tensor_symbol_idx + tensor_symbol_info_size;
1887 graph->backward.exec_symbol_size = graph->tensor_symbol_info->rnum;
1888 for (i = 0; i < tensor_symbol_info_size; i++)
1889 graph->backward.tensor_symbol_idx[i] = -1;
1890 for (i = 0; i < graph->backward.exec_symbol_size; i++)
1891 graph->backward.exec_symbol_idx[i] = -1;
1892 ccv_nnc_autograd_tensor_version_t* const autograd_tensor_versions = backward_prep->autograd_tensor_versions;
1893 // Assigning for wrt symbols.
1894 for (i = 0; i < wrt_symbol_size; i++)
1895 {
1896 const int d = wrt_symbols[i].d;
1897 if (d < 0)
1898 continue;
1899 assert(d < tensor_symbol_info_size)((void) sizeof ((d < tensor_symbol_info_size) ? 1 : 0), __extension__
({ if (d < tensor_symbol_info_size) ; else __assert_fail (
"d < tensor_symbol_info_size", "ccv_nnc_symbolic_graph_backward.c"
, 1899, __extension__ __PRETTY_FUNCTION__); }))
;
1900 const ccv_nnc_tensor_symbol_info_t* const forw_symbol = tensor_symbol_info + d;
1901 ccv_nnc_autograd_tensor_version_t* const tensor_ver = autograd_tensor_versions + ((!forw_symbol->alias_ref) ? d : forw_symbol->alias_ref - 1);
1902 assert(tensor_ver->ref_version)((void) sizeof ((tensor_ver->ref_version) ? 1 : 0), __extension__
({ if (tensor_ver->ref_version) ; else __assert_fail ("tensor_ver->ref_version"
, "ccv_nnc_symbolic_graph_backward.c", 1902, __extension__ __PRETTY_FUNCTION__
); }))
;
1903 ccv_nnc_tensor_ref_t* const tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, tensor_ver->c)((void*)(((char*)((tensor_ver->ref_version)->data)) + (
size_t)(tensor_ver->ref_version)->rsize * (size_t)(tensor_ver
->c)))
;
1904 ccv_nnc_autograd_tensor_symbol_t* const autograd_symbol = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_ref->d)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(tensor_ref->
d)))
;
1905 // If this wrt symbol is an alias, create extra alias for this.
1906 if (!forw_symbol->alias_ref)
1907 graph->backward.tensor_symbol_idx[d] = autograd_symbol->symbol.d;
1908 else // We create new alias, and this cannot be referenced from exec_symbol_idx because its size limited to previous tensor symbol size.
1909 graph->backward.tensor_symbol_idx[d] = ccv_nnc_tensor_symbol_alias_new(graph, autograd_symbol->symbol, forw_symbol->ofs, forw_symbol->inc, forw_symbol->info, 0).d;
1910 const int dd = autograd_symbol->symbol.d;
1911 const int x = tensor_ref->x;
1912 if (tensor_ref->exec_registry && tensor_ref->exec_registry->rnum) // Create no-op node.
1913 {
1914 ccv_nnc_graph_exec_symbol_t noop = ccv_nnc_graph_exec_symbol_new(graph, ccv_nnc_cmd(CCV_NNC_NOOP, 0, CMD_GENERIC()((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}}}), 0), 0, 0, 0, 0, 0);
1915 if (x < exec_symbol_info_size)
1916 ccv_nnc_graph_exec_symbol_concat(graph, autograd_execs[x].symbol, noop);
1917 else
1918 ccv_nnc_graph_exec_symbol_concat(graph, ((ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, x - exec_symbol_info_size)((void*)(((char*)((sum_or_set_execs)->data)) + (size_t)(sum_or_set_execs
)->rsize * (size_t)(x - exec_symbol_info_size)))
)->symbol, noop);
1919 for (j = 0; j < tensor_ref->exec_registry->rnum; j++)
1920 {
1921 const int x = *(int*)ccv_array_get(tensor_ref->exec_registry, j)((void*)(((char*)((tensor_ref->exec_registry)->data)) +
(size_t)(tensor_ref->exec_registry)->rsize * (size_t)(
j)))
;
1922 assert(x >= 0)((void) sizeof ((x >= 0) ? 1 : 0), __extension__ ({ if (x >=
0) ; else __assert_fail ("x >= 0", "ccv_nnc_symbolic_graph_backward.c"
, 1922, __extension__ __PRETTY_FUNCTION__); }))
; /* Otherwise, this is initialization tensor, which is impossible to be summed up by. */
1923 assert(x < exec_symbol_info_size)((void) sizeof ((x < exec_symbol_info_size) ? 1 : 0), __extension__
({ if (x < exec_symbol_info_size) ; else __assert_fail ("x < exec_symbol_info_size"
, "ccv_nnc_symbolic_graph_backward.c", 1923, __extension__ __PRETTY_FUNCTION__
); }))
; // exec_registry is only used by alias_registry, it simply cannot reference to a sum operation.
1924 ccv_nnc_graph_exec_symbol_concat(graph, autograd_execs[x].symbol, noop);
1925 }
1926 graph->backward.exec_symbol_idx[dd] = noop.d;
1927 } else {
1928 if (x < exec_symbol_info_size)
1929 graph->backward.exec_symbol_idx[dd] = autograd_execs[x].symbol.d;
1930 else
1931 graph->backward.exec_symbol_idx[dd] = ((ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, x - exec_symbol_info_size)((void*)(((char*)((sum_or_set_execs)->data)) + (size_t)(sum_or_set_execs
)->rsize * (size_t)(x - exec_symbol_info_size)))
)->symbol.d;
1932 }
1933 }
1934 // Assigning for f symbols.
1935 for (i = 0; i < f_symbol_size; i++)
1936 {
1937 const int d = f_symbols[i].d;
1938 assert(d >= 0)((void) sizeof ((d >= 0) ? 1 : 0), __extension__ ({ if (d >=
0) ; else __assert_fail ("d >= 0", "ccv_nnc_symbolic_graph_backward.c"
, 1938, __extension__ __PRETTY_FUNCTION__); }))
;
1939 assert(d < tensor_symbol_info_size)((void) sizeof ((d < tensor_symbol_info_size) ? 1 : 0), __extension__
({ if (d < tensor_symbol_info_size) ; else __assert_fail (
"d < tensor_symbol_info_size", "ccv_nnc_symbolic_graph_backward.c"
, 1939, __extension__ __PRETTY_FUNCTION__); }))
;
1940 const ccv_nnc_autograd_tensor_version_t* const tensor_ver = autograd_tensor_versions + d;
1941 if (tensor_ver->ref_version)
1942 {
1943 // We don't use _ccv_nnc_autograd_tensor_symbol_from_tensor_version because that select the last version, but for us, we need the first version.
1944 const ccv_nnc_tensor_ref_t* const tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, 0)((void*)(((char*)((tensor_ver->ref_version)->data)) + (
size_t)(tensor_ver->ref_version)->rsize * (size_t)(0)))
;
1945 const ccv_nnc_autograd_tensor_symbol_t* const autograd_symbol = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_ref->d)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t
)(autograd_tensor_symbols)->rsize * (size_t)(tensor_ref->
d)))
;
1946 graph->backward.tensor_symbol_idx[d] = autograd_symbol->symbol.d;
1947 // Cannot find relevant backward exec symbols for f, it could be many.
1948 }
1949 }
1950}
1951
// Public entry point that generates the backward pass for `graph` in place.
//
// Parameters (as used by the code below):
//   graph          - the symbolic graph to work on; it is passed twice to
//                    _ccv_nnc_symbolic_graph_backward_gen in the last step.
//   f_symbols      - array of f_symbol_size tensor symbols. Each must belong to
//                    `graph` and must not be an alias (alias_ref == 0).
//   wrt_symbols    - array of wrt_symbol_size tensor symbols. Each must belong to
//                    `graph`; an alias is accepted only when the symbol it refers
//                    to (index alias_ref - 1) is not an alias itself.
//   sources / destinations - exec symbol arrays (with their sizes) forwarded
//                    unchanged to the prep / prune / prep_gen helpers.
//
// All precondition checks are plain assert()s, so they are active only when
// assertions are compiled in. The function returns nothing; results are stored
// on the graph by the helpers it calls.
1952void ccv_nnc_symbolic_graph_backward(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const f_symbols, const int f_symbol_size, const ccv_nnc_tensor_symbol_t* const wrt_symbols, const int wrt_symbol_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
1953{
1954 int i;
1955 // f symbols cannot be alias.
1956 for (i = 0; i < f_symbol_size; i++)
1957 {
1958 assert(f_symbols[i].graph == graph)((void) sizeof ((f_symbols[i].graph == graph) ? 1 : 0), __extension__
({ if (f_symbols[i].graph == graph) ; else __assert_fail ("f_symbols[i].graph == graph"
, "ccv_nnc_symbolic_graph_backward.c", 1958, __extension__ __PRETTY_FUNCTION__
); }))
; // f symbol has to be in the current graph.
 // A non-zero alias_ref marks the symbol as an alias; reject those for f.
1959 assert(!((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, f_symbols[i].d))->alias_ref)((void) sizeof ((!((ccv_nnc_tensor_symbol_info_t*)((void*)(((
char*)((graph->tensor_symbol_info)->data)) + (size_t)(graph
->tensor_symbol_info)->rsize * (size_t)(f_symbols[i].d)
)))->alias_ref) ? 1 : 0), __extension__ ({ if (!((ccv_nnc_tensor_symbol_info_t
*)((void*)(((char*)((graph->tensor_symbol_info)->data))
+ (size_t)(graph->tensor_symbol_info)->rsize * (size_t
)(f_symbols[i].d))))->alias_ref) ; else __assert_fail ("!((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, f_symbols[i].d))->alias_ref"
, "ccv_nnc_symbolic_graph_backward.c", 1959, __extension__ __PRETTY_FUNCTION__
); }))
;
1960 }
 // wrt symbols: same graph-ownership check, but one level of aliasing is allowed.
1961 for (i = 0; i < wrt_symbol_size; i++)
1962 {
1963 assert(wrt_symbols[i].graph == graph)((void) sizeof ((wrt_symbols[i].graph == graph) ? 1 : 0), __extension__
({ if (wrt_symbols[i].graph == graph) ; else __assert_fail (
"wrt_symbols[i].graph == graph", "ccv_nnc_symbolic_graph_backward.c"
, 1963, __extension__ __PRETTY_FUNCTION__); }))
;
1964 // This is not an alias, or what it refers to is not an alias.
 // alias_ref is stored 1-based (0 means "not an alias"), hence the "- 1" when
 // looking up the symbol being aliased.
1965 assert(!((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, wrt_symbols[i].d))->alias_ref || !((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, wrt_symbols[i].d))->alias_ref - 1))->alias_ref)((void) sizeof ((!((ccv_nnc_tensor_symbol_info_t*)((void*)(((
char*)((graph->tensor_symbol_info)->data)) + (size_t)(graph
->tensor_symbol_info)->rsize * (size_t)(wrt_symbols[i].
d))))->alias_ref || !((ccv_nnc_tensor_symbol_info_t*)((void
*)(((char*)((graph->tensor_symbol_info)->data)) + (size_t
)(graph->tensor_symbol_info)->rsize * (size_t)(((ccv_nnc_tensor_symbol_info_t
*)((void*)(((char*)((graph->tensor_symbol_info)->data))
+ (size_t)(graph->tensor_symbol_info)->rsize * (size_t
)(wrt_symbols[i].d))))->alias_ref - 1))))->alias_ref) ?
1 : 0), __extension__ ({ if (!((ccv_nnc_tensor_symbol_info_t
*)((void*)(((char*)((graph->tensor_symbol_info)->data))
+ (size_t)(graph->tensor_symbol_info)->rsize * (size_t
)(wrt_symbols[i].d))))->alias_ref || !((ccv_nnc_tensor_symbol_info_t
*)((void*)(((char*)((graph->tensor_symbol_info)->data))
+ (size_t)(graph->tensor_symbol_info)->rsize * (size_t
)(((ccv_nnc_tensor_symbol_info_t*)((void*)(((char*)((graph->
tensor_symbol_info)->data)) + (size_t)(graph->tensor_symbol_info
)->rsize * (size_t)(wrt_symbols[i].d))))->alias_ref - 1
))))->alias_ref) ; else __assert_fail ("!((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, wrt_symbols[i].d))->alias_ref || !((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, wrt_symbols[i].d))->alias_ref - 1))->alias_ref"
, "ccv_nnc_symbolic_graph_backward.c", 1965, __extension__ __PRETTY_FUNCTION__
); }))
;
1966 }
 // The two sizes below are only consumed by the asserts that follow: the graph
 // must contain at least one exec symbol and one tensor symbol.
1967 const int exec_symbol_info_size = graph->exec_symbol_info->rnum;
1968 const int tensor_symbol_info_size = graph->tensor_symbol_info->rnum;
1969 assert(exec_symbol_info_size > 0)((void) sizeof ((exec_symbol_info_size > 0) ? 1 : 0), __extension__
({ if (exec_symbol_info_size > 0) ; else __assert_fail ("exec_symbol_info_size > 0"
, "ccv_nnc_symbolic_graph_backward.c", 1969, __extension__ __PRETTY_FUNCTION__
); }))
;
1970 assert(tensor_symbol_info_size > 0)((void) sizeof ((tensor_symbol_info_size > 0) ? 1 : 0), __extension__
({ if (tensor_symbol_info_size > 0) ; else __assert_fail (
"tensor_symbol_info_size > 0", "ccv_nnc_symbolic_graph_backward.c"
, 1970, __extension__ __PRETTY_FUNCTION__); }))
;
 // Pipeline: build the prep state, prune ops, generate prep data, emit the
 // backward graph, then release the prep state. The order matters because each
 // step takes the same backward_prep object produced by the first call.
1971 ccv_nnc_symbolic_graph_backward_prep_t backward_prep = _ccv_nnc_symbolic_graph_backward_prep(graph, sources, source_size, destinations, destination_size);
1972 _ccv_nnc_symbolic_graph_backward_prep_prune_ops(&backward_prep, f_symbols, f_symbol_size, wrt_symbols, wrt_symbol_size, sources, source_size, destinations, destination_size);
 // NOTE(review): the literal 0 passed after wrt_symbol_size is an argument whose
 // meaning is defined by the helper, which is not visible here - confirm there.
1973 _ccv_nnc_symbolic_graph_backward_prep_gen(&backward_prep, f_symbols, f_symbol_size, wrt_symbols, wrt_symbol_size, 0, sources, source_size, destinations, destination_size);
 // `graph` is intentionally supplied for both trailing graph parameters.
1974 _ccv_nnc_symbolic_graph_backward_gen(&backward_prep, f_symbols, f_symbol_size, wrt_symbols, wrt_symbol_size, graph, graph);
1975 _ccv_nnc_symbolic_graph_backward_prep_free(backward_prep);
1976}
1977
// Looks up the tensor symbol recorded in graph->backward.tensor_symbol_idx for
// the given forward tensor symbol.
//
// `symbol.d` must lie in [0, graph->backward.tensor_symbol_size); this is
// enforced with assert() only. When the recorded index is negative, the
// NO_TENSOR_SYMBOL sentinel is returned (its expansion sets only
// .d = CCV_NNC_NO_TENSOR_SYMBOL). Otherwise the result carries the recorded
// index in .d and `graph` in .graph.
//
// Unlike the lookup of backward exec symbols, `symbol.d` is used directly as the
// table index here; no alias resolution is performed in this function.
1978ccv_nnc_tensor_symbol_t ccv_nnc_tensor_symbol_for_backward(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t symbol)
1979{
1980 assert(symbol.d >= 0)((void) sizeof ((symbol.d >= 0) ? 1 : 0), __extension__ ({
if (symbol.d >= 0) ; else __assert_fail ("symbol.d >= 0"
, "ccv_nnc_symbolic_graph_backward.c", 1980, __extension__ __PRETTY_FUNCTION__
); }))
;
1981 assert(symbol.d < graph->backward.tensor_symbol_size)((void) sizeof ((symbol.d < graph->backward.tensor_symbol_size
) ? 1 : 0), __extension__ ({ if (symbol.d < graph->backward
.tensor_symbol_size) ; else __assert_fail ("symbol.d < graph->backward.tensor_symbol_size"
, "ccv_nnc_symbolic_graph_backward.c", 1981, __extension__ __PRETTY_FUNCTION__
); }))
;
 // A negative table entry means no symbol was recorded for this tensor.
1982 if (graph->backward.tensor_symbol_idx[symbol.d] < 0)
1983 return NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
}
;
1984 ccv_nnc_tensor_symbol_t tensor = {
1985 .d = graph->backward.tensor_symbol_idx[symbol.d],
1986 .graph = graph,
1987 };
1988 return tensor;
1989}
1990
// Looks up the exec symbol recorded in graph->backward.exec_symbol_idx for the
// given forward tensor symbol.
//
// `symbol.d` must lie in [0, graph->tensor_symbol_info->rnum). If the tensor
// symbol is an alias (non-zero alias_ref), the lookup uses the aliased symbol's
// index (alias_ref - 1) instead. The resulting index must lie in
// [0, graph->backward.exec_symbol_size) and its table entry must be >= 0.
//
// All of these conditions are checked with assert() only; there is no sentinel
// return for a missing entry in this function. The returned exec symbol carries
// the recorded index in .d and `graph` in .graph.
1991ccv_nnc_graph_exec_symbol_t ccv_nnc_graph_exec_symbol_for_backward(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t symbol)
1992{
1993 assert(symbol.d >= 0)((void) sizeof ((symbol.d >= 0) ? 1 : 0), __extension__ ({
if (symbol.d >= 0) ; else __assert_fail ("symbol.d >= 0"
, "ccv_nnc_symbolic_graph_backward.c", 1993, __extension__ __PRETTY_FUNCTION__
); }))
;
1994 assert(symbol.d < graph->tensor_symbol_info->rnum)((void) sizeof ((symbol.d < graph->tensor_symbol_info->
rnum) ? 1 : 0), __extension__ ({ if (symbol.d < graph->
tensor_symbol_info->rnum) ; else __assert_fail ("symbol.d < graph->tensor_symbol_info->rnum"
, "ccv_nnc_symbolic_graph_backward.c", 1994, __extension__ __PRETTY_FUNCTION__
); }))
;
 // dd is the index actually used for the table lookup; it starts as the
 // symbol's own index and may be redirected to the aliased symbol below.
1995 int dd = symbol.d;
1996 // Check if this is an alias. Use the original if it is.
1997 ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, dd)((void*)(((char*)((graph->tensor_symbol_info)->data)) +
(size_t)(graph->tensor_symbol_info)->rsize * (size_t)(
dd)))
;
 // alias_ref is 1-based (0 means "not an alias"), so subtract 1 to get the index.
1998 if (symbol_info->alias_ref)
1999 dd = symbol_info->alias_ref - 1;
2000 assert(dd >= 0)((void) sizeof ((dd >= 0) ? 1 : 0), __extension__ ({ if (dd
>= 0) ; else __assert_fail ("dd >= 0", "ccv_nnc_symbolic_graph_backward.c"
, 2000, __extension__ __PRETTY_FUNCTION__); }))
;
2001 assert(dd < graph->backward.exec_symbol_size)((void) sizeof ((dd < graph->backward.exec_symbol_size)
? 1 : 0), __extension__ ({ if (dd < graph->backward.exec_symbol_size
) ; else __assert_fail ("dd < graph->backward.exec_symbol_size"
, "ccv_nnc_symbolic_graph_backward.c", 2001, __extension__ __PRETTY_FUNCTION__
); }))
;
 // A negative entry is treated as a programming error here, not as "no symbol".
2002 assert(graph->backward.exec_symbol_idx[dd] >= 0)((void) sizeof ((graph->backward.exec_symbol_idx[dd] >=
0) ? 1 : 0), __extension__ ({ if (graph->backward.exec_symbol_idx
[dd] >= 0) ; else __assert_fail ("graph->backward.exec_symbol_idx[dd] >= 0"
, "ccv_nnc_symbolic_graph_backward.c", 2002, __extension__ __PRETTY_FUNCTION__
); }))
;
2003 ccv_nnc_graph_exec_symbol_t exec = {
2004 .d = graph->backward.exec_symbol_idx[dd],
2005 .graph = graph
2006 };
2007 return exec;
2008}