File: nnc/ccv_nnc_symbolic_graph_backward.c
Warning: line 759, column 4: Assigned value is garbage or undefined
#include "ccv_nnc.h"
#include "ccv_nnc_easy.h"
#include "ccv_nnc_internal.h"
#include "ccv_internal.h"
#include "_ccv_nnc_symbolic_graph.h"

// MARK - Level-3.5 API

typedef struct {
	int f_wrt; // Check if both f_symbols and wrt_symbols flow through this node.
	ccv_array_t* outgoings; // backward traverse nodes.
	uint64_t* input_bitmasks;
	int input_bitmask_size;
	uint64_t* output_bitmasks;
	int output_bitmask_size;
} ccv_nnc_graph_backward_info_t;

typedef struct {
	int input_size;
	int* inputs;
	int output;
	ccv_array_t* outgoings;
	float value;
	ccv_nnc_graph_exec_symbol_t symbol;
} ccv_nnc_sum_or_set_graph_exec_symbol_t;

typedef struct {
	int input_size;
	int output_size;
	int* inputs;
	int* outputs;
	ccv_array_t* outgoings;
	ccv_nnc_cmd_t cmd;
	ccv_nnc_graph_exec_symbol_t symbol;
} ccv_nnc_autograd_graph_exec_symbol_t;

typedef struct {
	int d; // The pointer to the forward level object.
	int alias_ref; // The alias ref to itself (autograd_tensor_symbols array).
	int flags; // Flags for this symbol.
	ccv_nnc_tensor_symbol_t symbol;
} ccv_nnc_autograd_tensor_symbol_t;

typedef struct {
	int d; // The tensor symbol ref.
	int x; // The exec symbol ref.
	ccv_array_t* exec_registry; // Additional exec symbol refs, similar to x, only useful for aliasing.
	ccv_array_t* alias_registry; // ints pointing to all the aliases (if this is not an alias). The alias is the object in autograd_tensor_symbols; you need another level of indirection to get the actual forward level alias.
} ccv_nnc_tensor_ref_t;

typedef struct {
	int c; // The start non-accumulated version.
	ccv_array_t* ref_version; // tensor refs pointing to the reverse tensor symbols.
} ccv_nnc_autograd_tensor_version_t;

typedef struct {
	int d;
	int alias_ref;
} ccv_nnc_sum_variable_t;

// This method tries to figure out if a set of aliases can cover the whole tensor dim.
// This is not a precise implementation though. The requirement is to answer this question
// within a given memory constraint; therefore, it only allows up to 65536 different tensor locations.
// If you have more than that, it will assume the tensor doesn't have fully assigned aliases,
// and will return 0.

// Return 1 if inserted successfully.
static inline int _ccv_nnc_try_mix(int* const md, const int ins, const int c)
{
	if (!c)
	{
		md[0] = ins;
		return 1;
	}
	int ll = 0, uu = c - 1;
	int mm;
	do {
		mm = ll + ((uu - ll) >> 1);
		if (ins == md[mm])
			return 0;
		else if (ins < md[mm])
			uu = mm - 1;
		else if (ins > md[mm])
			ll = mm + 1;
	} while (ll <= uu);
	if (ll < c)
		memmove(md + ll + 1, md + ll, sizeof(int) * (c - ll));
	md[ll] = ins;
	return 1;
}
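
// A hypothetical usage sketch (not part of the original file): _ccv_nnc_try_mix maintains a
// sorted, deduplicated array of cut points; the caller tracks the count by adding up the
// return values, exactly as the alias-coverage code below does with `len`:
//
//   int md[4], c = 0;
//   c += _ccv_nnc_try_mix(md, 5, c); // md = {5}, c becomes 1
//   c += _ccv_nnc_try_mix(md, 2, c); // md = {2, 5}, c becomes 2
//   c += _ccv_nnc_try_mix(md, 5, c); // duplicate, returns 0, c stays 2
//   c += _ccv_nnc_try_mix(md, 9, c); // md = {2, 5, 9}, c becomes 3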

static inline int _ccv_nnc_mix_idx(const int* const md, const int ins, const int c)
{
	if (c <= 1)
		return 0;
	int ll = 0, uu = c - 1;
	int mm;
	do {
		mm = ll + ((uu - ll) >> 1);
		if (ins == md[mm])
			return mm;
		else if (ins < md[mm])
			uu = mm - 1;
		else if (ins > md[mm])
			ll = mm + 1;
	} while (ll <= uu);
	assert(0 && "Shouldn't reach here");
	return -1;
}

static inline void _ccv_nnc_try_set_pix_0(const int* const ofs, const int* const dim, const int* const tensor_dim, int* const* const scmd, const int* const cube_dim, const int* const cube_step, uint32_t* const cube, int offset)
{
	const int s = (ofs[0] == 0) ? 0 : _ccv_nnc_mix_idx(scmd[0], ofs[0], cube_dim[0]) + 1;
	const int d = ((ofs[0] + dim[0] == tensor_dim[0]) ? cube_dim[0] : _ccv_nnc_mix_idx(scmd[0], ofs[0] + ccv_max(1, dim[0]), cube_dim[0])) + 1;
	assert(s >= 0 && d > s);
	int i;
	for (i = s; i < d; i++)
		// Fill this pix. This could be made faster by looping through full words (divided by 8), but too lazy.
		cube[(offset + i) >> 5] |= (1u << ((offset + i) & 0x1f));
}

static inline void _ccv_nnc_try_set_pix_1(const int* const ofs, const int* const dim, const int* const tensor_dim, int* const* const scmd, const int* const cube_dim, const int* const cube_step, uint32_t* const cube, int offset)
{
	const int s0 = (ofs[0] == 0) ? 0 : _ccv_nnc_mix_idx(scmd[0], ofs[0], cube_dim[0]) + 1;
	const int d0 = ((ofs[0] + dim[0] == tensor_dim[0]) ? cube_dim[0] : _ccv_nnc_mix_idx(scmd[0], ofs[0] + ccv_max(1, dim[0]), cube_dim[0])) + 1;
	assert(s0 >= 0 && d0 > s0);
	const int s1 = (ofs[1] == 0) ? 0 : _ccv_nnc_mix_idx(scmd[1], ofs[1], cube_dim[1]) + 1;
	const int d1 = ((ofs[1] + dim[1] == tensor_dim[1]) ? cube_dim[1] : _ccv_nnc_mix_idx(scmd[1], ofs[1] + ccv_max(1, dim[1]), cube_dim[1])) + 1;
	assert(s1 >= 0 && d1 > s1);
	int i, j;
	const int step1 = cube_step[1];
	if (step1 == d0 - s0)
	{
		// Faster one, we can simply loop through.
		for (i = s1 * step1; i < d1 * step1; i++)
			cube[(offset + i) >> 5] |= (1u << ((offset + i) & 0x1f));
	} else {
		offset += s1 * step1;
		// There are gaps, slow one.
		for (i = s1; i < d1; i++, offset += step1)
			for (j = s0; j < d0; j++)
				cube[(offset + j) >> 5] |= (1u << ((offset + j) & 0x1f));
	}
}

static inline void _ccv_nnc_try_set_pix(const int* const ofs, const int* const dim, const int* const tensor_dim, int* const* const scmd, const int* const cube_dim, const int* const cube_step, uint32_t* const cube, int offset, const int dim_idx)
{
	switch (dim_idx)
	{
		case 1:
			_ccv_nnc_try_set_pix_1(ofs, dim, tensor_dim, scmd, cube_dim, cube_step, cube, offset);
			return;
		case 0:
			_ccv_nnc_try_set_pix_0(ofs, dim, tensor_dim, scmd, cube_dim, cube_step, cube, offset);
			return;
	}
	int i;
	const int s = (ofs[dim_idx] == 0) ? 0 : _ccv_nnc_mix_idx(scmd[dim_idx], ofs[dim_idx], cube_dim[dim_idx]) + 1;
	const int d = ((ofs[dim_idx] + dim[dim_idx] == tensor_dim[dim_idx]) ? cube_dim[dim_idx] : _ccv_nnc_mix_idx(scmd[dim_idx], ofs[dim_idx] + ccv_max(1, dim[dim_idx]), cube_dim[dim_idx])) + 1;
	assert(s >= 0 && d > s);
	for (i = s; i < d; i++)
		_ccv_nnc_try_set_pix(ofs, dim, tensor_dim, scmd, cube_dim, cube_step, cube, offset + i * cube_step[dim_idx], dim_idx - 1);
}

static int _ccv_nnc_tensor_ref_fully_assigned_with_aliases(const ccv_nnc_tensor_ref_t* const tensor_ref, const ccv_array_t* const autograd_tensor_symbols, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info)
{
	// Only work with tensor_ref of aliases.
	assert(tensor_ref->alias_registry);
	const ccv_nnc_autograd_tensor_symbol_t* autograd = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_ref->d);
	assert(tensor_symbol_info[autograd->d].alias_ref == 0);
	const int* tensor_dim = tensor_symbol_info[autograd->d].info.dim;
	const size_t tensor_count = ccv_nnc_dimension_count(tensor_dim);
	int i, j;
	for (i = 0; i < tensor_ref->alias_registry->rnum; i++)
	{
		const int d = *(int*)ccv_array_get(tensor_ref->alias_registry, i);
		assert(d < autograd_tensor_symbols->rnum);
		const ccv_nnc_autograd_tensor_symbol_t* autograd = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, d);
		assert(tensor_symbol_info[autograd->d].alias_ref);
		const int* stride = tensor_symbol_info[autograd->d].stride;
		const int* dim = tensor_symbol_info[autograd->d].info.dim;
		// If this is just reshaped (i.e., the dimensions are the same and the stride covers the whole tensor), it is fully assigned.
		if (ccv_nnc_is_tensor_stride_packed(stride, dim) && ccv_nnc_dimension_count(dim) == tensor_count)
			return 1;
	}
	int tensor_nd_reshaped = 0;
	int tensor_dim_reshaped[CCV_NNC_MAX_DIM_ALLOC] = {0};
	for (i = 0; i < tensor_ref->alias_registry->rnum; i++)
	{
		const int d = *(int*)ccv_array_get(tensor_ref->alias_registry, i);
		assert(d < autograd_tensor_symbols->rnum);
		const ccv_nnc_autograd_tensor_symbol_t* autograd = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, d);
		assert(tensor_symbol_info[autograd->d].alias_ref);
		const int* stride = tensor_symbol_info[autograd->d].stride;
		const int nd = ccv_nnc_tensor_nd(stride);
		if (i == 0) // Derive a tensor dim from the first one, by doing divisions on strides.
		{
			if (nd > 0)
			{
				tensor_dim_reshaped[0] = tensor_count / stride[0];
				for (j = 1; j < nd; j++)
					tensor_dim_reshaped[j] = stride[j - 1] / stride[j];
				tensor_nd_reshaped = nd;
			}
			continue;
		}
		// If reshaped differently, we cannot run our fill algorithm; be conservative.
		if (nd != tensor_nd_reshaped)
			return 0;
		// Otherwise, if the stride doesn't match the reshaped dim, it is not covered.
		if (!ccv_nnc_is_tensor_stride_packed(stride, tensor_dim_reshaped))
			return 0;
	}
	if (tensor_nd_reshaped > 0)
		tensor_dim = tensor_dim_reshaped;
	/* We need a solid cube (potentially hyper dimensional) to compute if there are overlaps.
	 * To make this cube as small as possible, we need to map the actual tensor dimension
	 * (therefore, we don't actually allocate the whole tensor to compute overlaps) to a smaller
	 * cube given the ofs and dim size of its aliases.
	 *
	 * The following code generates the dimension mapping (using scratch space) with binary search + insertion,
	 * and then we fill the cube with a given tensor alias's dimensional information (ofs, dim).
	 * Afterwards, we simply need to check if the cube is totally filled up to know if this tensor
	 * is fully assigned with its aliases (if that is the case, we can skip zeroing for this tensor).
	 *
	 * There are several restrictions though to make this faster: 1) it cannot handle any cube whose side
	 * lengths combined are larger than 1023 (scm only has 1024 ints of scratch space); 2) it cannot handle
	 * any cube whose total volume is larger than 2048 * 8 (only 2KiB is allocated on the stack for this).
	 * */
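	/* A hypothetical worked example (not in the original source): consider a 4x4 tensor covered
	 * by two aliases, A with ofs = {0, 0}, dim = {4, 2} and B with ofs = {0, 2}, dim = {4, 2}.
	 * Dimension 0 is fully spanned by both aliases, so it contributes no interior cut point;
	 * dimension 1 contributes the single cut point {2}. The cube is therefore 1x2 instead of
	 * 4x4: A fills cell 0, B fills cell 1, the bitmap ends up all ones, and the function can
	 * report the tensor as fully assigned, skipping the zero-initialization step. */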
	int scm[1024]; // 1024 ints of scratch space for mapping dimensions (or sparse coordinate mapping).
	int cube_dim[CCV_NNC_MAX_DIM_ALLOC] = {}; // Mapping dimension size.
	int cube_size = 1;
	int* scmptr = scm;
	for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && tensor_dim[i]; i++)
	{
		int head = 0, tail = 0; // Note that we touched both the head and tail (otherwise this dimension is not fully covered).
		int len = 0;
		for (j = 0; j < tensor_ref->alias_registry->rnum; j++)
		{
			const int d = *(int*)ccv_array_get(tensor_ref->alias_registry, j);
			assert(d < autograd_tensor_symbols->rnum);
			const ccv_nnc_autograd_tensor_symbol_t* autograd = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, d);
			assert(tensor_symbol_info[autograd->d].alias_ref);
			const int* ofs = tensor_symbol_info[autograd->d].ofs;
			const int* dim = tensor_symbol_info[autograd->d].info.dim;
			head = head || (ofs[i] == 0);
			tail = tail || (ofs[i] + ccv_max(1, dim[i]) == tensor_dim[i]);
			if (ofs[i] != 0)
				len += _ccv_nnc_try_mix(scmptr, ofs[i], len);
			if (scmptr - scm + len >= 1024) // Cannot handle that much, abort.
				return 0;
			if (ofs[i] + ccv_max(1, dim[i]) < tensor_dim[i])
				len += _ccv_nnc_try_mix(scmptr, ofs[i] + ccv_max(1, dim[i]), len);
			if (scmptr - scm + len >= 1024) // Cannot handle that much, abort.
				return 0;
		}
		if (!head || !tail)
			return 0;
		cube_size *= (len + 1);
		cube_dim[i] = len;
		scmptr += len; // Moving to next level.
	}
	// The cube map is too large, cannot do the computation, assume it is not fully assigned.
	if (cube_size > 2048 * 8)
		return 0;
	// Binary map to see if it fills up.
	uint32_t cube[(cube_size + 31) >> 5];
	memset(cube, 0, sizeof(uint32_t) * ((cube_size + 31) >> 5));
	int* scmd[CCV_NNC_MAX_DIM_ALLOC] = {}; // Sparse coordinate map at dimension x.
	int cube_step[CCV_NNC_MAX_DIM_ALLOC] = {};
	for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && tensor_dim[i]; i++)
	{
		cube_step[i] = (i > 0) ? cube_step[i - 1] * (cube_dim[i - 1] + 1) : 1;
		scmd[i] = (i > 0) ? scmd[i - 1] + cube_dim[i - 1] : scm;
	}
	const int max_dim = i;
	for (i = 0; i < tensor_ref->alias_registry->rnum; i++)
	{
		const int d = *(int*)ccv_array_get(tensor_ref->alias_registry, i);
		assert(d < autograd_tensor_symbols->rnum);
		const ccv_nnc_autograd_tensor_symbol_t* autograd = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, d);
		assert(tensor_symbol_info[autograd->d].alias_ref);
		const int* ofs = tensor_symbol_info[autograd->d].ofs;
		const int* dim = tensor_symbol_info[autograd->d].info.dim;
		_ccv_nnc_try_set_pix(ofs, dim, tensor_dim, scmd, cube_dim, cube_step, cube, 0, max_dim - 1);
	}
	// Compare to see now if the binary map is filled up. If it is, we know the tensor is fully assigned.
	for (i = 0; i < (cube_size >> 5); i++)
		if (cube[i] < 0xffffffff)
			return 0;
	if ((cube_size & 0x1f) > 0)
	{
		// Fetch the rest.
		uint32_t r = 0;
		for (i = 0; i < (cube_size & 0x1f); i++)
			r |= (1u << i);
		assert(cube[((cube_size + 31) >> 5) - 1] <= r);
		if (cube[((cube_size + 31) >> 5) - 1] < r)
			return 0;
	}
	return 1;
}

static int _ccv_nnc_tensor_ref_version_find_init(const ccv_nnc_autograd_tensor_version_t* const tensor_ver)
{
	int i;
	for (i = 0; i < tensor_ver->ref_version->rnum; i++)
		if (((ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, i))->x < 0)
			return i;
	return -1;
}

static void _ccv_nnc_graph_sum_autograd_tensor_versions(const int idx, const int d, const int exec_symbol_info_size, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, ccv_nnc_autograd_tensor_version_t* const tensor_ver, ccv_nnc_autograd_graph_exec_symbol_t* const autograd_execs, ccv_array_t* const autograd_tensor_symbols, ccv_array_t* const sum_or_set_execs)
{
	int i, j;
	assert(tensor_ver->c < tensor_ver->ref_version->rnum);
	const int input_size = tensor_ver->ref_version->rnum - tensor_ver->c;
	int* inputs = (int*)ccmalloc(sizeof(int) * input_size);
	for (i = tensor_ver->c; i < tensor_ver->ref_version->rnum; i++)
		// Index relative to c; indexing by i directly would write past the end of inputs whenever c > 0.
		inputs[i - tensor_ver->c] = ((ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, i))->d;
	const ccv_nnc_autograd_tensor_symbol_t tensor_sym = {
		.d = d
	};
	ccv_array_push(autograd_tensor_symbols, &tensor_sym);
	ccv_nnc_sum_or_set_graph_exec_symbol_t sum_exec = {
		.input_size = input_size,
		.inputs = inputs,
		.output = autograd_tensor_symbols->rnum - 1
	};
	if (idx >= 0)
	{
		sum_exec.outgoings = ccv_array_new(sizeof(int), 1, 0);
		ccv_array_push(sum_exec.outgoings, &idx);
	}
	ccv_array_push(sum_or_set_execs, &sum_exec);
	const int outgoing = exec_symbol_info_size + sum_or_set_execs->rnum - 1;
	for (i = tensor_ver->c; i < tensor_ver->ref_version->rnum; i++)
	{
		const ccv_nnc_tensor_ref_t* tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, i);
		const int x = tensor_ref->x;
		if (x < 0) /* This is an initialization tensor; it has to occur before the execution anyway. */
		{
			// No alias.
			assert(!tensor_ref->alias_registry);
			// No associated additional execs.
			assert(!tensor_ref->exec_registry);
			continue;
		}
		if (x < exec_symbol_info_size)
		{
			ccv_nnc_autograd_graph_exec_symbol_t* back_exec = autograd_execs + x;
			if (!back_exec->outgoings)
				back_exec->outgoings = ccv_array_new(sizeof(int), 1, 0);
			ccv_array_replace_unique_int(back_exec->outgoings, idx, outgoing);
		} else {
			// This tensor_ref is generated by a sum operation.
			ccv_nnc_sum_or_set_graph_exec_symbol_t* sum_or_set = (ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, x - exec_symbol_info_size);
			ccv_array_replace_unique_int(sum_or_set->outgoings, idx, outgoing);
		}
		// If this tensor has an associated alias, we need to init it to zeros when it is allocated (we only need to set a flag here);
		// it is handled at compilation phase.
		if (tensor_ref->alias_registry &&
			// Loop over to see if this tensor is fully occupied to avoid the extra zero step.
			!_ccv_nnc_tensor_ref_fully_assigned_with_aliases(tensor_ref, autograd_tensor_symbols, tensor_symbol_info))
		{
			ccv_nnc_autograd_tensor_symbol_t* tensor_sym = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_ref->d);
			// By having an alias_registry, what this symbol represents must not be an alias.
			assert(tensor_sym->alias_ref == 0);
			tensor_sym->flags = CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS;
		}
		if (tensor_ref->exec_registry)
			for (j = 0; j < tensor_ref->exec_registry->rnum; j++)
			{
				const int x = *(int*)ccv_array_get(tensor_ref->exec_registry, j);
				assert(x >= 0);
				// The exec_registry can only be generated by the alias registry; therefore, it cannot reference a sum operation.
				assert(x < exec_symbol_info_size);
				ccv_nnc_autograd_graph_exec_symbol_t* back_exec = autograd_execs + x;
				if (!back_exec->outgoings)
					back_exec->outgoings = ccv_array_new(sizeof(int), 1, 0);
				ccv_array_replace_unique_int(back_exec->outgoings, idx, outgoing);
			}
	}
	const ccv_nnc_tensor_ref_t tensor_ref = {
		.d = autograd_tensor_symbols->rnum - 1,
		.x = outgoing
	};
	ccv_array_push(tensor_ver->ref_version, &tensor_ref);
	/* Move the c pointer up to the latest summed result. */
	tensor_ver->c = tensor_ver->ref_version->rnum - 1;
}
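
/* Illustration (hypothetical, not from the original source): if ref_version holds unconsumed
 * versions {v2, v3, v4} at and after tensor_ver->c, the function above emits one sum node whose
 * inputs are the tensor symbols of v2..v4 and whose output is a freshly pushed symbol for d,
 * redirects every producer of v2..v4 (plus anything in their exec_registry) to flow into that
 * sum node, pushes the summed result as a new ref_version entry whose x is the sum node's index
 * (exec_symbol_info_size + its position in sum_or_set_execs), and finally advances c so
 * subsequent reads see only the summed version. */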

static int _ccv_nnc_tensor_ref_version_involve_alias(const ccv_nnc_tensor_ref_t* const tensor_ref, const ccv_array_t* const autograd_tensor_symbols, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, const ccv_nnc_tensor_symbol_info_t* const alias)
{
	assert(alias->alias_ref > 0);
	// No alias_registry, must conflict (owns the whole band).
	if (!tensor_ref->alias_registry)
		return 1;
	int i;
	for (i = 0; i < tensor_ref->alias_registry->rnum; i++)
	{
		const int d = *(int*)ccv_array_get(tensor_ref->alias_registry, i);
		assert(d < autograd_tensor_symbols->rnum);
		ccv_nnc_autograd_tensor_symbol_t* autograd = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, d);
		if (ccv_nnc_over_tensor_symbol_aliases(tensor_symbol_info + autograd->d, alias))
			return 1;
	}
	// None of the aliases referenced by this ref_version overlaps with the provided one; thus, there is no conflict at all.
	return 0;
}

static int _ccv_nnc_tensor_ref_version_find_alias(const ccv_nnc_tensor_ref_t* const tensor_ref, const ccv_array_t* const autograd_tensor_symbols, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, const ccv_nnc_tensor_symbol_info_t* const alias)
{
	assert(alias->alias_ref > 0);
	// No alias_registry, thus, cannot find the exact matched alias.
	if (!tensor_ref->alias_registry)
		return -1;
	int i;
	for (i = 0; i < tensor_ref->alias_registry->rnum; i++)
	{
		const int d = *(int*)ccv_array_get(tensor_ref->alias_registry, i);
		assert(d < autograd_tensor_symbols->rnum);
		ccv_nnc_autograd_tensor_symbol_t* autograd = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, d);
		// This must reference an alias.
		assert(tensor_symbol_info[autograd->d].alias_ref);
		const int* stride = tensor_symbol_info[autograd->d].stride;
		const int* ofs = tensor_symbol_info[autograd->d].ofs;
		const int* dim = tensor_symbol_info[autograd->d].info.dim;
		// If everything matches, this is the required alias.
		if (memcmp(stride, alias->stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC) == 0 &&
			memcmp(ofs, alias->ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC) == 0 &&
			memcmp(dim, alias->info.dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC) == 0)
			return d;
	}
	return -1;
}

static int _ccv_nnc_tensor_ref_version_has_this_alias_exclusively(const ccv_nnc_tensor_ref_t* const tensor_ref, const ccv_array_t* const autograd_tensor_symbols, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, const ccv_nnc_tensor_symbol_info_t* const alias)
{
	assert(alias->alias_ref > 0);
	// No alias_registry, thus, cannot find the exact matched alias.
	if (!tensor_ref->alias_registry)
		return 0;
	int i;
	for (i = 0; i < tensor_ref->alias_registry->rnum; i++)
	{
		const int d = *(int*)ccv_array_get(tensor_ref->alias_registry, i);
		assert(d < autograd_tensor_symbols->rnum);
		ccv_nnc_autograd_tensor_symbol_t* autograd = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, d);
		// This must reference an alias.
		assert(tensor_symbol_info[autograd->d].alias_ref);
		const int* stride = tensor_symbol_info[autograd->d].stride;
		const int* ofs = tensor_symbol_info[autograd->d].ofs;
		const int* dim = tensor_symbol_info[autograd->d].info.dim;
		if (memcmp(stride, alias->stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC) != 0 ||
			memcmp(ofs, alias->ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC) != 0 ||
			memcmp(dim, alias->info.dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC) != 0)
			return 0;
	}
	// If everything matches for every alias in the registry, we can use any of the aliases directly.
	return 1;
}
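
/* A hypothetical contrast of the three predicates above (not from the original source), for a
 * query alias A = (ofs {0, 2}, dim {4, 2}) against a ref whose alias_registry holds
 * B = (ofs {0, 0}, dim {4, 2}) and C = A:
 *   _ccv_nnc_tensor_ref_version_involve_alias returns 1 because at least one entry (C) overlaps A;
 *   _ccv_nnc_tensor_ref_version_find_alias returns C's symbol index, the unique exact match;
 *   _ccv_nnc_tensor_ref_version_has_this_alias_exclusively returns 0 because B differs from A. */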

static int _ccv_nnc_graph_sum_autograd_tensor_versions_alias(const int idx, const int d, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, const int exec_symbol_info_size, const ccv_nnc_tensor_symbol_info_t* const alias, ccv_nnc_autograd_tensor_version_t* const tensor_ver, ccv_nnc_autograd_graph_exec_symbol_t* const autograd_execs, ccv_array_t* const autograd_tensor_symbols, ccv_array_t* const sum_or_set_execs)
{
	assert(tensor_ver->c < tensor_ver->ref_version->rnum);
	int i, j = 0;
	struct {
		int k;
		int i;
	} kd[tensor_ver->ref_version->rnum - tensor_ver->c];
	for (i = tensor_ver->c; i < tensor_ver->ref_version->rnum; i++)
	{
		ccv_nnc_tensor_ref_t* tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, i);
		const int k = _ccv_nnc_tensor_ref_version_find_alias(tensor_ref, autograd_tensor_symbols, tensor_symbol_info, alias);
		if (k >= 0)
			kd[j++] = (typeof(kd[0])){
				.k = k, .i = i
			};
		else if (_ccv_nnc_tensor_ref_version_involve_alias(tensor_ref, autograd_tensor_symbols, tensor_symbol_info, alias))
			kd[j++] = (typeof(kd[0])){
				.k = -1, .i = i // It now has a dependency on the original tensor (non-alias); label this with the highest bit.
			};
	}
	// Only one can be found. This is the easy case; we can simply return that symbol (or its alias).
	if (j == 1)
	{
		if (kd[0].k >= 0)
			return kd[0].k; // Only one alias can be found; that is the one.
		// Otherwise, need to create a new alias.
		ccv_nnc_tensor_ref_t* tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, kd[0].i);
		ccv_nnc_autograd_tensor_symbol_t* ref = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_ref->d);
		// Since we create a new alias, we need to set the referenced one to be allocated with 0s.
		if (ref->alias_ref) // If this is an alias, it has to be zero initialized.
		{
			ref = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, ref->alias_ref - 1);
			assert(ref->alias_ref == 0); // This is the original.
			ref->flags = CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS;
		} else if (tensor_ref->alias_registry && // Otherwise, check whether this symbol is fully occupied.
			// Loop over to see if this tensor is fully occupied to avoid the extra zero step.
			!_ccv_nnc_tensor_ref_fully_assigned_with_aliases(tensor_ref, autograd_tensor_symbols, tensor_symbol_info)) {
			ref->flags = CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS;
		}
		ccv_nnc_autograd_tensor_symbol_t tensor_sym = {
			.d = d,
			.alias_ref = tensor_ref->d + 1
		};
		ccv_array_push(autograd_tensor_symbols, &tensor_sym);
		const int ad = autograd_tensor_symbols->rnum - 1;
		if (tensor_ref->alias_registry) // Only push this when it has an alias registry (otherwise it already conflicts with everyone).
			ccv_array_push(tensor_ref->alias_registry, &ad);
		if (tensor_ref->x >= exec_symbol_info_size && idx >= 0)
		{
			ccv_nnc_sum_or_set_graph_exec_symbol_t* const sum_or_set_exec = (ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, tensor_ref->x - exec_symbol_info_size);
			// This may be summed; thus, we need to create a connection between this and the sum.
			if (!sum_or_set_exec->outgoings)
				sum_or_set_exec->outgoings = ccv_array_new(sizeof(int), 1, 0);
			ccv_array_push(sum_or_set_exec->outgoings, &idx);
		}
		// The newly inserted tensor symbol.
		return ad;
	}
	// Otherwise, we need to create the sum operation out of these.
	const int input_size = j;
	int has_this_alias_exclusively = 1;
	int* inputs = input_size > 0 ? (int*)ccmalloc(sizeof(int) * input_size) : 0;
	for (i = 0; i < input_size; i++)
	{
		ccv_nnc_tensor_ref_t* tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, kd[i].i);
		// We can take a fast path if every ref involved has the same alias; the sum operation can then use the alias directly.
		if (has_this_alias_exclusively && kd[i].k >= 0 && _ccv_nnc_tensor_ref_version_has_this_alias_exclusively(tensor_ref, autograd_tensor_symbols, tensor_symbol_info, alias))
			inputs[i] = *(int*)ccv_array_get(tensor_ref->alias_registry, 0); // Assigning the alias.
		else {
			if (has_this_alias_exclusively)
			{
				has_this_alias_exclusively = 0;
				for (j = 0; j < i; j++)
					inputs[j] = ((ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, kd[j].i))->d;
			}
			inputs[i] = tensor_ref->d;
		}
	}
	ccv_nnc_autograd_tensor_symbol_t tensor_sym = {
		.d = alias->alias_ref - 1
	};
	ccv_array_push(autograd_tensor_symbols, &tensor_sym);
	const int tensor_ref_d = autograd_tensor_symbols->rnum - 1;
	tensor_sym.d = d;
	tensor_sym.alias_ref = tensor_ref_d + 1;
	ccv_array_push(autograd_tensor_symbols, &tensor_sym);
	const int ad = autograd_tensor_symbols->rnum - 1;
	ccv_nnc_sum_or_set_graph_exec_symbol_t sum_exec = {
		.input_size = input_size,
		.inputs = inputs,
		.output = has_this_alias_exclusively ? ad : tensor_ref_d /* If it has this alias exclusively, the output should be the alias as well. Otherwise the output is the real tensor. */
	};
	if (idx >= 0)
	{
		sum_exec.outgoings = ccv_array_new(sizeof(int), 1, 0);
		ccv_array_push(sum_exec.outgoings, &idx);
	}
	ccv_array_push(sum_or_set_execs, &sum_exec);
	const int outgoing = exec_symbol_info_size + sum_or_set_execs->rnum - 1;
	int no_alias_registry = 0;
	for (i = 0; i < input_size; i++)
	{
		ccv_nnc_tensor_ref_t* tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, kd[i].i);
		if (!has_this_alias_exclusively)
		{
			// If the sum operation is not operating on one alias, I need to zero this tensor out when it is first
			// allocated (see the discussions around the flags I use).
			ccv_nnc_autograd_tensor_symbol_t* tensor_sym = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_ref->d);
			if (tensor_sym->alias_ref)
			{
				// Find the original tensor_sym and set its flags (I prefer to set flags on the original).
				ccv_nnc_autograd_tensor_symbol_t* ref = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_sym->alias_ref - 1);
				assert(ref->alias_ref == 0); // This is the original.
				ref->flags = CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS;
			} else if (tensor_ref->alias_registry && // Otherwise, check whether this symbol is fully occupied.
				// Loop over to see if this tensor is fully occupied to avoid the extra zero step.
				!_ccv_nnc_tensor_ref_fully_assigned_with_aliases(tensor_ref, autograd_tensor_symbols, tensor_symbol_info)) {
				tensor_sym->flags = CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS;
			}
		}
		// Check to see if any of these tensors doesn't have an alias.
		no_alias_registry |= (!tensor_ref->alias_registry);
		const int x = tensor_ref->x;
		assert(x >= 0); /* Otherwise, this is an initialization tensor, which is impossible to be summed up. */
		if (x < exec_symbol_info_size)
		{
			ccv_nnc_autograd_graph_exec_symbol_t* back_exec = autograd_execs + x;
			if (!back_exec->outgoings)
				back_exec->outgoings = ccv_array_new(sizeof(int), 1, 0);
			ccv_array_push(back_exec->outgoings, &outgoing);
		} else {
			ccv_nnc_sum_or_set_graph_exec_symbol_t* sum_or_set = (ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, x - exec_symbol_info_size);
			ccv_array_push(sum_or_set->outgoings, &outgoing);
		}
		if (tensor_ref->exec_registry)
			for (j = 0; j < tensor_ref->exec_registry->rnum; j++)
			{
				const int x = *(int*)ccv_array_get(tensor_ref->exec_registry, j);
				assert(x >= 0); /* Otherwise, this is an initialization tensor, which is impossible to be summed up. */
				assert(x < exec_symbol_info_size); // exec_registry is only used by alias_registry; it simply cannot reference a sum operation.
				ccv_nnc_autograd_graph_exec_symbol_t* back_exec = autograd_execs + x;
				if (!back_exec->outgoings)
					back_exec->outgoings = ccv_array_new(sizeof(int), 1, 0);
				ccv_array_push(back_exec->outgoings, &outgoing);
			}
	}
	const ccv_nnc_tensor_ref_t tensor_ref = {
		.d = tensor_ref_d,
		.x = outgoing,
		.exec_registry = 0, // No need to take execution dependencies: this tensor is generated by the sum node, which already carries that dependency.
		.alias_registry = !no_alias_registry || has_this_alias_exclusively ? ccv_array_new(sizeof(int), 1, 0) : 0
	};
	// If there is no alias registry, then we take the whole tensor ref as one.
	if (!no_alias_registry || has_this_alias_exclusively)
	{
		// If this tensor ref contains multiple different types of aliases, we have to add them together (otherwise
		// the computation of whether there is an empty slot in this tensor ref is not correct without all the
		// occupancy availability information).
		if (!has_this_alias_exclusively)
			for (i = 0; i < input_size; i++)
			{
				ccv_nnc_tensor_ref_t* ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, kd[i].i);
				assert(ref->alias_registry);
				// It may get duplicates, but whatever, it won't affect the computation.
				for (j = 0; j < ref->alias_registry->rnum; j++)
					ccv_array_push(tensor_ref.alias_registry, ccv_array_get(ref->alias_registry, j));
			}
		ccv_array_push(tensor_ref.alias_registry, &ad);
	}
	assert(input_size <= tensor_ver->ref_version->rnum - tensor_ver->c);
	ccv_nnc_tensor_ref_t x;
	for (i = 0; i < input_size; i++)
		// If the consumed one (kd[i].i) sits above the compacted position (i + tensor_ver->c), exchange them.
		if (kd[i].i > i + tensor_ver->c)
			CCV_SWAP(*(ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, i + tensor_ver->c), *(ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, kd[i].i), x);
	ccv_array_push(tensor_ver->ref_version, &tensor_ref);
	// We've consumed input_size tensor refs; now move c up to point at the non-consumed tensors.
	tensor_ver->c += input_size;
	return ad;
}
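
/* Hypothetical recap (not from the original source) of the two paths above: with a single
 * matching version, the function reuses or creates one alias symbol and returns it; with
 * several versions, it allocates a real accumulation symbol plus an alias over it, emits a
 * sum node reading all matched versions (using their aliases directly when every registry
 * entry equals the queried alias), and compacts the consumed versions below tensor_ver->c. */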

typedef struct ccv_nnc_symbolic_graph_backward_prep_s {
	int exec_symbol_info_size; // Number of graph exec symbols before adding any new symbols related to automatic differentiation.
	int tensor_symbol_info_size; // Number of tensor symbols before adding anything new.
	int sub_prep_size;
	ccv_nnc_graph_exec_symbol_info_t* exec_symbol_info;
	ccv_nnc_tensor_symbol_info_t* tensor_symbol_info;
	ccv_nnc_graph_backward_info_t* backward_info; // Corresponding to forward graph exec symbol info, it is exactly in reverse.
	ccv_nnc_graph_visit_t* forward_visit; // The visitor structure (top sorted index) when doing traversal.
	ccv_nnc_graph_visit_t* backward_visit; // The visitor structure (top sorted index) when doing reverse traversal.
	ccv_nnc_autograd_graph_exec_symbol_t* autograd_execs; // The graph exec symbols we need for automatic differentiation. This is a 1:1 mapping for forward graph exec symbols; however, unlike backward_info, its outgoings may be more complex (may contain outgoing flows to sum nodes).
	ccv_nnc_autograd_tensor_version_t* autograd_tensor_versions; // Corresponding to forward tensor symbols, each may contain multiple versions (due to multi-write).
	ccv_array_t* autograd_tensor_symbols; // The tensor symbols we need for automatic differentiation (it may not be a 1:1 mapping).
	ccv_array_t* sum_or_set_execs; // The sum nodes; because in reverse mode a tensor could have multiple versions, we need to sum them up before use.
	struct ccv_nnc_symbolic_graph_backward_prep_s* sub_preps; // The preps of its sub-graphs.
	// Pointers not managed by this struct.
	ccv_nnc_symbolic_graph_t* graph;
} ccv_nnc_symbolic_graph_backward_prep_t;

static ccv_nnc_symbolic_graph_backward_prep_t _ccv_nnc_symbolic_graph_backward_prep(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
{
	const int exec_symbol_info_size = graph->exec_symbol_info->rnum;
	assert(exec_symbol_info_size > 0);
	const int tensor_symbol_info_size = graph->tensor_symbol_info->rnum;
	assert(tensor_symbol_info_size > 0);
	ccv_nnc_graph_exec_symbol_info_t* exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_info_t) * exec_symbol_info_size);
	ccv_nnc_tensor_symbol_info_t* tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_info_t) * tensor_symbol_info_size);
	ccv_nnc_graph_visit_t* forward_visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0), exec_symbol_info_size, sources, source_size, destinations, destination_size, 0);
1 : 0), __extension__ ({ if (_exist_size_[_q_] < (exec_symbol_info_size)) ; else __assert_fail ("_exist_size_[_q_] < (exec_symbol_info_size)" , "ccv_nnc_symbolic_graph_backward.c", 672, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = ( _i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ( (void) sizeof (((destinations)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph", "ccv_nnc_symbolic_graph_backward.c" , 672, __extension__ __PRETTY_FUNCTION__); })); if (_incomings_ [(destinations)[_i_].d].r == 7) continue; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_].d].c == 0) ? 1 : 0) , __extension__ ({ if (_incomings_[(destinations)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0" , "ccv_nnc_symbolic_graph_backward.c", 672, __extension__ __PRETTY_FUNCTION__ ); })); } else if (_incomings_[(destinations)[_i_].d].c > 0 ) continue; _visit_->node[_visit_->size].index = (((destinations )[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_ [(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_ ) free(_incomings_); } while (0);; ((void) sizeof ((_visit_-> size <= (exec_symbol_info_size)) ? 1 : 0), __extension__ ( { if (_visit_->size <= (exec_symbol_info_size)) ; else __assert_fail ("_visit_->size <= (exec_symbol_info_size)", "ccv_nnc_symbolic_graph_backward.c" , 672, __extension__ __PRETTY_FUNCTION__); })); _visit_; }); | |||
| 673 | ccv_nnc_symbolic_graph_symbol_infer(graph, forward_visit, sources, source_size, destinations, destination_size, 0, 0, tensor_symbol_info, exec_symbol_info); | |||
| 674 | int i; | |||
| 675 | // Now, for each one of these, find a reverse graph. | |||
| 676 | ccv_nnc_graph_backward_info_t* backward_info = (ccv_nnc_graph_backward_info_t*)cccalloc(exec_symbol_info_size, sizeof(ccv_nnc_graph_backward_info_t)); | |||
| 677 | ccv_nnc_graph_visit_for(forward_visit, exec_symbol_info, node, idx) { | |||
| 678 | assert(ccv_nnc_cmd_is_forward(node->cmd) || node->cmd.cmd == CCV_NNC_NOOP); | |||
| 679 | if (node->outgoings) | |||
| 680 | for (i = 0; i < node->outgoings->rnum; i++) | |||
| 681 | { | |||
| 682 | int d = *(int*)ccv_array_get(node->outgoings, i); | |||
| 683 | if (!backward_info[d].outgoings) | |||
| 684 | backward_info[d].outgoings = ccv_array_new(sizeof(int32_t), 1, 0); | |||
| 685 | ccv_array_push(backward_info[d].outgoings, &idx); | |||
| 686 | } | |||
| 687 | } ccv_nnc_graph_visit_endfor | |||
| 688 | // Also mark only the output bits that we use. | |||
| 689 | for (i = 0; i < exec_symbol_info_size; i++) | |||
| 690 | { | |||
| 691 | backward_info[i].input_bitmask_size = ((exec_symbol_info[i].output_size * 2 + exec_symbol_info[i].input_size + 63) >> 6); | |||
| 692 | backward_info[i].output_bitmask_size = ((exec_symbol_info[i].input_size + 63) >> 6); | |||
| 693 | // Allocate input / output bitmasks | |||
| 694 | if (backward_info[i].input_bitmask_size + backward_info[i].output_bitmask_size > 0) | |||
| 695 | { | |||
| 696 | backward_info[i].input_bitmasks = (uint64_t*)cccalloc(backward_info[i].input_bitmask_size + backward_info[i].output_bitmask_size, sizeof(uint64_t)); | |||
| 697 | if (backward_info[i].output_bitmask_size) | |||
| 698 | backward_info[i].output_bitmasks = backward_info[i].input_bitmasks + backward_info[i].input_bitmask_size; | |||
| 699 | } | |||
| 700 | } | |||
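| | /* A worked example of the sizing above (hypothetical sizes, not from this graph): */ | |||
| | /* with input_size = 3 and output_size = 2, the backward node tracks */ | |||
| | /* 2 * 2 + 3 = 7 input-side bits and 3 output-side bits; (n + 63) >> 6 is */ | |||
| | /* ceil(n / 64), so both bitmasks fit in one uint64_t word each and share the */ | |||
| | /* single cccalloc'ed block, with output_bitmasks pointing past the input words. */ | |||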
| 701 | ccv_nnc_graph_visit_t* backward_visit = ccv_nnc_graph_visit_new(graph, backward_info, exec_symbol_info_size, destinations, destination_size, sources, source_size, 0); | |||
| 702 | const int sub_prep_size = graph->sub_graphs ? graph->sub_graphs->rnum : 0; | |||
| 703 | ccv_nnc_symbolic_graph_backward_prep_t* sub_preps = sub_prep_size > 0 ? (ccv_nnc_symbolic_graph_backward_prep_t*)cccalloc(sub_prep_size, sizeof(ccv_nnc_symbolic_graph_backward_prep_t)) : 0; | |||
| 704 | for (i = 0; i < sub_prep_size; i++) | |||
| 705 | { | |||
| 706 | const ccv_nnc_symbolic_graph_t* const sub_graph = *(ccv_nnc_symbolic_graph_t**)ccv_array_get(graph->sub_graphs, i); | |||
| 707 | sub_preps[i] = _ccv_nnc_symbolic_graph_backward_prep(sub_graph, ccv_nnc_symbolic_graph_sources(sub_graph), ccv_nnc_symbolic_graph_source_size(sub_graph), ccv_nnc_symbolic_graph_destinations(sub_graph), ccv_nnc_symbolic_graph_destination_size(sub_graph)); | |||
| 708 | } | |||
| 709 | return (ccv_nnc_symbolic_graph_backward_prep_t){ | |||
| 710 | .exec_symbol_info_size = exec_symbol_info_size, | |||
| 711 | .tensor_symbol_info_size = tensor_symbol_info_size, | |||
| 712 | .sub_prep_size = sub_prep_size, | |||
| 713 | .exec_symbol_info = exec_symbol_info, | |||
| 714 | .tensor_symbol_info = tensor_symbol_info, | |||
| 715 | .backward_info = backward_info, | |||
| 716 | .forward_visit = forward_visit, | |||
| 717 | .backward_visit = backward_visit, | |||
| 718 | .sub_preps = sub_preps, | |||
| 719 | .graph = (ccv_nnc_symbolic_graph_t*)graph, | |||
| 720 | }; | |||
| 721 | } | |||
| 722 | ||||
| 723 | static void _ccv_nnc_symbolic_graph_backward_exec_io(const ccv_nnc_graph_exec_symbol_info_t* const node, int** const back_input_map, int** const back_output_map, int* const back_input_size, int* const back_output_size) | |||
| 724 | { | |||
| 725 | int i; | |||
| 726 | if (node->flags & CCV_NNC_GRAPH_EXEC_CASE_OF) | |||
| 727 | { | |||
| 728 | *back_input_map = node->outputs; | |||
| 729 | *back_input_size = node->output_size; | |||
| 730 | for (i = 0; i < node->case_of.argument.offset; i++) | |||
| 731 | (*back_output_map)[i] = node->inputs[i]; | |||
| 732 | const int argument_offset = node->case_of.argument.offset; | |||
| 733 | const int argument_size = node->case_of.argument.size; | |||
| 734 | // Skip the argument range. | |||
| 735 | for (i = argument_offset + argument_size; i < node->input_size; i++) | |||
| 736 | (*back_output_map)[i - argument_size] = node->inputs[i]; | |||
| 737 | *back_output_size = node->input_size - node->case_of.argument.size; | |||
| 738 | } else { // if (node->flags & CCV_NNC_GRAPH_EXEC_P_WHILE) { | |||
| 739 | *back_input_map = node->outputs; | |||
| 740 | *back_input_size = node->output_size; | |||
| 741 | *back_output_map = node->inputs; | |||
| 742 | *back_output_size = node->input_size; | |||
| 743 | } | |||
| 744 | } | |||
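| | /* A sketch of the mapping above with hypothetical sizes: for a CASE_OF node with */ | |||
| | /* inputs {a, b, c, d, e}, argument.offset = 1 and argument.size = 2, the argument */ | |||
| | /* range {b, c} is skipped, so back_output_map becomes {a, d, e} and */ | |||
| | /* back_output_size = 5 - 2 = 3. On the non-CASE_OF path the maps are repointed */ | |||
| | /* wholesale: backward inputs are the forward outputs and vice versa. */ | |||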
| 745 | ||||
| 746 | static void _ccv_nnc_symbolic_graph_backward_prep_sub_f_wrt_symbols(const ccv_nnc_graph_exec_symbol_info_t* const forw_exec, const ccv_nnc_symbolic_graph_t* const sub_graph, const int graph_ref, const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, const uint64_t* const input_bitmasks, const uint64_t* const output_bitmasks, ccv_array_t* const sub_f_symbols, ccv_array_t* const sub_wrt_symbols) | |||
| 747 | { | |||
| 748 | int i, j; | |||
| 749 | ccv_array_clear(sub_wrt_symbols); | |||
| 750 | int forw_outputs[ccv_max(1, forw_exec->output_size)]; | |||
| 751 | int forw_inputs[ccv_max(1, forw_exec->input_size)]; | |||
| 752 | int* back_input_map = forw_outputs; | |||
| 753 | int* back_output_map = forw_inputs; | |||
| 754 | int back_input_size, back_output_size; | |||
| 755 | _ccv_nnc_symbolic_graph_backward_exec_io(forw_exec, &back_input_map, &back_output_map, &back_input_size, &back_output_size); | |||
| 756 | for (i = 0; i < back_output_size; i++) | |||
| 757 | if (output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) | |||
| 758 | { | |||
| 759 | const int d = back_output_map[i]; | |||
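| | /* Note the aliasing here: back_output_map still points at the caller's forw_inputs */ | |||
| | /* VLA unless _ccv_nnc_symbolic_graph_backward_exec_io repointed it at */ | |||
| | /* forw_exec->inputs; on the CASE_OF path only the mapped entries of that VLA are */ | |||
| | /* stored, so this read is defined exactly for the indices the helper wrote. */ | |||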
| 760 | const ccv_array_t* const s_refs = tensor_symbol_info[d].s_ref; | |||
| 761 | const int s_ref = s_refs && s_refs->rnum > graph_ref ? *(int*)ccv_array_get(s_refs, graph_ref) - 1 : -1; | |||
| 762 | if (s_ref >= 0) | |||
| 763 | { | |||
| 764 | ccv_nnc_tensor_symbol_t sub_wrt_symbol = { | |||
| 765 | .d = s_ref, | |||
| 766 | .graph = sub_graph, | |||
| 767 | }; | |||
| 768 | ccv_array_push(sub_wrt_symbols, &sub_wrt_symbol); | |||
| 769 | } else | |||
| 770 | ccv_array_push(sub_wrt_symbols, &NO_TENSOR_SYMBOL); | |||
| 771 | } | |||
| 772 | ccv_array_clear(sub_f_symbols); | |||
| 773 | for (i = 0; i < back_input_size; i++) | |||
| 774 | if (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) | |||
| 775 | { | |||
| 776 | const int d = back_input_map[i]; | |||
| 777 | ccv_nnc_tensor_symbol_t sub_f_symbol = { | |||
| 778 | .d = *(int*)ccv_array_get(tensor_symbol_info[d].s_ref, graph_ref) - 1, | |||
| 779 | .graph = sub_graph, | |||
| 780 | }; | |||
| 781 | ccv_array_push(sub_f_symbols, &sub_f_symbol); | |||
| 782 | } | |||
| 783 | // Go through all its assignments (parameterized loop), making them either wrt or f. | |||
| 784 | // The reason is that these must flow through the graph, otherwise we cannot form a full | |||
| 785 | // enclosed loop. Also because they are the additional f / wrt symbols, there is | |||
| 786 | // no case that we cannot find their corresponding gradients in the backward sub graphs | |||
| 787 | // (these gradients have to be parameterized to form an enclosed loop as well). | |||
| 788 | for (i = 0; i < sub_graph->tensor_symbol_info->rnum; i++) | |||
| 789 | { | |||
| 790 | const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(sub_graph->tensor_symbol_info, i); | |||
| 791 | if (tensor_symbol_info->assign_ref) | |||
| 792 | { | |||
| 793 | const int assign_ref = tensor_symbol_info->assign_ref - 1; | |||
| 794 | // i is the wrt, assign_ref is the f. | |||
| 795 | int flag = 0; | |||
| 796 | for (j = 0; !flag && j < sub_wrt_symbols->rnum; j++) | |||
| 797 | flag = (((ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, j))->d == i); | |||
| 798 | if (!flag) | |||
| 799 | { | |||
| 800 | ccv_nnc_tensor_symbol_t sub_wrt_symbol = { | |||
| 801 | .d = i, | |||
| 802 | .graph = sub_graph, | |||
| 803 | }; | |||
| 804 | ccv_array_push(sub_wrt_symbols, &sub_wrt_symbol); | |||
| 805 | } | |||
| 806 | flag = 0; | |||
| 807 | for (j = 0; !flag && j < sub_f_symbols->rnum; j++) | |||
| 808 | flag = (((ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_f_symbols, j))->d == assign_ref); | |||
| 809 | if (!flag) | |||
| 810 | { | |||
| 811 | ccv_nnc_tensor_symbol_t sub_f_symbol = { | |||
| 812 | .d = assign_ref, | |||
| 813 | .graph = sub_graph, | |||
| 814 | }; | |||
| 815 | ccv_array_push(sub_f_symbols, &sub_f_symbol); | |||
| 816 | } | |||
| 817 | } | |||
| 818 | } | |||
| 819 | } | |||
| 820 | ||||
| 821 | // Check whether for a given f_symbol, we can compute wrt_symbols at all, if we can, tag the minimal io and ops (some ops can be replaced with noop) required to do so. | |||
| 822 | static int _ccv_nnc_symbolic_graph_backward_prep_prune_ops(const ccv_nnc_symbolic_graph_backward_prep_t* const backward_prep, const ccv_nnc_tensor_symbol_t* const f_symbols, const int f_symbol_size, const ccv_nnc_tensor_symbol_t* const wrt_symbols, const int wrt_symbol_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size) | |||
| 823 | { | |||
| 824 | int i, j, p; | |||
| 825 | const int tensor_symbol_info_size = backward_prep->tensor_symbol_info_size; | |||
| 826 | const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = backward_prep->exec_symbol_info; | |||
| 827 | const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = backward_prep->tensor_symbol_info; | |||
| 828 | const ccv_nnc_graph_visit_t* const forward_visit = backward_prep->forward_visit; | |||
| 829 | // Now, for each one of these, find a reverse graph. | |||
| 830 | ccv_nnc_graph_backward_info_t* const backward_info = backward_prep->backward_info; | |||
| 831 | const ccv_nnc_graph_visit_t* const backward_visit = backward_prep->backward_visit; | |||
| 832 | // Find the f_symbols, and tag its flows. | |||
| 833 | ccv_nnc_graph_visit_for(backward_visit, backward_info, node, idx) { | |||
| 834 | int f = node->f_wrt & 0x1; | |||
| 835 | for (i = 0; i < exec_symbol_info[idx].output_size && !f; i++) | |||
| 836 | { | |||
| 837 | int d = exec_symbol_info[idx].outputs[i]; | |||
| 838 | if (d < 0) | |||
| 839 | continue; | |||
| 840 | while (tensor_symbol_info[d].alias_ref) | |||
| 841 | d = tensor_symbol_info[d].alias_ref - 1; | |||
| 842 | for (j = 0; j < f_symbol_size && !f; j++) | |||
| 843 | if (d == f_symbols[j].d) | |||
| 844 | f = 1; | |||
| 845 | } | |||
| 846 | if (f) | |||
| 847 | { | |||
| 848 | node->f_wrt |= f; | |||
| 849 | if (node->outgoings) | |||
| 850 | for (i = 0; i < node->outgoings->rnum; i++) | |||
| 851 | { | |||
| 852 | int d = *(int*)ccv_array_get(node->outgoings, i); | |||
| 853 | backward_info[d].f_wrt |= f; | |||
| 854 | } | |||
| 855 | } | |||
| 856 | } ccv_nnc_graph_visit_endfor | |||
| 857 | // Find the wrt_symbols, and tag its flows. | |||
| 858 | ccv_nnc_graph_visit_for(forward_visit, exec_symbol_info, node, idx) { | |||
| 859 | int wrt = backward_info[idx].f_wrt & 0x2; | |||
| 860 | for (i = 0; i < node->input_size && !wrt; i++) | |||
| 861 | { | |||
| 862 | int d = node->inputs[i]; | |||
| 863 | if (d < 0) | |||
| 864 | continue; | |||
| 865 | while (tensor_symbol_info[d].alias_ref) | |||
| 866 | d = tensor_symbol_info[d].alias_ref - 1; | |||
| 867 | for (j = 0; j < wrt_symbol_size && !wrt; j++) | |||
| 868 | { | |||
| 869 | int wrt_d = wrt_symbols[j].d; | |||
| 870 | if (wrt_d < 0) | |||
| 871 | continue; | |||
| 872 | // Find the root of this tensor alias. | |||
| 873 | if (tensor_symbol_info[wrt_d].alias_ref) | |||
| 874 | wrt_d = tensor_symbol_info[wrt_d].alias_ref - 1; | |||
| 875 | if (d == wrt_d) | |||
| 876 | wrt = 0x2; | |||
| 877 | } | |||
| 878 | } | |||
| 879 | if (wrt) | |||
| 880 | { | |||
| 881 | backward_info[idx].f_wrt |= wrt; | |||
| 882 | if (node->outgoings) | |||
| 883 | for (i = 0; i < node->outgoings->rnum; i++) | |||
| 884 | { | |||
| 885 | int d = *(int*)ccv_array_get(node->outgoings, i); | |||
| 886 | backward_info[d].f_wrt |= wrt; | |||
| 887 | } | |||
| 888 | } | |||
| 889 | } ccv_nnc_graph_visit_endfor | |||
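| | /* The two passes above treat f_wrt as a 2-bit set: 0x1 marks nodes reachable from */ | |||
| | /* an f symbol along reversed edges, 0x2 marks nodes reachable from a wrt symbol */ | |||
| | /* along forward edges; only nodes with both bits set (0x3) sit on a wrt-to-f path */ | |||
| | /* and are considered for gradient computation below. */ | |||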
| 890 | enum { | |||
| 891 | WRT_SYMBOL_USE = 1, | |||
| 892 | F_SYMBOL_USE = 2 | |||
| 893 | }; | |||
| 894 | uint8_t* used_grad = (uint8_t*)cccalloc(tensor_symbol_info_size, sizeof(uint8_t)); | |||
| 895 | // First, all f_symbols and wrt_symbols are used. | |||
| 896 | for (i = 0; i < f_symbol_size; i++) | |||
| 897 | if (f_symbols[i].d >= 0) | |||
| 898 | used_grad[tensor_symbol_info[f_symbols[i].d].alias_ref ? tensor_symbol_info[f_symbols[i].d].alias_ref - 1 : f_symbols[i].d] |= F_SYMBOL_USE; | |||
| 899 | for (i = 0; i < wrt_symbol_size; i++) | |||
| 900 | if (wrt_symbols[i].d >= 0) | |||
| 901 | used_grad[tensor_symbol_info[wrt_symbols[i].d].alias_ref ? tensor_symbol_info[wrt_symbols[i].d].alias_ref - 1 : wrt_symbols[i].d] |= WRT_SYMBOL_USE; | |||
| 902 | // Do optimistic assumption, and then compute used_grad | |||
| 903 | ccv_nnc_graph_visit_for(forward_visit, exec_symbol_info, _, idx) { | |||
| 904 | ccv_nnc_graph_backward_info_t* node = backward_info + idx; | |||
| 905 | /* Only interested in the ones on the f / wrt flow */ | |||
| 906 | if ((node->f_wrt & 0x3) == 0x3) | |||
| 907 | { | |||
| 908 | const ccv_nnc_graph_exec_symbol_info_t* forw_exec = exec_symbol_info + idx; | |||
| 909 | ccv_nnc_cmd_t cmd = forw_exec->cmd; | |||
| 910 | if (cmd.cmd != CCV_NNC_NOOP) | |||
| 911 | cmd.cmd += 1; /* Backward command is the one after forward command. */ | |||
| 912 | assert(ccv_nnc_cmd_is_backward(cmd) || cmd.cmd == CCV_NNC_NOOP); | |||
| 913 | for (i = 0; i < forw_exec->output_size * 2 + forw_exec->input_size; i++) | |||
| 914 | if (!(i >= forw_exec->output_size && i < forw_exec->output_size + forw_exec->input_size && | |||
| 915 | forw_exec->inputs[i - forw_exec->output_size] < 0) && // If the input is empty, no need. | |||
| 916 | !(i >= forw_exec->output_size + forw_exec->input_size && i < forw_exec->output_size * 2 + forw_exec->input_size && | |||
| 917 | forw_exec->outputs[i - forw_exec->output_size - forw_exec->input_size] < 0) && // If the output is empty, no need. | |||
| 918 | !(i < forw_exec->output_size && forw_exec->outputs[i] < 0)) // If the output is empty for gradient, no need. | |||
| 919 | node->input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
| 920 | for (i = 0; i < forw_exec->input_size; i++) | |||
| 921 | if (!(forw_exec->inputs[i] < 0)) // If the input is empty, no need. | |||
| 922 | node->output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
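| | /* The index tests above imply the backward io layout: input slots [0, output_size) */ | |||
| | /* are the incoming gradients of the forward outputs, [output_size, output_size + */ | |||
| | /* input_size) the forward inputs, and the remainder the forward outputs, while */ | |||
| | /* output slots [0, input_size) are the gradients w.r.t. each forward input. E.g., */ | |||
| | /* with 3 inputs and 2 outputs (hypothetical), input bit 6 maps to forward output 1. */ | |||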
| 923 | int maybe_noop = 1; | |||
| 924 | for (i = 0; i < forw_exec->input_size; i++) | |||
| 925 | /* See if it is used as wrt, if not, no need to run this node at all. */ | |||
| 926 | if (forw_exec->inputs[i] >= 0 && used_grad[tensor_symbol_info[forw_exec->inputs[i]].alias_ref ? tensor_symbol_info[forw_exec->inputs[i]].alias_ref - 1 : forw_exec->inputs[i]] & WRT_SYMBOL_USE) | |||
| 927 | { | |||
| 928 | maybe_noop = 0; | |||
| 929 | break; | |||
| 930 | } | |||
| 931 | if (maybe_noop) | |||
| 932 | { | |||
| 933 | for (i = 0; i < node->input_bitmask_size; i++) | |||
| 934 | node->input_bitmasks[i] = 0; | |||
| 935 | for (i = 0; i < node->output_bitmask_size; i++) | |||
| 936 | node->output_bitmasks[i] = 0; | |||
| 937 | node->output_bitmask_size = 0; | |||
| 938 | } else if (cmd.cmd == CCV_NNC_GRAPH_FORWARD || cmd.cmd == CCV_NNC_GRAPH_BACKWARD) { | |||
| 939 | // Clear out all potential outputs if we think it is not a wrt symbol. | |||
| 940 | for (i = 0; i < forw_exec->input_size; i++) | |||
| 941 | if ((node->output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) && | |||
| 942 | !(used_grad[tensor_symbol_info[forw_exec->inputs[i]].alias_ref ? tensor_symbol_info[forw_exec->inputs[i]].alias_ref - 1 : forw_exec->inputs[i]] & WRT_SYMBOL_USE)) | |||
| 943 | node->output_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63)); | |||
| 944 | // But for now, assuming we need all input gradients. | |||
| 945 | // Clear out all inputs / outputs from forward op. | |||
| 946 | for (i = forw_exec->output_size; i < forw_exec->output_size * 2 + forw_exec->input_size; i++) | |||
| 947 | node->input_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63)); | |||
| 948 | } else if (ccv_nnc_cmd_bitmask(cmd, forw_exec->output_size * 2 + forw_exec->input_size, forw_exec->input_size, node->input_bitmasks, node->input_bitmask_size, node->output_bitmasks, node->output_bitmask_size)) { | |||
| 949 | int flag; /* Only continue if it changed */ | |||
| 950 | do { | |||
| 951 | flag = 0; | |||
| 952 | /* Check if the output first */ | |||
| 953 | for (i = 0; i < forw_exec->input_size; i++) | |||
| 954 | /* Only try to eliminate the one that is not used. */ | |||
| 955 | if ((node->output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) && | |||
| 956 | !(used_grad[tensor_symbol_info[forw_exec->inputs[i]].alias_ref ? tensor_symbol_info[forw_exec->inputs[i]].alias_ref - 1 : forw_exec->inputs[i]] & WRT_SYMBOL_USE)) | |||
| 957 | { | |||
| 958 | node->output_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63)); | |||
| 959 | /* If it worked, mark it as flagged. */ | |||
| 960 | if (ccv_nnc_cmd_bitmask(cmd, forw_exec->output_size * 2 + forw_exec->input_size, forw_exec->input_size, node->input_bitmasks, node->input_bitmask_size, node->output_bitmasks, node->output_bitmask_size)) | |||
| 961 | flag = 1; | |||
| 962 | else /* Refit this with the bit back again. */ | |||
| 963 | node->output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
| 964 | } | |||
| 965 | for (i = 0; i < forw_exec->output_size * 2 + forw_exec->input_size; i++) | |||
| 966 | if ((node->input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) && | |||
| 967 | (i >= forw_exec->output_size || | |||
| 968 | !(used_grad[tensor_symbol_info[forw_exec->outputs[i]].alias_ref ? tensor_symbol_info[forw_exec->outputs[i]].alias_ref - 1 : forw_exec->outputs[i]] & F_SYMBOL_USE))) | |||
| 969 | { /* Try to eliminate one of the input. */ | |||
| 970 | node->input_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63)); | |||
| 971 | /* If it worked, mark it as flagged. */ | |||
| 972 | if (ccv_nnc_cmd_bitmask(cmd, forw_exec->output_size * 2 + forw_exec->input_size, forw_exec->input_size, node->input_bitmasks, node->input_bitmask_size, node->output_bitmasks, node->output_bitmask_size)) | |||
| 973 | flag = 1; | |||
| 974 | else /* Refit this with the bit back again. */ | |||
| 975 | node->input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
| 976 | } | |||
| 977 | } while (flag); | |||
| 978 | } | |||
| 979 | for (i = 0; i < forw_exec->output_size; i++) | |||
| 980 | if (node->input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) | |||
| 981 | /* Mark it is used as wrt. */ | |||
| 982 | used_grad[tensor_symbol_info[forw_exec->outputs[i]].alias_ref ? tensor_symbol_info[forw_exec->outputs[i]].alias_ref - 1 : forw_exec->outputs[i]] |= WRT_SYMBOL_USE; | |||
| 983 | for (i = 0; i < forw_exec->input_size; i++) | |||
| 984 | /* Mark it is used as f. */ | |||
| 985 | if (node->output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) | |||
| 986 | used_grad[tensor_symbol_info[forw_exec->inputs[i]].alias_ref ? tensor_symbol_info[forw_exec->inputs[i]].alias_ref - 1 : forw_exec->inputs[i]] |= F_SYMBOL_USE; | |||
| 987 | } | |||
| 988 | } ccv_nnc_graph_visit_endfor | |||
| 989 | ccv_array_t* sub_f_symbols = 0; | |||
| 990 | ccv_array_t* sub_wrt_symbols = 0; | |||
| 991 | ccv_nnc_graph_visit_for(forward_visit, exec_symbol_info, _, idx) { | |||
| 992 | ccv_nnc_graph_backward_info_t* node = backward_info + idx; | |||
| 993 | const ccv_nnc_graph_exec_symbol_info_t* forw_exec = exec_symbol_info + idx; | |||
| 994 | /* Only interested in the ones on the f / wrt flow */ | |||
| 995 | if ((node->f_wrt & 0x3) == 0x3 && forw_exec->graph_ref_size > 0) | |||
| 996 | { | |||
| 997 | uint64_t stack_input_bitmasks1[node->input_bitmask_size]; | |||
| 998 | uint64_t stack_input_bitmasks2[node->input_bitmask_size]; | |||
| 999 | uint64_t* const input_bitmasks = forw_exec->graph_ref_size > 1 ? stack_input_bitmasks1 : node->input_bitmasks; | |||
| 1000 | // We collect input masks into this location. | |||
| 1001 | if (forw_exec->graph_ref_size > 1) | |||
| 1002 | memset(stack_input_bitmasks2, 0, sizeof(uint64_t) * node->input_bitmask_size); | |||
| 1003 | for (p = 0; p < forw_exec->graph_ref_size; p++) | |||
| 1004 | { | |||
| 1005 | // Reset the stack input bitmasks. | |||
| 1006 | if (forw_exec->graph_ref_size > 1) | |||
| 1007 | memcpy(stack_input_bitmasks1, node->input_bitmasks, sizeof(uint64_t) * node->input_bitmask_size); | |||
| 1008 | // Now calling it recursively until we are sure no f_symbols can be removed. | |||
| 1009 | const int graph_ref = CCV_NNC_GRAPH_REF(forw_exec)[p] - 1; | |||
| 1010 | ccv_nnc_symbolic_graph_backward_prep_t* const sub_prep = backward_prep->sub_preps + graph_ref; | |||
| 1011 | if (!sub_wrt_symbols) | |||
| 1012 | sub_wrt_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0); | |||
| 1013 | else | |||
| 1014 | ccv_array_clear(sub_wrt_symbols); | |||
| 1015 | for (i = 0; i < forw_exec->input_size; i++) | |||
| 1016 | if (node->output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) | |||
| 1017 | { | |||
| 1018 | const ccv_array_t* const s_refs = tensor_symbol_info[forw_exec->inputs[i]].s_ref; | |||
| 1019 | const int s_ref = s_refs && s_refs->rnum > graph_ref ? *(int*)ccv_array_get(s_refs, graph_ref) - 1 : -1; | |||
| 1020 | if (s_ref >= 0) | |||
| 1021 | { | |||
| 1022 | ccv_nnc_tensor_symbol_t sub_wrt_symbol = { | |||
| 1023 | .d = s_ref, | |||
| 1024 | .graph = sub_prep->graph, | |||
| 1025 | }; | |||
| 1026 | ccv_array_push(sub_wrt_symbols, &sub_wrt_symbol); | |||
| 1027 | } | |||
| 1028 | } | |||
| 1029 | int flag; /* Only continue if it changed */ | |||
| 1030 | do { | |||
| 1031 | flag = 0; | |||
| 1032 | for (i = 0; i < forw_exec->output_size; i++) | |||
| 1033 | // Try to reduce number of inputs for the backward graph. If it is not tagged as F_SYMBOL_USE, we can reduce it. | |||
| 1034 | // It is reducible because this sub graph may have multiple computation paths, therefore, some of these may not | |||
| 1035 | // involve our wrt symbols at all. | |||
| 1036 | if (!(used_grad[tensor_symbol_info[forw_exec->outputs[i]].alias_ref ? tensor_symbol_info[forw_exec->outputs[i]].alias_ref - 1 : forw_exec->outputs[i]] & F_SYMBOL_USE) && | |||
| 1037 | input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) | |||
| 1038 | { /* Try to eliminate one of the input. */ | |||
| 1039 | input_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63)); | |||
| 1040 | if (!sub_f_symbols) | |||
| 1041 | sub_f_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0); | |||
| 1042 | else | |||
| 1043 | ccv_array_clear(sub_f_symbols); | |||
| 1044 | for (j = 0; j < forw_exec->output_size; j++) | |||
| 1045 | if (node->input_bitmasks[j >> 6] & ((uint64_t)1 << (j & 63))) | |||
| 1046 | { | |||
| 1047 | const int s_ref = *(int*)ccv_array_get(tensor_symbol_info[forw_exec->outputs[j]].s_ref, graph_ref) - 1; | |||
| 1048 | assert(s_ref >= 0); | |||
| 1049 | ccv_nnc_tensor_symbol_t sub_f_symbol = { | |||
| 1050 | .d = s_ref, | |||
| 1051 | .graph = sub_prep->graph, | |||
| 1052 | }; | |||
| 1053 | ccv_array_push(sub_f_symbols, &sub_f_symbol); | |||
| 1054 | } | |||
| 1055 | if (_ccv_nnc_symbolic_graph_backward_prep_prune_ops(sub_prep, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_f_symbols, 0), sub_f_symbols->rnum, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, 0), sub_wrt_symbols->rnum, ccv_nnc_symbolic_graph_sources(sub_prep->graph), ccv_nnc_symbolic_graph_source_size(sub_prep->graph), ccv_nnc_symbolic_graph_destinations(sub_prep->graph), ccv_nnc_symbolic_graph_destination_size(sub_prep->graph))) | |||
| 1056 | flag = 1; | |||
| 1057 | else /* Refit this with the bit back again. */ | |||
| 1058 | input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
| 1059 | } | |||
| 1060 | } while (flag); | |||
| 1061 | // I am done, need to redo above for sub_prep, and it has to be successful now. | |||
| 1062 | if (!sub_f_symbols) | |||
| 1063 | sub_f_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0); | |||
| 1064 | else | |||
| 1065 | ccv_array_clear(sub_f_symbols); | |||
| 1066 | for (i = 0; i < forw_exec->output_size; i++) | |||
| 1067 | if (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) | |||
| 1068 | { | |||
| 1069 | const int s_ref = *(int*)ccv_array_get(tensor_symbol_info[forw_exec->outputs[i]].s_ref, graph_ref) - 1; | |||
| 1070 | assert(s_ref >= 0); | |||
| 1071 | ccv_nnc_tensor_symbol_t sub_f_symbol = { | |||
| 1072 | .d = s_ref, | |||
| 1073 | .graph = sub_prep->graph, | |||
| 1074 | }; | |||
| 1075 | ccv_array_push(sub_f_symbols, &sub_f_symbol); | |||
| 1076 | } | |||
| 1077 | _ccv_nnc_symbolic_graph_backward_prep_prune_ops(sub_prep, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_f_symbols, 0), sub_f_symbols->rnum, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, 0), sub_wrt_symbols->rnum, ccv_nnc_symbolic_graph_sources(sub_prep->graph), ccv_nnc_symbolic_graph_source_size(sub_prep->graph), ccv_nnc_symbolic_graph_destinations(sub_prep->graph), ccv_nnc_symbolic_graph_destination_size(sub_prep->graph)); | |||
| 1078 | if (forw_exec->graph_ref_size > 1) | |||
| 1079 | for (i = 0; i < node->input_bitmask_size; i++) | |||
| 1080 | stack_input_bitmasks2[i] |= input_bitmasks[i]; | |||
| 1081 | } | |||
| 1082 | if (forw_exec->graph_ref_size > 1) | |||
| 1083 | memcpy(node->input_bitmasks, stack_input_bitmasks2, sizeof(uint64_t) * node->input_bitmask_size); | |||
| 1084 | } | |||
| 1085 | } ccv_nnc_graph_visit_endfor | |||
| 1086 | if (sub_f_symbols) | |||
| 1087 | ccv_array_free(sub_f_symbols); | |||
| 1088 | if (sub_wrt_symbols) | |||
| 1089 | ccv_array_free(sub_wrt_symbols); | |||
| 1090 | int flag = 1; | |||
| 1091 | for (i = 0; i < f_symbol_size && flag; i++) | |||
| 1092 | flag = (used_grad[tensor_symbol_info[f_symbols[i].d].alias_ref ? tensor_symbol_info[f_symbols[i].d].alias_ref - 1 : f_symbols[i].d] & WRT_SYMBOL_USE); | |||
| 1093 | ccfree(used_grad); | |||
| 1094 | return flag; | |||
| 1095 | } | |||
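| | /* Reading the return value: 1 iff every f symbol ends up tagged WRT_SYMBOL_USE, */ | |||
| | /* i.e. some retained op path connects each f to a wrt symbol; when the recursive */ | |||
| | /* call above returns 0, the caller restores the input bit it tried to eliminate. */ | |||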
| 1096 | ||||
| 1097 | static void _ccv_nnc_symbolic_graph_backward_prep_gen(ccv_nnc_symbolic_graph_backward_prep_t* const backward_prep, const ccv_nnc_tensor_symbol_t* const f_symbols, const int f_symbol_size, const ccv_nnc_tensor_symbol_t* const wrt_symbols, const int wrt_symbol_size, const int is_while, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size) | |||
| 1098 | { | |||
| 1099 | const int exec_symbol_info_size = backward_prep->exec_symbol_info_size; | |||
| 1100 | const int tensor_symbol_info_size = backward_prep->tensor_symbol_info_size; | |||
| 1101 | const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = backward_prep->exec_symbol_info; | |||
| 1102 | const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = backward_prep->tensor_symbol_info; | |||
| 1103 | const ccv_nnc_graph_visit_t* const forward_visit = backward_prep->forward_visit; | |||
| 1104 | // Now, for each one of these, find a reverse graph. | |||
| 1105 | ccv_nnc_graph_backward_info_t* const backward_info = backward_prep->backward_info; | |||
| 1106 | const ccv_nnc_graph_visit_t* const backward_visit = backward_prep->backward_visit; | |||
| 1107 | int i, j; | |||
| 1108 | // Now, only the flow from f_symbols back to wrt_symbols is of interest to us. | |||
| 1109 | // Visit the graph in reverse order, build the AD nodes. | |||
| 1110 | ccv_nnc_autograd_graph_exec_symbol_t* const autograd_execs = (ccv_nnc_autograd_graph_exec_symbol_t*)cccalloc(exec_symbol_info_size, sizeof(ccv_nnc_autograd_graph_exec_symbol_t)); | |||
| 1111 | int max_forw_input_size = 0, max_forw_output_size = 0; | |||
| 1112 | for (i = 0; i < exec_symbol_info_size; i++) | |||
| 1113 | if ((backward_info[i].f_wrt & 0x3) == 0x3) | |||
| 1114 | { | |||
| 1115 | max_forw_input_size = ccv_max(max_forw_input_size, exec_symbol_info[i].input_size); | |||
| 1116 | max_forw_output_size = ccv_max(max_forw_output_size, exec_symbol_info[i].output_size); | |||
| 1117 | if (backward_info[i].outgoings) | |||
| 1118 | { | |||
| 1119 | // Copy over the outgoing bits. | |||
| 1120 | autograd_execs[i].outgoings = ccv_array_new(sizeof(int), backward_info[i].outgoings->rnum, 0); | |||
| 1121 | for (j = 0; j < backward_info[i].outgoings->rnum; j++) | |||
| 1122 | { | |||
| 1123 | const int d = *(int*)ccv_array_get(backward_info[i].outgoings, j); | |||
| 1124 | // Only push the outgoing node if it is in the f_wrt path. | |||
| 1125 | if ((backward_info[d].f_wrt & 0x3) == 0x3) | |||
| 1126 | ccv_array_push(autograd_execs[i].outgoings, &d); | |||
| 1127 | } | |||
| 1128 | } | |||
| 1129 | } | |||
| 1130 | int max_forw_inputs[ccv_max(1, max_forw_input_size)]; | |||
| 1131 | int max_forw_outputs[ccv_max(1, max_forw_output_size)]; | |||
| 1132 | ccv_nnc_autograd_tensor_version_t* const autograd_tensor_versions = (ccv_nnc_autograd_tensor_version_t*)cccalloc(tensor_symbol_info_size, sizeof(ccv_nnc_autograd_tensor_version_t)); | |||
| 1133 | ccv_array_t* autograd_tensor_symbols = ccv_array_new(sizeof(ccv_nnc_autograd_tensor_symbol_t), tensor_symbol_info_size, 0); | |||
| 1134 | ccv_array_t* sum_or_set_execs = ccv_array_new(sizeof(ccv_nnc_sum_or_set_graph_exec_symbol_t), 0, 0); | |||
| 1135 | ccv_nnc_graph_visit_for(backward_visit, backward_info, back_info_node, idx) { | |||
| 1136 | /* This is required by both the f flow and the wrt flow, therefore, of interest to us */ | |||
| 1137 | if ((back_info_node->f_wrt & 0x3) == 0x3) | |||
| 1138 | { | |||
| 1139 | const ccv_nnc_graph_exec_symbol_info_t* forw_exec = exec_symbol_info + idx; | |||
| 1140 | ccv_nnc_autograd_graph_exec_symbol_t* back_exec = autograd_execs + idx; | |||
| 1141 | back_exec->cmd = forw_exec->cmd; | |||
| 1142 | if (back_exec->cmd.cmd != CCV_NNC_NOOP) | |||
| 1143 | back_exec->cmd.cmd += 1; /* Backward command is the one after forward command. */ | |||
| 1144 | assert(ccv_nnc_cmd_is_backward(back_exec->cmd) || back_exec->cmd.cmd == CCV_NNC_NOOP); | |||
| 1145 | if (!back_info_node->output_bitmask_size) /* This has no output, can be a noop. */ | |||
| 1146 | back_exec->cmd.cmd = CCV_NNC_NOOP; | |||
| 1147 | else { | |||
| 1148 | int* back_input_map = max_forw_outputs; | |||
| 1149 | int* back_output_map = max_forw_inputs; | |||
| 1150 | _ccv_nnc_symbolic_graph_backward_exec_io(forw_exec, &back_input_map, &back_output_map, &back_exec->input_size, &back_exec->output_size); | |||
| 1151 | back_exec->inputs = ccmalloc(sizeof(int) * (back_exec->input_size + back_exec->output_size)); | |||
| 1152 | back_exec->outputs = back_exec->inputs + back_exec->input_size; | |||
| 1153 | /* Need to compute input before we compute output */ | |||
| 1154 | for (i = 0; i < back_exec->input_size; i++) | |||
| 1155 | { | |||
| 1156 | /* If we can skip this input, do that. */ | |||
| 1157 | if (!(back_info_node->input_bitmasks[i >> 6] & ((uint64_t)1 << i))) | |||
| 1158 | continue; | |||
| 1159 | const int d = back_input_map[i]; | |||
| 1160 | const int alias_ref = tensor_symbol_info[d].alias_ref; | |||
| 1161 | ccv_nnc_autograd_tensor_version_t* tensor_ver = alias_ref ? autograd_tensor_versions + (alias_ref - 1) : autograd_tensor_versions + d; | |||
| 1162 | /* Initialization tensor, should correspond to f symbols */ | |||
| 1163 | if (!tensor_ver->ref_version) | |||
| 1164 | { | |||
| 1165 | ccv_nnc_autograd_tensor_symbol_t tensor_sym = {}; | |||
| 1166 | if (!alias_ref) | |||
| 1167 | { | |||
| 1168 | tensor_sym.d = d; | |||
| 1169 | ccv_array_push(autograd_tensor_symbols, &tensor_sym); | |||
| 1170 | const ccv_nnc_tensor_ref_t tensor_ref = { | |||
| 1171 | .d = autograd_tensor_symbols->rnum - 1, | |||
| 1172 | .x = idx, | |||
| 1173 | .alias_registry = 0 | |||
| 1174 | }; | |||
| 1175 | tensor_ver->ref_version = ccv_array_new(sizeof(ccv_nnc_tensor_ref_t), 1, 0); | |||
| 1176 | ccv_array_push(tensor_ver->ref_version, &tensor_ref); | |||
| 1177 | } else { | |||
| 1178 | tensor_sym.d = alias_ref - 1; | |||
| 1179 | ccv_array_push(autograd_tensor_symbols, &tensor_sym); | |||
| 1180 | const ccv_nnc_tensor_ref_t tensor_ref = { | |||
| 1181 | .d = autograd_tensor_symbols->rnum - 1, | |||
| 1182 | .x = idx, | |||
| 1183 | .alias_registry = ccv_array_new(sizeof(int), 1, 0) | |||
| 1184 | }; | |||
| 1185 | tensor_ver->ref_version = ccv_array_new(sizeof(ccv_nnc_tensor_ref_t), 1, 0); | |||
| 1186 | ccv_array_push(tensor_ver->ref_version, &tensor_ref); | |||
| 1187 | tensor_sym.d = d; /* set back */ | |||
| 1188 | tensor_sym.alias_ref = tensor_ref.d + 1; | |||
| 1189 | ccv_array_push(autograd_tensor_symbols, &tensor_sym); | |||
| 1190 | const int ad = autograd_tensor_symbols->rnum - 1; | |||
| 1191 | ccv_array_push(tensor_ref.alias_registry, &ad); | |||
| 1192 | } | |||
| 1193 | } | |||
| 1194 | /* The simplest case (most common), it is not an alias. */ | |||
| 1195 | if (!alias_ref) | |||
| 1196 | { | |||
| 1197 | /* Even simpler, this only has one reference tensor, thus, pass this as input. */ | |||
| 1198 | if (tensor_ver->c == tensor_ver->ref_version->rnum - 1) | |||
| 1199 | { | |||
| 1200 | ccv_nnc_tensor_ref_t* tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, tensor_ver->c); | |||
| 1201 | /* There are alias associated with this tensor ref, zero it out when this tensor is allocated. */ | |||
| 1202 | /* This is is required. Consider the case that we have an alias of this tensor used somehwere */ | |||
| 1203 | /* on forward pass, when we compute backward, we have that alias computed first, however, its */ | |||
| 1204 | /* underlying tensor is not zero initialized, and we will end up with garbage values here. */ | |||
| 1205 | if (tensor_ref->alias_registry && | |||
| 1206 | /* Loop over to see if this tensor is fully occupied to avoid extra zero step. */ | |||
| 1207 | !_ccv_nnc_tensor_ref_fully_assigned_with_aliases(tensor_ref, autograd_tensor_symbols, tensor_symbol_info)) | |||
| 1208 | { | |||
| 1209 | ccv_nnc_autograd_tensor_symbol_t* tensor_sym = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_ref->d)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t )(autograd_tensor_symbols)->rsize * (size_t)(tensor_ref-> d))); | |||
| 1210 | assert(tensor_sym->alias_ref == 0)((void) sizeof ((tensor_sym->alias_ref == 0) ? 1 : 0), __extension__ ({ if (tensor_sym->alias_ref == 0) ; else __assert_fail ( "tensor_sym->alias_ref == 0", "ccv_nnc_symbolic_graph_backward.c" , 1210, __extension__ __PRETTY_FUNCTION__); })); | |||
| 1211 | tensor_sym->flags = CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS; | |||
| 1212 | } | |||
| 1213 | back_exec->inputs[i] = tensor_ref->d; | |||
| 1214 | } else { | |||
| 1215 | /* Otherwise, we need to sum them up, and then pass the summed result to the computation. */ | |||
| 1216 | _ccv_nnc_graph_sum_autograd_tensor_versions(idx, d, exec_symbol_info_size, tensor_symbol_info, tensor_ver, autograd_execs, autograd_tensor_symbols, sum_or_set_execs); | |||
| 1217 | ccv_nnc_tensor_ref_t* tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, tensor_ver->c)((void*)(((char*)((tensor_ver->ref_version)->data)) + ( size_t)(tensor_ver->ref_version)->rsize * (size_t)(tensor_ver ->c))); | |||
| 1218 | back_exec->inputs[i] = tensor_ref->d; | |||
| 1219 | } | |||
| 1220 | } else | |||
| 1221 | /* If this is an alias, go through all available tensor ref versions */ | |||
| 1222 | back_exec->inputs[i] = _ccv_nnc_graph_sum_autograd_tensor_versions_alias(idx, d, tensor_symbol_info, exec_symbol_info_size, tensor_symbol_info + d, tensor_ver, autograd_execs, autograd_tensor_symbols, sum_or_set_execs); | |||
| 1223 | } | |||
| 1224 | for (i = 0; i < back_exec->output_size; i++) | |||
| 1225 | { | |||
| 1226 | /* If we can skip this output, do that. */ | |||
| 1227 | if (!(back_info_node->output_bitmasks[i >> 6] & ((uint64_t)1 << i))) | |||
| 1228 | continue; | |||
| 1229 | const int d = back_output_map[i]; | |||
| 1230 | const int alias_ref = tensor_symbol_info[d].alias_ref; | |||
| 1231 | ccv_nnc_autograd_tensor_symbol_t tensor_sym = { | |||
| 1232 | .d = d | |||
| 1233 | }; | |||
| 1234 | /* The simplest case (most common), it is not an alias. */ | |||
| 1235 | if (!alias_ref) | |||
| 1236 | { | |||
| 1237 | ccv_array_push(autograd_tensor_symbols, &tensor_sym); | |||
| 1238 | const ccv_nnc_tensor_ref_t tensor_ref = { | |||
| 1239 | .d = autograd_tensor_symbols->rnum - 1, | |||
| 1240 | .x = idx, | |||
| 1241 | .exec_registry = 0, | |||
| 1242 | .alias_registry = 0 | |||
| 1243 | }; | |||
| 1244 | ccv_nnc_autograd_tensor_version_t* tensor_ver = autograd_tensor_versions + d; | |||
| 1245 | if (!tensor_ver->ref_version) | |||
| 1246 | tensor_ver->ref_version = ccv_array_new(sizeof(ccv_nnc_tensor_ref_t), 1, 0); | |||
| 1247 | ccv_array_push(tensor_ver->ref_version, &tensor_ref); | |||
| 1248 | back_exec->outputs[i] = tensor_ref.d; | |||
| 1249 | } else { | |||
| 1250 | /* Otherwise, in case that this is an alias, we try to find the existing one (in tensor_ver | |||
| 1251 | * see if can meet the need (thus, for the tensor info / ofs, it fits). */ | |||
| 1252 | ccv_nnc_autograd_tensor_version_t* tensor_ver = autograd_tensor_versions + (alias_ref - 1); | |||
| 1253 | if (!tensor_ver->ref_version) | |||
| 1254 | tensor_ver->ref_version = ccv_array_new(sizeof(ccv_nnc_tensor_ref_t), 1, 0); | |||
| 1255 | /* If already exists a ref version, check if any of these not-sealed tensors have free space. */ | |||
| 1256 | int found = 0; | |||
| 1257 | for (j = tensor_ver->c; !found && j < tensor_ver->ref_version->rnum; j++) | |||
| 1258 | { | |||
| 1259 | ccv_nnc_tensor_ref_t* tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, j)((void*)(((char*)((tensor_ver->ref_version)->data)) + ( size_t)(tensor_ver->ref_version)->rsize * (size_t)(j))); | |||
| 1260 | if (!_ccv_nnc_tensor_ref_version_involve_alias(tensor_ref, autograd_tensor_symbols, tensor_symbol_info, tensor_symbol_info + d)) | |||
| 1261 | { | |||
| 1262 | tensor_sym.alias_ref = tensor_ref->d + 1; | |||
| 1263 | ccv_array_push(autograd_tensor_symbols, &tensor_sym); | |||
| 1264 | const int ad = autograd_tensor_symbols->rnum - 1; | |||
| 1265 | ccv_array_push(tensor_ref->alias_registry, &ad); | |||
| 1266 | if (!tensor_ref->exec_registry) | |||
| 1267 | tensor_ref->exec_registry = ccv_array_new(sizeof(int), 1, 0); | |||
| 1268 | ccv_array_push(tensor_ref->exec_registry, &idx); | |||
| 1269 | back_exec->outputs[i] = ad; | |||
| 1270 | found = 1; | |||
| 1271 | } | |||
| 1272 | } | |||
| 1273 | if (!found) /* Cannot find an tensor ref to insert, create one first */ | |||
| 1274 | { | |||
| 1275 | tensor_sym.d = alias_ref - 1; /* Reference back to the non-alias. */ | |||
| 1276 | ccv_array_push(autograd_tensor_symbols, &tensor_sym); | |||
| 1277 | const ccv_nnc_tensor_ref_t tensor_ref = { | |||
| 1278 | .d = autograd_tensor_symbols->rnum - 1, | |||
| 1279 | .x = idx, | |||
| 1280 | .exec_registry = 0, | |||
| 1281 | .alias_registry = ccv_array_new(sizeof(int), 1, 0) | |||
| 1282 | }; | |||
| 1283 | ccv_array_push(tensor_ver->ref_version, &tensor_ref); | |||
| 1284 | tensor_sym.d = d; /* set back */ | |||
| 1285 | tensor_sym.alias_ref = tensor_ref.d + 1; | |||
| 1286 | ccv_array_push(autograd_tensor_symbols, &tensor_sym); | |||
| 1287 | const int ad = autograd_tensor_symbols->rnum - 1; | |||
| 1288 | ccv_array_push(tensor_ref.alias_registry, &ad); | |||
| 1289 | back_exec->outputs[i] = ad; | |||
| 1290 | } | |||
| 1291 | } | |||
| 1292 | } | |||
| 1293 | } | |||
| 1294 | } | |||
| 1295 | } ccv_nnc_graph_visit_endfor} } | |||
	// Find all relevant wrt symbols, generate sums for them if needed.
	for (i = 0; i < wrt_symbol_size; i++)
	{
		const int d = wrt_symbols[i].d;
		if (d < 0)
			continue;
		const int ref_d = (!tensor_symbol_info[d].alias_ref) ? d : tensor_symbol_info[d].alias_ref - 1;
		ccv_nnc_autograd_tensor_version_t* tensor_ver = autograd_tensor_versions + ref_d;
		if (!tensor_ver->ref_version)
		{
			// This wrt symbol is not available at all; for this case, we set its flag to init zero.
			const ccv_nnc_autograd_tensor_symbol_t tensor_sym = {
				.d = ref_d
			};
			ccv_array_push(autograd_tensor_symbols, &tensor_sym);
			ccv_nnc_sum_or_set_graph_exec_symbol_t set_exec = {
				.value = 0,
				.output = autograd_tensor_symbols->rnum - 1,
			};
			ccv_array_push(sum_or_set_execs, &set_exec);
			// Insert the one to be set to zero.
			const ccv_nnc_tensor_ref_t tensor_ref = {
				.d = autograd_tensor_symbols->rnum - 1,
				.x = exec_symbol_info_size + sum_or_set_execs->rnum - 1,
			};
			tensor_ver->ref_version = ccv_array_new(sizeof(ccv_nnc_tensor_ref_t), 1, 0);
			ccv_array_push(tensor_ver->ref_version, &tensor_ref);
			continue;
		}
		// If it is a while loop, we need to insert an accumulator into the graph (this is expressed as an initialization tensor summed with existing results).
		// First, insert the initialization tensor if this wrt result is not used directly in the next while loop (if it is, it participates in the computation and therefore needs no accumulation).
		if (is_while && !tensor_symbol_info[ref_d].assign_ref &&
			_ccv_nnc_tensor_ref_version_find_init(tensor_ver) < 0) // If the initialization tensor is not inserted yet.
		{
			const ccv_nnc_autograd_tensor_symbol_t tensor_sym = {
				.d = ref_d
			};
			ccv_array_push(autograd_tensor_symbols, &tensor_sym);
			// Insert the one to be summed.
			const ccv_nnc_tensor_ref_t tensor_ref = {
				.d = autograd_tensor_symbols->rnum - 1,
				.x = -1, // This denotes it is an initialization vector.
			};
			ccv_array_push(tensor_ver->ref_version, &tensor_ref);
		}
		// If there is more than one tensor in the list, sum them up.
		if (tensor_ver->c < tensor_ver->ref_version->rnum - 1)
			_ccv_nnc_graph_sum_autograd_tensor_versions(-1, ref_d, exec_symbol_info_size, tensor_symbol_info, tensor_ver, autograd_execs, autograd_tensor_symbols, sum_or_set_execs);
		// The tensor version should have ref_version, and only one now (after the sum up).
		assert(tensor_ver->c == tensor_ver->ref_version->rnum - 1);
	}
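	/* For orientation, a minimal sketch of the versioning scheme assumed above
	 * (the names below are illustrative, not part of this file): each gradient
	 * write appends a ccv_nnc_tensor_ref_t to ref_version, and c marks how many
	 * of them have already been folded into a sum. E.g. three partial gradients
	 * g0, g1, g2 with c == 0 trigger the sum step, which appends the EWSUM
	 * output g3 and advances c so the current version is always at index c:
	 *
	 *   ref_version: [g0, g1, g2]      c == 0  -> c < rnum - 1, needs sum
	 *   ref_version: [g0, g1, g2, g3]  c == 3  -> g3 = g0 + g1 + g2
	 */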
	// Adding additional fields to backward_prep now.
	backward_prep->autograd_execs = autograd_execs;
	backward_prep->autograd_tensor_versions = autograd_tensor_versions;
	backward_prep->autograd_tensor_symbols = autograd_tensor_symbols;
	backward_prep->sum_or_set_execs = sum_or_set_execs;
	ccv_array_t* sub_f_symbols = 0;
	ccv_array_t* sub_wrt_symbols = 0;
	ccv_nnc_graph_visit_for(forward_visit, exec_symbol_info, _, idx) {
		ccv_nnc_graph_backward_info_t* node = backward_info + idx;
		const ccv_nnc_graph_exec_symbol_info_t* forw_exec = exec_symbol_info + idx;
		/* Only interested in the ones on the f / wrt flow */
		if ((node->f_wrt & 0x3) == 0x3)
		{
			const int is_while = (forw_exec->flags & CCV_NNC_GRAPH_EXEC_P_WHILE);
			for (i = 0; i < forw_exec->graph_ref_size; i++)
			{
				// Now call it recursively until we are sure no f_symbols can be removed.
				const int graph_ref = CCV_NNC_GRAPH_REF(forw_exec)[i] - 1;
				ccv_nnc_symbolic_graph_backward_prep_t* const sub_prep = backward_prep->sub_preps + graph_ref;
				if (!sub_wrt_symbols)
					sub_wrt_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
				if (!sub_f_symbols)
					sub_f_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
				_ccv_nnc_symbolic_graph_backward_prep_sub_f_wrt_symbols(forw_exec, sub_prep->graph, graph_ref, tensor_symbol_info, node->input_bitmasks, node->output_bitmasks, sub_f_symbols, sub_wrt_symbols);
				_ccv_nnc_symbolic_graph_backward_prep_gen(sub_prep, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_f_symbols, 0), sub_f_symbols->rnum, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, 0), sub_wrt_symbols->rnum, is_while, ccv_nnc_symbolic_graph_sources(sub_prep->graph), ccv_nnc_symbolic_graph_source_size(sub_prep->graph), ccv_nnc_symbolic_graph_destinations(sub_prep->graph), ccv_nnc_symbolic_graph_destination_size(sub_prep->graph));
			}
		}
	} ccv_nnc_graph_visit_endfor
	if (sub_f_symbols)
		ccv_array_free(sub_f_symbols);
	if (sub_wrt_symbols)
		ccv_array_free(sub_wrt_symbols);
}

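/* Throughout this file, input_bitmasks / output_bitmasks pack one flag bit per
 * tensor slot into 64-bit words: bit i lives in word i >> 6. A minimal
 * illustration of the membership test used above (a hypothetical helper, not
 * used by this file; note the in-word position would be i & 63 once i can
 * reach 64 or more):
 *
 *   static inline int bitmask_isset(const uint64_t* const bitmasks, const int i)
 *   {
 *       return !!(bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)));
 *   }
 */
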
static void _ccv_nnc_symbolic_graph_backward_prep_free(const ccv_nnc_symbolic_graph_backward_prep_t backward_prep)
{
	int i, j;
	const int exec_symbol_info_size = backward_prep.exec_symbol_info_size;
	const int tensor_symbol_info_size = backward_prep.tensor_symbol_info_size;
	ccv_nnc_autograd_graph_exec_symbol_t* const autograd_execs = backward_prep.autograd_execs;
	if (autograd_execs)
	{
		for (i = 0; i < exec_symbol_info_size; i++)
		{
			if (autograd_execs[i].inputs)
				ccfree(autograd_execs[i].inputs);
			if (autograd_execs[i].outgoings)
				ccv_array_free(autograd_execs[i].outgoings);
		}
		ccfree(autograd_execs);
	}
	ccv_nnc_autograd_tensor_version_t* const autograd_tensor_versions = backward_prep.autograd_tensor_versions;
	if (autograd_tensor_versions)
	{
		for (i = 0; i < tensor_symbol_info_size; i++)
		{
			if (autograd_tensor_versions[i].ref_version)
			{
				for (j = 0; j < autograd_tensor_versions[i].ref_version->rnum; j++)
				{
					ccv_nnc_tensor_ref_t* ref_version = (ccv_nnc_tensor_ref_t*)ccv_array_get(autograd_tensor_versions[i].ref_version, j);
					if (ref_version->exec_registry)
						ccv_array_free(ref_version->exec_registry);
					if (ref_version->alias_registry)
						ccv_array_free(ref_version->alias_registry);
				}
				ccv_array_free(autograd_tensor_versions[i].ref_version);
			}
		}
		ccfree(autograd_tensor_versions);
	}
	if (backward_prep.autograd_tensor_symbols)
		ccv_array_free(backward_prep.autograd_tensor_symbols);
	ccv_array_t* const sum_or_set_execs = backward_prep.sum_or_set_execs;
	if (sum_or_set_execs)
	{
		for (i = 0; i < sum_or_set_execs->rnum; i++)
		{
			ccv_nnc_sum_or_set_graph_exec_symbol_t* sum_or_set = (ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, i);
			if (sum_or_set->inputs)
				ccfree(sum_or_set->inputs);
			if (sum_or_set->outgoings)
				ccv_array_free(sum_or_set->outgoings);
		}
		ccv_array_free(sum_or_set_execs);
	}
	// From here on, these fields are mandatory.
	ccv_nnc_graph_backward_info_t* const backward_info = backward_prep.backward_info;
	for (i = 0; i < exec_symbol_info_size; i++)
	{
		if (backward_info[i].outgoings)
			ccv_array_free(backward_info[i].outgoings);
		if (backward_info[i].input_bitmasks)
			ccfree(backward_info[i].input_bitmasks);
	}
	ccfree(backward_info);
	ccv_nnc_graph_visit_free(backward_prep.backward_visit);
	ccv_nnc_graph_visit_free(backward_prep.forward_visit);
	ccfree(backward_prep.exec_symbol_info);
	ccfree(backward_prep.tensor_symbol_info);
	for (i = 0; i < backward_prep.sub_prep_size; i++)
		_ccv_nnc_symbolic_graph_backward_prep_free(backward_prep.sub_preps[i]);
	if (backward_prep.sub_preps)
		ccfree(backward_prep.sub_preps);
}

static void _ccv_nnc_add_backward_breakpoint_for_symbol(const ccv_nnc_symbolic_graph_backward_prep_t* const backward_prep, const ccv_nnc_graph_exec_symbol_t breakpoint, ccv_nnc_symbolic_graph_t* const graph, ccv_array_t* const sub_breakpoints)
{
	const ccv_nnc_graph_exec_symbol_t noop = ccv_nnc_graph_exec_symbol_new(graph, ccv_nnc_cmd(CCV_NNC_NOOP, 0, CMD_GENERIC(), 0), 0, 0, 0, 0, 0);
	ccv_array_push(sub_breakpoints, &noop);
	// Now need to hook this up to the graph.
	const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = backward_prep->exec_symbol_info;
	const ccv_nnc_graph_visit_t* const forward_visit = backward_prep->forward_visit;
	// Now, for each one of these, find a reverse graph.
	ccv_nnc_graph_backward_info_t* const backward_info = backward_prep->backward_info;
	int i;
	// Clean up the high bit.
	for (i = 0; i < backward_prep->exec_symbol_info_size; i++)
		backward_info[i].f_wrt &= ~0x4;
	assert((backward_info[breakpoint.d].f_wrt & 0x3) != 0x3);
	backward_info[breakpoint.d].f_wrt |= 0x4;
	const ccv_nnc_graph_visit_t* const backward_visit = backward_prep->backward_visit;
	const ccv_nnc_autograd_graph_exec_symbol_t* const autograd_execs = backward_prep->autograd_execs;
	// Go forward to find whether this breakpoint is a source node to some f_wrt nodes.
	ccv_nnc_graph_visit_for(forward_visit, exec_symbol_info, forw_exec, idx) {
		ccv_nnc_graph_backward_info_t* const node = backward_info + idx;
		// If it is tagged on the breakpoint flow, but not as both f and wrt, flow through it.
		if ((node->f_wrt & 0x4) && (node->f_wrt & 0x3) != 0x3)
			for (i = 0; forw_exec->outgoings && i < forw_exec->outgoings->rnum; i++)
			{
				const int outgoing_idx = *(int*)ccv_array_get(forw_exec->outgoings, i);
				ccv_nnc_graph_backward_info_t* const outgoing_node = backward_info + outgoing_idx;
				// If this is an f_wrt node, concatenate.
				if (!(outgoing_node->f_wrt & 0x4) && (outgoing_node->f_wrt & 0x3) == 0x3)
					ccv_nnc_graph_exec_symbol_concat(graph, autograd_execs[outgoing_idx].symbol, noop);
				outgoing_node->f_wrt |= 0x4;
			}
	} ccv_nnc_graph_visit_endfor
	// Go backward to find whether this breakpoint is a destination node for some f_wrt nodes.
	ccv_nnc_graph_visit_for(backward_visit, backward_info, node, idx) {
		if ((node->f_wrt & 0x4) && (node->f_wrt & 0x3) != 0x3)
			for (i = 0; node->outgoings && i < node->outgoings->rnum; i++)
			{
				const int outgoing_idx = *(int*)ccv_array_get(node->outgoings, i);
				ccv_nnc_graph_backward_info_t* const outgoing_node = backward_info + outgoing_idx;
				// If this is an f_wrt node, concatenate.
				if (!(outgoing_node->f_wrt & 0x4) && (outgoing_node->f_wrt & 0x3) == 0x3)
					ccv_nnc_graph_exec_symbol_concat(graph, noop, autograd_execs[outgoing_idx].symbol);
				outgoing_node->f_wrt |= 0x4;
			}
	} ccv_nnc_graph_visit_endfor
}
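
/* A hedged legend for the f_wrt bit field manipulated above, inferred from its
 * uses in this file (the names are illustrative only): bit 0x1 appears to mean
 * "reachable from the f symbols", bit 0x2 "flows into the wrt symbols", so
 * (f_wrt & 0x3) == 0x3 selects nodes on the gradient path; 0x4 is a transient
 * per-breakpoint tag used for flood-fill marking, hence the "clean up the high
 * bit" pass at the top of the function above. */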

static ccv_nnc_autograd_tensor_symbol_t* _ccv_nnc_autograd_tensor_symbol_from_tensor_version(ccv_array_t* const autograd_tensor_symbols, const ccv_nnc_autograd_tensor_version_t* const tensor_ver)
{
	assert(tensor_ver->ref_version);
	const ccv_nnc_tensor_ref_t* const tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, tensor_ver->c);
	return (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_ref->d);
}
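
/* Usage sketch (hypothetical, for illustration only): given the version entry
 * for forward tensor d, this helper resolves the symbol holding its current
 * gradient:
 *
 *   ccv_nnc_autograd_tensor_version_t* const tensor_ver = autograd_tensor_versions + d;
 *   const ccv_nnc_tensor_symbol_t grad = _ccv_nnc_autograd_tensor_symbol_from_tensor_version(autograd_tensor_symbols, tensor_ver)->symbol;
 *
 * It is only meaningful after the sum step above has run, so that index c
 * points at the fully accumulated version. */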

static void _ccv_nnc_symbolic_graph_set_backward_carry_overs(const ccv_nnc_symbolic_graph_backward_prep_t* const backward_prep, const ccv_nnc_tensor_symbol_t* const wrt_symbols, const int wrt_symbol_size, ccv_nnc_symbolic_graph_t* const graph)
{
	int i;
	for (i = 0; i < backward_prep->graph->tensor_symbol_info->rnum; i++)
	{
		const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = backward_prep->tensor_symbol_info + i;
		if (tensor_symbol_info->assign_ref)
		{
			const int assign_ref = tensor_symbol_info->assign_ref - 1;
			ccv_nnc_autograd_tensor_symbol_t* const destination_autograd_symbol = _ccv_nnc_autograd_tensor_symbol_from_tensor_version(backward_prep->autograd_tensor_symbols, backward_prep->autograd_tensor_versions + assign_ref);
			ccv_nnc_autograd_tensor_symbol_t* const source_autograd_symbol = _ccv_nnc_autograd_tensor_symbol_from_tensor_version(backward_prep->autograd_tensor_symbols, backward_prep->autograd_tensor_versions + i);
			ccv_nnc_symbolic_graph_set_carry_overs(graph, (ccv_nnc_tensor_symbol_map_t []){
				{ .source = source_autograd_symbol->symbol, .destination = destination_autograd_symbol->symbol }
			}, 1);
		}
	}
	for (i = 0; i < wrt_symbol_size; i++)
	{
		const int d = wrt_symbols[i].d;
		if (d < 0)
			continue;
		const int ref_d = (!backward_prep->tensor_symbol_info[d].alias_ref) ? d : backward_prep->tensor_symbol_info[d].alias_ref - 1;
		const ccv_nnc_autograd_tensor_version_t* const tensor_ver = backward_prep->autograd_tensor_versions + ref_d;
		const int init_ref_ver = _ccv_nnc_tensor_ref_version_find_init(tensor_ver);
		if (init_ref_ver >= 0)
		{
			const int init_d = ((ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, init_ref_ver))->d;
			ccv_nnc_autograd_tensor_symbol_t* const destination_autograd_symbol = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(backward_prep->autograd_tensor_symbols, init_d);
			ccv_nnc_autograd_tensor_symbol_t* const source_autograd_symbol = _ccv_nnc_autograd_tensor_symbol_from_tensor_version(backward_prep->autograd_tensor_symbols, backward_prep->autograd_tensor_versions + ref_d);
			ccv_nnc_symbolic_graph_set_carry_overs(graph, (ccv_nnc_tensor_symbol_map_t []){
				{ .source = source_autograd_symbol->symbol, .destination = destination_autograd_symbol->symbol }
			}, 1);
		}
	}
}
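
/* Intuition for the carry-overs above (a sketch, not normative): inside a
 * while loop, the gradient of a carried-over tensor must itself be carried
 * over in the backward loop, i.e. the gradient accumulated at the end of one
 * backward iteration becomes the starting value of the next. Mapping the
 * summed gradient (source) onto the initialization tensor (destination)
 * expresses exactly that accumulation across iterations. */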

static void _ccv_nnc_symbolic_graph_add_init_zeros(const ccv_nnc_symbolic_graph_backward_prep_t* const sub_prep, const ccv_nnc_tensor_symbol_t* const wrt_symbols, const int wrt_symbol_size, ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_symbolic_graph_t* const sub_graph, ccv_array_t* const symbols)
{
	int i;
	for (i = 0; i < wrt_symbol_size; i++)
	{
		const int d = wrt_symbols[i].d;
		if (d < 0)
			continue;
		const int ref_d = (!sub_prep->tensor_symbol_info[d].alias_ref) ? d : sub_prep->tensor_symbol_info[d].alias_ref - 1;
		const ccv_nnc_autograd_tensor_version_t* const tensor_ver = sub_prep->autograd_tensor_versions + ref_d;
		const int init_ref_ver = _ccv_nnc_tensor_ref_version_find_init(tensor_ver);
		if (init_ref_ver >= 0)
		{
			// Need de-dup logic.
			const int init_d = ((ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, init_ref_ver))->d;
			ccv_nnc_autograd_tensor_symbol_t* const init_autograd_symbol = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(sub_prep->autograd_tensor_symbols, init_d);
			const ccv_nnc_tensor_symbol_info_t* const sub_init_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(sub_graph->tensor_symbol_info, init_autograd_symbol->symbol.d);
			// If it doesn't have a parent ref yet, create one.
			if (!sub_init_symbol_info->p_ref)
			{
				ccv_nnc_tensor_symbol_t new_symbol = ccv_nnc_tensor_symbol_new(graph, sub_prep->tensor_symbol_info[ref_d].info, 0);
				ccv_nnc_tensor_symbol_set_flags(graph, new_symbol, CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS);
				ccv_array_push(symbols, &new_symbol);
				ccv_nnc_tensor_symbol_hookup(graph, sub_graph, new_symbol, init_autograd_symbol->symbol);
			}
		}
	}
}
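
/* Why the zero-initialized symbol is created in the parent graph (an informal
 * note): tagging a parent-graph symbol with CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS
 * lets it be zeroed once where it is allocated, outside the loop; hooking it
 * up to the sub-graph's initialization symbol then gives the backward loop a
 * properly zeroed accumulator to start from. */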

static void _ccv_nnc_symbolic_graph_add_tape_vars(const ccv_nnc_symbolic_graph_backward_prep_t* const sub_prep, ccv_nnc_symbolic_graph_t* const root, ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_symbolic_graph_t* const sub_graph, ccv_array_t* const symbols)
{
	int i;
	for (i = 0; i < sub_graph->tensor_symbol_info->rnum; i++)
	{
		const ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(sub_graph->tensor_symbol_info, i);
		if ((symbol_info->flags & CCV_NNC_TENSOR_SYMBOL_TAPE_VAR) && symbol_info->pair_ref)
		{
			const int pair_ref = symbol_info->pair_ref - 1;
			const ccv_nnc_tensor_symbol_t root_symbol = ccv_nnc_tensor_symbol_resolve(root, (ccv_nnc_tensor_symbol_t){
				.d = pair_ref,
				.graph = sub_prep->graph,
			});
			if (root_symbol.d >= 0)
			{
				ccv_nnc_tensor_symbol_hookup(root, sub_graph, root_symbol, (ccv_nnc_tensor_symbol_t){
					.d = i,
					.graph = sub_graph,
				});
				if (symbols)
				{
					const ccv_nnc_tensor_symbol_t p_symbol = ccv_nnc_tensor_symbol_resolve(graph, (ccv_nnc_tensor_symbol_t){
						.d = i,
						.graph = sub_graph,
					});
					ccv_array_push(symbols, &p_symbol);
				}
			}
		}
	}
}
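
/* A brief note on tape vars (an informal reading of the code above): tensors
 * tagged CCV_NNC_TENSOR_SYMBOL_TAPE_VAR are forward-pass values a backward
 * sub-graph still needs. Hooking the sub-graph symbol up to its pair in the
 * root graph makes the recorded forward value visible inside the backward
 * sub-graph without recomputing it. */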

static void _ccv_nnc_symbolic_graph_backward_gen(const ccv_nnc_symbolic_graph_backward_prep_t* const backward_prep, const ccv_nnc_tensor_symbol_t* const f_symbols, const int f_symbol_size, const ccv_nnc_tensor_symbol_t* const wrt_symbols, const int wrt_symbol_size, ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_symbolic_graph_t* const root)
{
	assert(graph == backward_prep->graph || graph->pair == backward_prep->graph);
	const int exec_symbol_info_size = backward_prep->exec_symbol_info_size;
	const int tensor_symbol_info_size = backward_prep->tensor_symbol_info_size;
	const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = backward_prep->exec_symbol_info;
	const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = backward_prep->tensor_symbol_info;
	int i, j, k, p;
	ccv_array_t* const autograd_tensor_symbols = backward_prep->autograd_tensor_symbols;
	// Generate the required symbols based on the information gathered above.
	for (i = 0; i < autograd_tensor_symbols->rnum; i++)
	{
		ccv_nnc_autograd_tensor_symbol_t* symbol = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, i);
		assert(symbol->d >= 0);
		assert(symbol->d < tensor_symbol_info_size);
		const ccv_nnc_tensor_symbol_info_t* const forw_symbol = tensor_symbol_info + symbol->d;
		if (!symbol->alias_ref)
		{
			assert(!forw_symbol->alias_ref);
			symbol->symbol = ccv_nnc_tensor_symbol_new(graph, forw_symbol->info, 0);
			ccv_nnc_tensor_symbol_set_flags(graph, symbol->symbol, symbol->flags);
		} else {
			assert(forw_symbol->alias_ref);
			assert(symbol->flags == 0); // We don't set flags on aliases.
			// Due to our generation order, this must be after the original symbol is created.
			ccv_nnc_autograd_tensor_symbol_t* ref = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, symbol->alias_ref - 1);
			symbol->symbol = ccv_nnc_tensor_symbol_alias_new(graph, ref->symbol, forw_symbol->ofs, forw_symbol->stride, forw_symbol->info, 0);
		}
	}
	ccv_nnc_graph_backward_info_t* const backward_info = backward_prep->backward_info;
	ccv_nnc_autograd_graph_exec_symbol_t* const autograd_execs = backward_prep->autograd_execs;
	ccv_array_t* symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
	ccv_array_t* symbol_map = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_map_t), 0, 0);
	ccv_array_t* sub_f_symbols = 0;
	ccv_array_t* sub_wrt_symbols = 0;
	ccv_array_t* sub_execs = 0;
	for (i = 0; i < exec_symbol_info_size; i++)
	{
		// This is not going to be an interesting node. Skip.
		if ((backward_info[i].f_wrt & 0x3) != 0x3)
			continue;
		ccv_nnc_graph_backward_info_t* const back_info = backward_info + i;
		ccv_nnc_autograd_graph_exec_symbol_t* const back_exec = autograd_execs + i;
		if (back_exec->cmd.cmd == CCV_NNC_NOOP)
		{
			back_exec->symbol = ccv_nnc_graph_exec_symbol_new(graph, back_exec->cmd, 0, 0, 0, 0, 0);
			continue;
		}
		const ccv_nnc_graph_exec_symbol_info_t* const forw_exec = exec_symbol_info + i;
		if (forw_exec->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)
		{
			ccv_array_clear(symbols);
			const int graph_ref = CCV_NNC_GRAPH_REF(forw_exec)[0] - 1;
			ccv_nnc_symbolic_graph_backward_prep_t* sub_prep = backward_prep->sub_preps + graph_ref;
			ccv_nnc_symbolic_graph_t* sub_graph = ccv_nnc_symbolic_graph_new();
			sub_graph->pair = sub_prep->graph;
			if (!sub_wrt_symbols)
				sub_wrt_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
			// I am done; need to redo the above for sub_prep, and it has to be successful now.
			if (!sub_f_symbols)
				sub_f_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
			_ccv_nnc_symbolic_graph_backward_prep_sub_f_wrt_symbols(forw_exec, sub_prep->graph, graph_ref, tensor_symbol_info, back_info->input_bitmasks, back_info->output_bitmasks, sub_f_symbols, sub_wrt_symbols);
			_ccv_nnc_symbolic_graph_backward_gen(sub_prep, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_f_symbols, 0), sub_f_symbols->rnum, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, 0), sub_wrt_symbols->rnum, sub_graph, root);
			back_exec->symbol = ccv_nnc_symbolic_graph_while(graph, back_exec->cmd.cmd, sub_graph, forw_exec->name);
			if (!sub_execs)
				sub_execs = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
			ccv_array_clear(sub_execs);
			// Find the breakpoints in the forward graph, creating the reverse ones.
			for (j = 0; j < sub_prep->graph->breakpoint_size; j++)
			{
				const int d = sub_prep->graph->breakpoints[j].d;
				if (sub_prep->autograd_execs[d].symbol.graph)
					ccv_array_push(sub_execs, &sub_prep->autograd_execs[d].symbol);
				else
					_ccv_nnc_add_backward_breakpoint_for_symbol(sub_prep, sub_prep->graph->breakpoints[j], sub_graph, sub_execs);
			}
			ccv_nnc_symbolic_graph_set_while_expr(sub_graph, NOOP_GRAPH_WHILE_EXPR, 0, 0, 0, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sub_execs, 0), sub_execs->rnum);
			ccv_nnc_graph_exec_symbol_autogen(sub_graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
			_ccv_nnc_symbolic_graph_set_backward_carry_overs(sub_prep, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, 0), sub_wrt_symbols->rnum, sub_graph);
			for (j = 0; j < back_exec->input_size; j++)
				if (back_info->input_bitmasks[j >> 6] & ((uint64_t)1 << j))
					ccv_array_push(symbols, &(((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->inputs[j]))->symbol));
			// Find whether any of the wrt symbols need to be initialized to zero; if so, they need to be inputs here too.
			_ccv_nnc_symbolic_graph_add_init_zeros(sub_prep, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, 0), sub_wrt_symbols->rnum, graph, sub_graph, symbols);
			_ccv_nnc_symbolic_graph_add_tape_vars(sub_prep, root, graph, sub_graph, symbols);
			// input_size at this point may differ from back_exec->input_size because we may have added zeroing tensors as input tensors.
			const int input_size = symbols->rnum;
			for (j = 0; j < back_exec->output_size; j++)
				if (back_info->output_bitmasks[j >> 6] & ((uint64_t)1 << j))
					ccv_array_push(symbols, &(((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->outputs[j]))->symbol));
			const int output_size = symbols->rnum - input_size;
			const int p_idx = sub_prep->graph->p_idx - 1;
			assert(back_exec->input_size == forw_exec->output_size);
			k = 0;
			for (j = 0; j < back_exec->input_size; j++)
				if (back_info->input_bitmasks[j >> 6] & ((uint64_t)1 << j))
				{
					const ccv_nnc_tensor_symbol_info_t* const info = tensor_symbol_info + forw_exec->outputs[j];
					const int s_idx = *(int*)ccv_array_get(info->s_ref, p_idx) - 1;
					assert(s_idx >= 0);
					const ccv_nnc_autograd_tensor_symbol_t* const autograd_symbol = _ccv_nnc_autograd_tensor_symbol_from_tensor_version(sub_prep->autograd_tensor_symbols, sub_prep->autograd_tensor_versions + s_idx);
					ccv_nnc_tensor_symbol_hookup(graph, sub_graph, *(ccv_nnc_tensor_symbol_t*)ccv_array_get(symbols, k), autograd_symbol->symbol);
					++k;
				}
			k = input_size; // Reset k; the symbol pass is already set up by add_init_zeros.
			assert(back_exec->output_size == forw_exec->input_size);
			for (j = 0; j < back_exec->output_size; j++)
				if (back_info->output_bitmasks[j >> 6] & ((uint64_t)1 << j))
				{
					const ccv_nnc_tensor_symbol_info_t* const info = tensor_symbol_info + forw_exec->inputs[j];
					const int s_idx = *(int*)ccv_array_get(info->s_ref, p_idx) - 1;
					assert(s_idx >= 0);
					const ccv_nnc_autograd_tensor_symbol_t* const autograd_symbol = _ccv_nnc_autograd_tensor_symbol_from_tensor_version(sub_prep->autograd_tensor_symbols, sub_prep->autograd_tensor_versions + s_idx);
					ccv_nnc_tensor_symbol_hookup(graph, sub_graph, *(ccv_nnc_tensor_symbol_t*)ccv_array_get(symbols, k), autograd_symbol->symbol);
					++k;
				}
			ccv_nnc_graph_exec_symbol_set_io(graph, back_exec->symbol, ccv_array_get(symbols, 0), input_size, ccv_array_get(symbols, input_size), output_size);
		} else if (forw_exec->flags & CCV_NNC_GRAPH_EXEC_CASE_OF) {
			ccv_array_clear(symbol_map);
			for (j = 0; j < back_exec->output_size; j++)
				if (back_info->output_bitmasks[j >> 6] & ((uint64_t)1 << j))
				{
					ccv_nnc_tensor_symbol_map_t symbol = {
						.source = ((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->inputs[j]))->symbol,
						.destination = ((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->outputs[j]))->symbol,
					};
					ccv_array_push(symbol_map, &symbol);
				}
			const int symbol_map_size = symbol_map->rnum;
			back_exec->symbol = ccv_nnc_symbolic_graph_case_of_new(graph, back_exec->cmd.cmd, 0, 0, ccv_array_get(symbol_map, 0), symbol_map_size, forw_exec->name);
			ccv_nnc_symbolic_graph_set_case_of_expr(graph, back_exec->symbol, NOOP_GRAPH_CASE_OF_EXPR, 0);
			for (p = 0; p < forw_exec->graph_ref_size; p++)
			{
				const int graph_ref = CCV_NNC_GRAPH_REF(forw_exec)[p] - 1;
				ccv_nnc_symbolic_graph_backward_prep_t* sub_prep = backward_prep->sub_preps + graph_ref;
				ccv_nnc_symbolic_graph_t* sub_graph = ccv_nnc_symbolic_graph_new();
				sub_graph->pair = sub_prep->graph;
				if (!sub_wrt_symbols)
					sub_wrt_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
				// I am done; need to redo the above for sub_prep, and it has to be successful now.
				if (!sub_f_symbols)
					sub_f_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
				_ccv_nnc_symbolic_graph_backward_prep_sub_f_wrt_symbols(forw_exec, sub_prep->graph, graph_ref, tensor_symbol_info, back_info->input_bitmasks, back_info->output_bitmasks, sub_f_symbols, sub_wrt_symbols);
				_ccv_nnc_symbolic_graph_backward_gen(sub_prep, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_f_symbols, 0), sub_f_symbols->rnum, (ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, 0), sub_wrt_symbols->rnum, sub_graph, root);
				ccv_array_clear(symbol_map);
				k = 0;
				for (j = 0; j < back_exec->output_size; j++)
					if (back_info->output_bitmasks[j >> 6] & ((uint64_t)1 << j))
					{
						const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_wrt_symbols, k))->d;
						if (d >= 0)
						{
							const ccv_nnc_autograd_tensor_symbol_t* const autograd_symbol = _ccv_nnc_autograd_tensor_symbol_from_tensor_version(sub_prep->autograd_tensor_symbols, sub_prep->autograd_tensor_versions + d);
							ccv_nnc_tensor_symbol_map_t symbol = {
								.source = autograd_symbol->symbol,
								.destination = ((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->outputs[j]))->symbol,
							};
							ccv_array_push(symbol_map, &symbol);
						} else {
							// Create a new tensor in the sub-graph and set it to 0.
							const ccv_nnc_autograd_tensor_symbol_t* const autograd_symbol = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->outputs[j]);
							// autograd_symbol->d points to the corresponding forward tensor.
							ccv_nnc_tensor_symbol_t zero_symbol = ccv_nnc_tensor_symbol_new(sub_graph, tensor_symbol_info[autograd_symbol->d].info, 0);
							ccv_nnc_graph_exec_symbol_new(sub_graph, CMD_SET_FORWARD(0), 0, 0, &zero_symbol, 1, 0);
							ccv_nnc_tensor_symbol_map_t symbol = {
								.source = zero_symbol,
								.destination = autograd_symbol->symbol,
							};
							ccv_array_push(symbol_map, &symbol);
						}
						++k;
					}
				ccv_nnc_graph_exec_symbol_autogen(sub_graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
				const int symbol_map_size = symbol_map->rnum;
				ccv_nnc_symbolic_graph_set_case_of(graph, back_exec->symbol, sub_graph, p, ccv_array_get(symbol_map, 0), symbol_map_size);
				// Hook up inputs only after this becomes a sub-graph of the graph.
				k = 0;
				for (j = 0; j < back_exec->input_size; j++)
					if (back_info->input_bitmasks[j >> 6] & ((uint64_t)1 << j))
					{
						const int d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(sub_f_symbols, k))->d;
						assert(d >= 0);
						// No corresponding sub tensors allocated. Skip.
						if (!sub_prep->autograd_tensor_versions[d].ref_version ||
							!sub_prep->autograd_tensor_versions[d].ref_version->rnum)
							continue;
						const ccv_nnc_autograd_tensor_symbol_t* const autograd_symbol = _ccv_nnc_autograd_tensor_symbol_from_tensor_version(sub_prep->autograd_tensor_symbols, sub_prep->autograd_tensor_versions + d);
						ccv_nnc_tensor_symbol_hookup(graph, sub_graph, ((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->inputs[j]))->symbol, autograd_symbol->symbol);
						++k;
					}
				// Need to make sure tape vars are hooked up.
				_ccv_nnc_symbolic_graph_add_tape_vars(sub_prep, root, graph, sub_graph, 0);
			}
		} else {
			ccv_array_clear(symbols);
			// Gradient inputs.
			for (j = 0; j < back_exec->input_size; j++)
				if (back_info->input_bitmasks[j >> 6] & ((uint64_t)1 << j))
					ccv_array_push(symbols, &(((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->inputs[j]))->symbol));
				else
					ccv_array_push(symbols, &NO_TENSOR_SYMBOL);
			// Inputs from the forward function.
			for (j = 0; j < forw_exec->input_size; j++)
				if (!(back_info->input_bitmasks[(j + back_exec->input_size) >> 6] & ((uint64_t)1 << (j + back_exec->input_size))))
					ccv_array_push(symbols, &NO_TENSOR_SYMBOL);
				else {
					const ccv_nnc_tensor_symbol_t symbol = {
						.d = forw_exec->inputs[j],
						.graph = backward_prep->graph
					};
					if (graph == backward_prep->graph)
						ccv_array_push(symbols, &symbol);
					else { // Otherwise, create a new symbol, and set its pair to the old symbol.
						const ccv_nnc_tensor_symbol_t new_symbol = ccv_nnc_tensor_symbol_new(graph, tensor_symbol_info[forw_exec->inputs[j]].info, tensor_symbol_info[forw_exec->inputs[j]].name);
						ccv_nnc_tensor_symbol_pair_with(graph, new_symbol, symbol);
						const int flags = ccv_nnc_tensor_symbol_flags(backward_prep->graph, symbol) | CCV_NNC_TENSOR_SYMBOL_TAPE_VAR;
						ccv_nnc_tensor_symbol_set_flags(graph, new_symbol, flags);
						ccv_nnc_tensor_symbol_set_flags(backward_prep->graph, symbol, flags);
						ccv_array_push(symbols, &new_symbol);
					}
				}
			// Outputs from the forward function.
			for (j = 0; j < forw_exec->output_size; j++)
				if (!(back_info->input_bitmasks[(j + back_exec->input_size + forw_exec->input_size) >> 6] & ((uint64_t)1 << (j + back_exec->input_size + forw_exec->input_size))))
					ccv_array_push(symbols, &NO_TENSOR_SYMBOL);
				else {
					const ccv_nnc_tensor_symbol_t symbol = {
						.d = forw_exec->outputs[j],
						.graph = backward_prep->graph
					};
					if (graph == backward_prep->graph)
						ccv_array_push(symbols, &symbol);
					else { // Otherwise, create a new symbol, and set its pair to the old symbol.
						const ccv_nnc_tensor_symbol_t new_symbol = ccv_nnc_tensor_symbol_new(graph, tensor_symbol_info[forw_exec->outputs[j]].info, tensor_symbol_info[forw_exec->outputs[j]].name);
						ccv_nnc_tensor_symbol_pair_with(graph, new_symbol, symbol);
						const int flags = ccv_nnc_tensor_symbol_flags(backward_prep->graph, symbol) | CCV_NNC_TENSOR_SYMBOL_TAPE_VAR;
						ccv_nnc_tensor_symbol_set_flags(graph, new_symbol, flags);
						ccv_nnc_tensor_symbol_set_flags(backward_prep->graph, symbol, flags);
						ccv_array_push(symbols, &new_symbol);
					}
				}
			for (j = 0; j < back_exec->output_size; j++)
				if (back_info->output_bitmasks[j >> 6] & ((uint64_t)1 << j))
					ccv_array_push(symbols, &(((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, back_exec->outputs[j]))->symbol));
				else
					ccv_array_push(symbols, &NO_TENSOR_SYMBOL);
			back_exec->symbol = ccv_nnc_graph_exec_symbol_new(graph, back_exec->cmd, ccv_array_get(symbols, 0), back_exec->input_size + forw_exec->input_size + forw_exec->output_size, ccv_array_get(symbols, back_exec->input_size + forw_exec->input_size + forw_exec->output_size), back_exec->output_size, 0);
			ccv_nnc_graph_exec_symbol_set_hint(graph, back_exec->symbol, exec_symbol_info[i].hint);
			ccv_nnc_graph_exec_symbol_pair_with(graph, back_exec->symbol, (ccv_nnc_graph_exec_symbol_t){
				.d = i,
				.graph = backward_prep->graph,
			});
		}
	}
	if (sub_f_symbols)
		ccv_array_free(sub_f_symbols);
	if (sub_wrt_symbols)
		ccv_array_free(sub_wrt_symbols);
	if (sub_execs)
		ccv_array_free(sub_execs);
| 1864 | ccv_array_t* const sum_or_set_execs = backward_prep->sum_or_set_execs; | |||
| 1865 | for (i = 0; i < sum_or_set_execs->rnum; i++) | |||
| 1866 | { | |||
| 1867 | ccv_nnc_sum_or_set_graph_exec_symbol_t* sum_or_set_exec = (ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, i)((void*)(((char*)((sum_or_set_execs)->data)) + (size_t)(sum_or_set_execs )->rsize * (size_t)(i))); | |||
| 1868 | // It is sum, set don't have inputs. | |||
| 1869 | if (sum_or_set_exec->input_size) | |||
| 1870 | { | |||
| 1871 | ccv_array_clear(symbols); | |||
| 1872 | // This is to sum. | |||
| 1873 | for (j = 0; j < sum_or_set_exec->input_size; j++) | |||
| 1874 | ccv_array_push(symbols, &(((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, sum_or_set_exec->inputs[j])((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t )(autograd_tensor_symbols)->rsize * (size_t)(sum_or_set_exec ->inputs[j]))))->symbol)); | |||
| 1875 | ccv_nnc_cmd_t cmd = ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, CMD_GENERIC()((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}}}), 0); | |||
| 1876 | sum_or_set_exec->symbol = ccv_nnc_graph_exec_symbol_new(graph, cmd, ccv_array_get(symbols, 0)((void*)(((char*)((symbols)->data)) + (size_t)(symbols)-> rsize * (size_t)(0))), sum_or_set_exec->input_size, &(((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, sum_or_set_exec->output)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t )(autograd_tensor_symbols)->rsize * (size_t)(sum_or_set_exec ->output))))->symbol), 1, 0); | |||
| 1877 | } else | |||
| 1878 | sum_or_set_exec->symbol = ccv_nnc_graph_exec_symbol_new(graph, CMD_SET_FORWARD(sum_or_set_exec->value)ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size ={.dim={1,1,1}},.blas={.a={sum_or_set_exec->value,}}}, 0), 0, 0, &(((ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, sum_or_set_exec->output)((void*)(((char*)((autograd_tensor_symbols)->data)) + (size_t )(autograd_tensor_symbols)->rsize * (size_t)(sum_or_set_exec ->output))))->symbol), 1, 0); | |||
| 1879 | } | |||
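| | /* Editor's note: a sketch, with hypothetical symbols g0, g1 and g_sum, of the | |||
| | * accumulation node this loop materializes. When a forward tensor fans out to | |||
| | * several consumers, its gradient is the elementwise sum of the partials: | |||
| | * | |||
| | *   ccv_nnc_tensor_symbol_t partials[] = { g0, g1 }; | |||
| | *   ccv_nnc_graph_exec_symbol_new(graph, ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, CMD_GENERIC(), 0), partials, 2, &g_sum, 1, 0); | |||
| | */ | |||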
| 1880 | ccv_array_free(symbol_map); | |||
| 1881 | ccv_array_free(symbols); | |||
| 1882 | for (i = 0; i < exec_symbol_info_size; i++) | |||
| 1883 | { | |||
| 1884 | // This is not going to be an interesting node. Skip. | |||
| 1885 | if ((backward_info[i].f_wrt & 0x3) != 0x3) | |||
| 1886 | continue; | |||
| 1887 | ccv_nnc_autograd_graph_exec_symbol_t* const back_exec = autograd_execs + i; | |||
| 1888 | // If we are on the same graph, we cannot tell whether the backward node comes before or after forw_exec, so enforce that it runs after forw_exec. | |||
| 1889 | if (graph == backward_prep->graph) | |||
| 1890 | ccv_nnc_graph_exec_symbol_concat(graph, (ccv_nnc_graph_exec_symbol_t){ | |||
| 1891 | .d = i, | |||
| 1892 | .graph = graph | |||
| 1893 | }, back_exec->symbol); | |||
| 1894 | if (back_exec->outgoings) | |||
| 1895 | for (j = 0; j < back_exec->outgoings->rnum; j++) | |||
| 1896 | { | |||
| 1897 | int d = *(int*)ccv_array_get(back_exec->outgoings, j); | |||
| 1898 | if (d < exec_symbol_info_size) | |||
| 1899 | ccv_nnc_graph_exec_symbol_concat(graph, back_exec->symbol, autograd_execs[d].symbol); | |||
| 1900 | else | |||
| 1901 | ccv_nnc_graph_exec_symbol_concat(graph, back_exec->symbol, ((ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, d - exec_symbol_info_size))->symbol); | |||
| 1902 | } | |||
| 1903 | } | |||
| 1904 | for (i = 0; i < sum_or_set_execs->rnum; i++) | |||
| 1905 | { | |||
| 1906 | ccv_nnc_sum_or_set_graph_exec_symbol_t* exec = (ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, i); | |||
| 1907 | if (exec->outgoings) | |||
| 1908 | for (j = 0; j < exec->outgoings->rnum; j++) | |||
| 1909 | { | |||
| 1910 | int d = *(int*)ccv_array_get(exec->outgoings, j); | |||
| 1911 | if (d < exec_symbol_info_size) | |||
| 1912 | ccv_nnc_graph_exec_symbol_concat(graph, exec->symbol, autograd_execs[d].symbol); | |||
| 1913 | else | |||
| 1914 | ccv_nnc_graph_exec_symbol_concat(graph, exec->symbol, ((ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, d - exec_symbol_info_size))->symbol); | |||
| 1915 | } | |||
| 1916 | } | |||
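| | /* Editor's note: in both loops above, outgoing indices share one index space: a | |||
| | * value d < exec_symbol_info_size names an autograd exec, while a larger d names a | |||
| | * sum/set exec at offset d - exec_symbol_info_size. A sketch of the resolution: | |||
| | * | |||
| | *   // resolve an outgoing index d to its graph exec symbol | |||
| | *   ccv_nnc_graph_exec_symbol_t s = (d < exec_symbol_info_size) ? | |||
| | *     autograd_execs[d].symbol : | |||
| | *     ((ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, d - exec_symbol_info_size))->symbol; | |||
| | */ | |||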
| 1917 | // Now that everything is done, set the metadata on the graph so that we can look up backward symbols later. | |||
| 1918 | if (graph->backward.tensor_symbol_idx) | |||
| 1919 | graph->backward.tensor_symbol_idx = (int*)ccrealloc(graph->backward.tensor_symbol_idx, sizeof(int) * (graph->tensor_symbol_info->rnum + tensor_symbol_info_size)); | |||
| 1920 | else | |||
| 1921 | graph->backward.tensor_symbol_idx = (int*)ccmalloc(sizeof(int) * (graph->tensor_symbol_info->rnum + tensor_symbol_info_size)); | |||
| 1922 | graph->backward.tensor_symbol_size = tensor_symbol_info_size; | |||
| 1923 | graph->backward.exec_symbol_idx = graph->backward.tensor_symbol_idx + tensor_symbol_info_size; | |||
| 1924 | graph->backward.exec_symbol_size = graph->tensor_symbol_info->rnum; | |||
| 1925 | for (i = 0; i < tensor_symbol_info_size; i++) | |||
| 1926 | graph->backward.tensor_symbol_idx[i] = -1; | |||
| 1927 | for (i = 0; i < graph->backward.exec_symbol_size; i++) | |||
| 1928 | graph->backward.exec_symbol_idx[i] = -1; | |||
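| | /* Editor's note: both lookup tables share the single allocation made above; a | |||
| | * sketch of the layout, following the assignments just before this point: | |||
| | * | |||
| | *   int* const buf = graph->backward.tensor_symbol_idx; | |||
| | *   // buf[0 .. tensor_symbol_size)  maps forward tensor d -> gradient tensor d | |||
| | *   // buf[tensor_symbol_size .. tensor_symbol_size + exec_symbol_size) is | |||
| | *   // exec_symbol_idx, mapping gradient tensor d -> the exec symbol producing it; | |||
| | *   // -1 marks "no entry" in either table. | |||
| | */ | |||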
| 1929 | ccv_nnc_autograd_tensor_version_t* const autograd_tensor_versions = backward_prep->autograd_tensor_versions; | |||
| 1930 | // Assigning for wrt symbols. | |||
| 1931 | for (i = 0; i < wrt_symbol_size; i++) | |||
| 1932 | { | |||
| 1933 | const int d = wrt_symbols[i].d; | |||
| 1934 | if (d < 0) | |||
| 1935 | continue; | |||
| 1936 | assert(d < tensor_symbol_info_size); | |||
| 1937 | const ccv_nnc_tensor_symbol_info_t* const forw_symbol = tensor_symbol_info + d; | |||
| 1938 | ccv_nnc_autograd_tensor_version_t* const tensor_ver = autograd_tensor_versions + ((!forw_symbol->alias_ref) ? d : forw_symbol->alias_ref - 1); | |||
| 1939 | assert(tensor_ver->ref_version); | |||
| 1940 | ccv_nnc_tensor_ref_t* const tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, tensor_ver->c); | |||
| 1941 | ccv_nnc_autograd_tensor_symbol_t* const autograd_symbol = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_ref->d); | |||
| 1942 | // If this wrt symbol is an alias, create extra alias for this. | |||
| 1943 | if (!forw_symbol->alias_ref) | |||
| 1944 | graph->backward.tensor_symbol_idx[d] = autograd_symbol->symbol.d; | |||
| 1945 | else // We create a new alias; it cannot be referenced from exec_symbol_idx because that table's size is limited to the previous tensor symbol count. | |||
| 1946 | graph->backward.tensor_symbol_idx[d] = ccv_nnc_tensor_symbol_alias_new(graph, autograd_symbol->symbol, forw_symbol->ofs, forw_symbol->stride, forw_symbol->info, 0).d; | |||
| 1947 | const int dd = autograd_symbol->symbol.d; | |||
| 1948 | const int x = tensor_ref->x; | |||
| 1949 | if (tensor_ref->exec_registry && tensor_ref->exec_registry->rnum) // Create no-op node. | |||
| 1950 | { | |||
| 1951 | ccv_nnc_graph_exec_symbol_t noop = ccv_nnc_graph_exec_symbol_new(graph, ccv_nnc_cmd(CCV_NNC_NOOP, 0, CMD_GENERIC(), 0), 0, 0, 0, 0, 0); | |||
| 1952 | if (x < exec_symbol_info_size) | |||
| 1953 | ccv_nnc_graph_exec_symbol_concat(graph, autograd_execs[x].symbol, noop); | |||
| 1954 | else | |||
| 1955 | ccv_nnc_graph_exec_symbol_concat(graph, ((ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, x - exec_symbol_info_size))->symbol, noop); | |||
| 1956 | for (j = 0; j < tensor_ref->exec_registry->rnum; j++) | |||
| 1957 | { | |||
| 1958 | const int x = *(int*)ccv_array_get(tensor_ref->exec_registry, j); | |||
| 1959 | assert(x >= 0); /* Otherwise this is an initialization tensor, which cannot be part of a sum. */ | |||
| 1960 | assert(x < exec_symbol_info_size); // exec_registry is only used by alias_registry; it simply cannot reference a sum operation. | |||
| 1961 | ccv_nnc_graph_exec_symbol_concat(graph, autograd_execs[x].symbol, noop); | |||
| 1962 | } | |||
| 1963 | graph->backward.exec_symbol_idx[dd] = noop.d; | |||
| 1964 | } else { | |||
| 1965 | if (x < exec_symbol_info_size) | |||
| 1966 | graph->backward.exec_symbol_idx[dd] = autograd_execs[x].symbol.d; | |||
| 1967 | else | |||
| 1968 | graph->backward.exec_symbol_idx[dd] = ((ccv_nnc_sum_or_set_graph_exec_symbol_t*)ccv_array_get(sum_or_set_execs, x - exec_symbol_info_size))->symbol.d; | |||
| 1969 | } | |||
| 1970 | } | |||
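| | /* Editor's note: design note on the no-op above. When several exec symbols jointly | |||
| | * complete an aliased gradient, there is no single producer to record in | |||
| | * exec_symbol_idx, so a CCV_NNC_NOOP node is inserted as a join point and every | |||
| | * writer is concatenated into it; the no-op then stands in as "the" producer. A | |||
| | * sketch with hypothetical writer_a/writer_b symbols: | |||
| | * | |||
| | *   ccv_nnc_graph_exec_symbol_concat(graph, writer_a, noop); | |||
| | *   ccv_nnc_graph_exec_symbol_concat(graph, writer_b, noop); | |||
| | *   graph->backward.exec_symbol_idx[dd] = noop.d; | |||
| | */ | |||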
| 1971 | // Assigning for f symbols. | |||
| 1972 | for (i = 0; i < f_symbol_size; i++) | |||
| 1973 | { | |||
| 1974 | const int d = f_symbols[i].d; | |||
| 1975 | assert(d >= 0); | |||
| 1976 | assert(d < tensor_symbol_info_size); | |||
| 1977 | const ccv_nnc_autograd_tensor_version_t* const tensor_ver = autograd_tensor_versions + d; | |||
| 1978 | if (tensor_ver->ref_version) | |||
| 1979 | { | |||
| 1980 | // We don't use _ccv_nnc_autograd_tensor_symbol_from_tensor_version because that selects the last version; here we need the first one. | |||
| 1981 | const ccv_nnc_tensor_ref_t* const tensor_ref = (ccv_nnc_tensor_ref_t*)ccv_array_get(tensor_ver->ref_version, 0); | |||
| 1982 | const ccv_nnc_autograd_tensor_symbol_t* const autograd_symbol = (ccv_nnc_autograd_tensor_symbol_t*)ccv_array_get(autograd_tensor_symbols, tensor_ref->d); | |||
| 1983 | graph->backward.tensor_symbol_idx[d] = autograd_symbol->symbol.d; | |||
| 1984 | // We cannot record a single relevant backward exec symbol for f; there could be many. | |||
| 1985 | } | |||
| 1986 | } | |||
| 1987 | } | |||
| 1988 | ||||
| 1989 | void ccv_nnc_symbolic_graph_backward(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const f_symbols, const int f_symbol_size, const ccv_nnc_tensor_symbol_t* const wrt_symbols, const int wrt_symbol_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size) | |||
| 1990 | { | |||
| 1991 | int i; | |||
| 1992 | // f symbols cannot be aliases. | |||
| 1993 | for (i = 0; i < f_symbol_size; i++) | |||
| 1994 | if (f_symbols[i].d >= 0) | |||
| 1995 | { | |||
| 1996 | assert(f_symbols[i].graph == graph); // f symbol has to be in the current graph. | |||
| 1997 | assert(!((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, f_symbols[i].d))->alias_ref); | |||
| 1998 | } | |||
| 1999 | for (i = 0; i < wrt_symbol_size; i++) | |||
| 2000 | if (wrt_symbols[i].d >= 0) | |||
| 2001 | { | |||
| 2002 | assert(wrt_symbols[i].graph == graph); | |||
| 2003 | // This is not an alias, or what it refers to is not an alias. | |||
| 2004 | assert(!((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, wrt_symbols[i].d))->alias_ref || !((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, wrt_symbols[i].d))->alias_ref - 1))->alias_ref); | |||
| 2005 | } | |||
| 2006 | const int exec_symbol_info_size = graph->exec_symbol_info->rnum; | |||
| 2007 | const int tensor_symbol_info_size = graph->tensor_symbol_info->rnum; | |||
| 2008 | assert(exec_symbol_info_size > 0); | |||
| 2009 | assert(tensor_symbol_info_size > 0); | |||
| 2010 | ccv_nnc_symbolic_graph_backward_prep_t backward_prep = _ccv_nnc_symbolic_graph_backward_prep(graph, sources, source_size, destinations, destination_size); | |||
| 2011 | _ccv_nnc_symbolic_graph_backward_prep_prune_ops(&backward_prep, f_symbols, f_symbol_size, wrt_symbols, wrt_symbol_size, sources, source_size, destinations, destination_size); | |||
| 2012 | _ccv_nnc_symbolic_graph_backward_prep_gen(&backward_prep, f_symbols, f_symbol_size, wrt_symbols, wrt_symbol_size, 0, sources, source_size, destinations, destination_size); | |||
| 2013 | _ccv_nnc_symbolic_graph_backward_gen(&backward_prep, f_symbols, f_symbol_size, wrt_symbols, wrt_symbol_size, graph, graph); | |||
| 2014 | _ccv_nnc_symbolic_graph_backward_prep_free(backward_prep); | |||
| 2015 | } | |||
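| | /* Editor's note: a minimal end-to-end usage sketch (untested; assumes the | |||
| | * CMD_EWEXP_FORWARD, TENSOR_SYMBOL_LIST and SYMBOLIC_GRAPH_SOURCES/DESTINATIONS | |||
| | * helpers from ccv_nnc_easy.h; shapes and names are hypothetical): | |||
| | * | |||
| | *   ccv_nnc_symbolic_graph_t* const g = ccv_nnc_symbolic_graph_new(); | |||
| | *   const ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(g, CPU_TENSOR_NHWC(32F, 1), "x"); | |||
| | *   const ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(g, CPU_TENSOR_NHWC(32F, 1), "y"); | |||
| | *   ccv_nnc_graph_exec_symbol_new(g, CMD_EWEXP_FORWARD(), TENSOR_SYMBOL_LIST(x), TENSOR_SYMBOL_LIST(y), "exp"); | |||
| | *   ccv_nnc_graph_exec_symbol_autogen(g, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); | |||
| | *   ccv_nnc_symbolic_graph_backward(g, TENSOR_SYMBOL_LIST(y), TENSOR_SYMBOL_LIST(x), | |||
| | *     SYMBOLIC_GRAPH_SOURCES(g), SYMBOLIC_GRAPH_DESTINATIONS(g)); | |||
| | *   const ccv_nnc_tensor_symbol_t dx = ccv_nnc_tensor_symbol_for_backward(g, x); | |||
| | *   ccv_nnc_symbolic_graph_free(g); | |||
| | */ | |||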
| 2016 | ||||
| 2017 | ccv_nnc_tensor_symbol_t ccv_nnc_tensor_symbol_for_backward(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t symbol) | |||
| 2018 | { | |||
| 2019 | assert(symbol.d >= 0); | |||
| 2020 | assert(symbol.d < graph->backward.tensor_symbol_size); | |||
| 2021 | if (graph->backward.tensor_symbol_idx[symbol.d] < 0) | |||
| 2022 | return NO_TENSOR_SYMBOL; | |||
| 2023 | ccv_nnc_tensor_symbol_t tensor = { | |||
| 2024 | .d = graph->backward.tensor_symbol_idx[symbol.d], | |||
| 2025 | .graph = graph, | |||
| 2026 | }; | |||
| 2027 | return tensor; | |||
| 2028 | } | |||
| 2029 | ||||
| 2030 | ccv_nnc_graph_exec_symbol_t ccv_nnc_graph_exec_symbol_for_backward(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t symbol) | |||
| 2031 | { | |||
| 2032 | assert(symbol.d >= 0); | |||
| 2033 | assert(symbol.d < graph->tensor_symbol_info->rnum); | |||
| 2034 | int dd = symbol.d; | |||
| 2035 | // Check if this is an alias. Use the original if it is. | |||
| 2036 | ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, dd); | |||
| 2037 | if (symbol_info->alias_ref) | |||
| 2038 | dd = symbol_info->alias_ref - 1; | |||
| 2039 | assert(dd >= 0); | |||
| 2040 | assert(dd < graph->backward.exec_symbol_size); | |||
| 2041 | if (graph->backward.exec_symbol_idx[dd] < 0) | |||
| 2042 | return (ccv_nnc_graph_exec_symbol_t){ | |||
| 2043 | .graph = 0, | |||
| 2044 | .d = CCV_NNC_NO_GRAPH_EXEC_SYMBOL | |||
| 2045 | }; | |||
| 2046 | ccv_nnc_graph_exec_symbol_t exec = { | |||
| 2047 | .d = graph->backward.exec_symbol_idx[dd], | |||
| 2048 | .graph = graph | |||
| 2049 | }; | |||
| 2050 | return exec; | |||
| 2051 | } |
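| | /* Editor's note: a sketch (hypothetical g/x symbols) of chaining the two lookups. | |||
| | * The exec lookup takes the gradient tensor symbol returned by the tensor lookup, | |||
| | * e.g. to concat a custom node after the exec that produces the gradient: | |||
| | * | |||
| | *   const ccv_nnc_tensor_symbol_t dx = ccv_nnc_tensor_symbol_for_backward(g, x); | |||
| | *   const ccv_nnc_graph_exec_symbol_t dx_exec = ccv_nnc_graph_exec_symbol_for_backward(g, dx); | |||
| | */ | |||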