File: nnc/ccv_nnc_symbolic_graph_simplify.c
Warning: line 779, column 28: The left expression of the compound assignment is an uninitialized value. The computed value will also be garbage.

#include "ccv_nnc.h"
#include "ccv_nnc_easy.h"
#include "ccv_nnc_internal.h"
#include "ccv_internal.h"
#include "_ccv_nnc_symbolic_graph.h"
#include "3rdparty/siphash/siphash24.h"

// MARK - Level-3.5 API

static uint8_t key_siphash[16] = "graphcsekvlibnnc";

typedef struct {
	int tensor_symbol_info_size;
	int exec_symbol_info_size;
	ccv_nnc_symbolic_graph_t* graph;
	ccv_nnc_graph_visit_t* visit;
	ccv_nnc_tensor_symbol_info_t* tensor_symbol_info;
	ccv_nnc_graph_exec_symbol_info_t* exec_symbol_info;
	uint32_t* exec_dead; // Marks an exec as dead so it can be cleared; each bit represents one exec.
	uint32_t* tensor_dead; // Marks a tensor as dead so it can be cleared; each bit represents one tensor.
	int* output_execs; // Mapping from a tensor to the exec that generates it.
} ccv_nnc_symbolic_graph_simplify_t;
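
// The dead masks above are plain bit arrays: bit (i & 0x1f) of word (i >> 5)
// tracks symbol i. A minimal sketch of the convention used throughout this
// file; these helpers are illustrative only and not part of the library:
static inline int _ccv_nnc_bit_is_set(const uint32_t* const mask, const int i)
{
	return (mask[i >> 5] & (1u << (i & 0x1f))) != 0;
}

static inline void _ccv_nnc_bit_set(uint32_t* const mask, const int i)
{
	mask[i >> 5] |= (1u << (i & 0x1f));
}

static inline void _ccv_nnc_bit_clear(uint32_t* const mask, const int i)
{
	mask[i >> 5] &= ~(1u << (i & 0x1f));
}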

static void _ccv_nnc_symbolic_graph_simplify_update_output_execs(ccv_nnc_symbolic_graph_simplify_t* const simplify)
{
	int i;
	for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		simplify->output_execs[i] = -1;
	ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
		if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
			continue;
		for (i = 0; i < node->output_size; i++)
			if (node->outputs[i] >= 0)
				simplify->output_execs[node->outputs[i]] = idx; // A tensor can only be written once.
	} ccv_nnc_graph_visit_endfor
}

static ccv_nnc_symbolic_graph_simplify_t* _ccv_nnc_symbolic_graph_simplify_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
{
	ccv_nnc_symbolic_graph_simplify_t* const simplify = (ccv_nnc_symbolic_graph_simplify_t*)ccmalloc(sizeof(ccv_nnc_symbolic_graph_simplify_t));
	simplify->graph = graph;
	simplify->visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0), graph->exec_symbol_info->rnum, sources, source_size, destinations, destination_size, 0);
	simplify->tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_info_t) * graph->tensor_symbol_info->rnum);
	simplify->exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_info_t) * graph->exec_symbol_info->rnum);
	ccv_nnc_symbolic_graph_symbol_infer(graph, simplify->visit, sources, source_size, destinations, destination_size, 0, 0, simplify->tensor_symbol_info, simplify->exec_symbol_info);
	simplify->tensor_symbol_info_size = graph->tensor_symbol_info->rnum;
	simplify->exec_symbol_info_size = graph->exec_symbol_info->rnum;
	simplify->exec_dead = cccalloc(((simplify->exec_symbol_info_size + 31) >> 5) + ((simplify->tensor_symbol_info_size + 31) >> 5), sizeof(uint32_t));
	simplify->tensor_dead = simplify->exec_dead + ((simplify->exec_symbol_info_size + 31) >> 5);
	simplify->output_execs = (int*)ccmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
	return simplify;
}

static void _ccv_nnc_symbolic_graph_simplify_apply(ccv_nnc_symbolic_graph_simplify_t* const simplify)
{
	int i, j;
	for (i = 0; i < simplify->exec_symbol_info_size; i++)
		if (simplify->exec_dead[i >> 5] & (1u << (i & 0x1f)))
			ccv_nnc_graph_exec_symbol_free(simplify->graph, (ccv_nnc_graph_exec_symbol_t){
				.d = i,
				.graph = simplify->graph,
			});
		else // If it is not marked as dead, go through its outputs to unmark those tensors.
			for (j = 0; j < simplify->exec_symbol_info[i].output_size; j++)
			{
				const int d = simplify->exec_symbol_info[i].outputs[j];
				if (d >= 0)
					simplify->tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
			}
	for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		if (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f)))
			ccv_nnc_tensor_symbol_free(simplify->graph, (ccv_nnc_tensor_symbol_t){
				.d = i,
				.graph = simplify->graph,
			});
	// Autogen the sources / destinations.
	ccv_nnc_graph_exec_symbol_autogen(simplify->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
}

static void _ccv_nnc_symbolic_graph_simplify_free(ccv_nnc_symbolic_graph_simplify_t* const simplify)
{
	ccv_nnc_graph_visit_free(simplify->visit);
	ccfree(simplify->tensor_symbol_info);
	ccfree(simplify->exec_symbol_info);
	ccfree(simplify->exec_dead);
	ccfree(simplify->output_execs);
	ccfree(simplify);
}

typedef struct {
	int d;
	int ifbit; // Probe distance: how many slots this entry sits away from its ideal position.
	uint64_t hash;
} ccv_nnc_cse_hash_t;

static int _ccv_nnc_cse_hash_find(ccv_nnc_cse_hash_t* const hash_map, const uint64_t hash, const int map_size)
{
	assert(hash > 0);
	int i, j;
	i = (hash - 1) % map_size;
	for (j = 0; ; j++, i++)
	{
		if (i >= map_size)
			i = 0;
		if (j > hash_map[i].ifbit)
			return -1;
		if (hash_map[i].hash == hash)
			return hash_map[i].d;
	}
}

static void _ccv_nnc_cse_hash_add(ccv_nnc_cse_hash_t* const hash_map, uint64_t hash, int d, const int map_size)
{
	assert(hash > 0);
	int i, j;
	i = (hash - 1) % map_size;
	for (j = 0; ; j++, i++)
	{
		if (i >= map_size)
			i = 0;
		if (hash_map[i].hash == hash) // Already exists, do nothing.
			return;
		if (hash_map[i].hash == 0)
		{
			// Insert.
			hash_map[i].d = d;
			hash_map[i].ifbit = j;
			hash_map[i].hash = hash;
			return;
		}
		if (j > hash_map[i].ifbit)
		{
			const ccv_nnc_cse_hash_t old_hash = hash_map[i];
			// Swap, and continue, until we find an empty slot.
			hash_map[i].d = d;
			hash_map[i].ifbit = j;
			hash_map[i].hash = hash;
			d = old_hash.d;
			j = old_hash.ifbit;
			hash = old_hash.hash;
		}
	}
}
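
// A minimal, self-contained sketch of how the robin-hood map above behaves.
// The hash values here are made up for illustration; in the CSE pass below
// they come from siphash. This function is illustrative only, not part of
// the library:
static void _ccv_nnc_cse_hash_example(void)
{
	const int map_size = 8;
	ccv_nnc_cse_hash_t hash_map[8] = {0}; // Must start zeroed, like the cccalloc'ed map in the CSE pass.
	_ccv_nnc_cse_hash_add(hash_map, 42, 3, map_size); // Tensor 3 produced the value hashed to 42.
	_ccv_nnc_cse_hash_add(hash_map, 42, 7, map_size); // Same hash again: ignored, tensor 3 stays the winner.
	assert(_ccv_nnc_cse_hash_find(hash_map, 42, map_size) == 3);
	assert(_ccv_nnc_cse_hash_find(hash_map, 99, map_size) == -1); // An unknown hash probes at most ifbit + 1 slots before giving up.
}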

static int _ccv_nnc_symbolic_graph_update_refs(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const int* const refs, const int output_exec_ref_dead)
{
	int i, j;
	// Go over refs; mark replaced tensors dead, and if a live tensor is an alias of a replaced tensor, point its alias_ref at the new one.
	for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		if (refs[i] >= 0)
			// Mark this tensor as dead.
			simplify->tensor_dead[i >> 5] |= (1u << (i & 0x1f));
		else if (simplify->tensor_symbol_info[i].alias_ref && refs[simplify->tensor_symbol_info[i].alias_ref - 1] >= 0) {
			const int alias_ref = simplify->tensor_symbol_info[i].alias_ref - 1;
			simplify->tensor_symbol_info[i].alias_ref = refs[alias_ref] + 1;
			((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->alias_ref = refs[alias_ref] + 1;
		}
	for (i = 0; i < output_size; i++)
		// If the output is an alias, that's fine, because if the alias is rebound, we are good.
		simplify->tensor_dead[outputs[i].d >> 5] &= ~(1u << (outputs[i].d & 0x1f)); // Undead the output tensor symbols.
	// Merge s_refs if the tensor is dead.
	for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))))
		{
			const int ref = refs[i];
			if (simplify->tensor_symbol_info[i].s_ref && simplify->tensor_symbol_info[i].s_ref->rnum)
			{
				if (!simplify->tensor_symbol_info[ref].s_ref) // If there is no s_ref, simple: just move the pointer over and set the old one to nil.
				{
					simplify->tensor_symbol_info[ref].s_ref = simplify->tensor_symbol_info[i].s_ref;
					simplify->tensor_symbol_info[i].s_ref = 0;
					((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->s_ref = 0;
				} else {
					ccv_array_t* const ref_s_ref = simplify->tensor_symbol_info[ref].s_ref;
					ccv_array_t* const i_s_ref = simplify->tensor_symbol_info[i].s_ref;
					const int ref_s_ref_rnum = ref_s_ref->rnum;
					int flag = 0;
					// Detect conflict; if there is one, undead the tensor.
					for (j = 0; !flag && j < ccv_min(ref_s_ref_rnum, i_s_ref->rnum); j++)
					{
						const int ref_s_ref_k = *(int*)ccv_array_get(ref_s_ref, j);
						const int i_s_ref_k = *(int*)ccv_array_get(i_s_ref, j);
						// If, for the same sub-graph, they have different tensors linked, we cannot merge these two.
						flag = (ref_s_ref_k > 0 && i_s_ref_k > 0 && ref_s_ref_k != i_s_ref_k);
					}
					if (flag)
					{
						simplify->tensor_dead[i >> 5] &= ~(1u << (i & 0x1f)); // Undead.
						continue;
					}
					if (ref_s_ref_rnum < i_s_ref->rnum)
					{
						ccv_array_resize(ref_s_ref, i_s_ref->rnum);
						memcpy(ccv_array_get(ref_s_ref, ref_s_ref_rnum), ccv_array_get(i_s_ref, ref_s_ref_rnum), sizeof(int) * (i_s_ref->rnum - ref_s_ref_rnum));
					}
					for (j = 0; j < ccv_min(ref_s_ref_rnum, i_s_ref->rnum); j++)
					{
						const int ref_s_ref_k = *(int*)ccv_array_get(ref_s_ref, j);
						const int i_s_ref_k = *(int*)ccv_array_get(i_s_ref, j);
						assert(ref_s_ref_k == 0 || i_s_ref_k == 0);
						if (i_s_ref_k)
							*(int*)ccv_array_get(ref_s_ref, j) = i_s_ref_k;
					}
					ccv_array_free(simplify->tensor_symbol_info[i].s_ref);
					simplify->tensor_symbol_info[i].s_ref = 0;
					((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->s_ref = 0;
					for (j = 0; j < ref_s_ref->rnum; j++)
					{
						const int ref_k = *(int*)ccv_array_get(ref_s_ref, j) - 1;
						if (ref_k >= 0)
						{
							ccv_nnc_symbolic_graph_t* const sub_graph = *(ccv_nnc_symbolic_graph_t**)ccv_array_get(simplify->graph->sub_graphs, j);
							assert(sub_graph);
							// Update its p_ref.
							((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(sub_graph->tensor_symbol_info, ref_k))->p_ref = ref + 1;
						}
					}
				}
				assert(simplify->tensor_symbol_info[i].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->s_ref);
				assert(simplify->tensor_symbol_info[ref].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref))->s_ref);
			}
		}
	// Go over the refs we are updating, and walk through their p_ref to make sure both sides are updated.
	for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))) && simplify->tensor_symbol_info[i].p_ref)
		{
			const int ref = refs[i];
			const int p_ref = simplify->tensor_symbol_info[i].p_ref - 1;
			assert(p_ref >= 0);
			assert(simplify->graph->p);
			ccv_array_t* const s_ref = ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->p->tensor_symbol_info, p_ref))->s_ref;
			const int s_idx = simplify->graph->p_idx - 1;
			assert(s_idx >= 0);
			assert(s_ref && s_ref->rnum > s_idx);
			*(int*)ccv_array_get(s_ref, s_idx) = ref + 1; // Update so it references the new s_ref.
			assert(!simplify->tensor_symbol_info[ref].p_ref);
			simplify->tensor_symbol_info[ref].p_ref = p_ref + 1;
			((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref))->p_ref = p_ref + 1;
		}
	// Now go over execs to mark them as dead, because we don't need them to generate the refs any more.
	if (output_exec_ref_dead)
		for (i = 0; i < simplify->tensor_symbol_info_size; i++)
			if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))))
			{
				const int output_exec = simplify->output_execs[i];
				assert(output_exec >= 0);
				const ccv_nnc_graph_exec_symbol_info_t* const symbol_info = simplify->exec_symbol_info + output_exec;
				int flag = 0;
				for (j = 0; !flag && j < symbol_info->output_size; j++)
				{
					const int d = symbol_info->outputs[j];
					if (d >= 0)
						flag = (!(simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f)))); // If some of the outputs are not dead, we cannot proceed.
				}
				if (!flag) // If all outputs are dead, mark the exec as dead.
					simplify->exec_dead[output_exec >> 5] |= (1u << (output_exec & 0x1f));
			}
	int updated_refs = 0;
	// Go over and replace inputs / outputs.
	ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
		for (i = 0; i < node->input_size; i++)
		{
			const int d = node->inputs[i];
			if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
			{
				node->inputs[i] = refs[d]; // It can be replaced.
				updated_refs = 1;
			}
		}
		for (i = 0; i < node->output_size; i++)
		{
			const int d = node->outputs[i];
			if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
			{
				node->outputs[i] = refs[d]; // It can be replaced.
				updated_refs = 1;
			}
		}
		assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->inputs == node->inputs);
		assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->outputs == node->outputs);
	} ccv_nnc_graph_visit_endfor
	const ccv_nnc_graph_exec_symbol_info_t* const p_node_info = simplify->graph->p ? (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->p->exec_symbol_info, simplify->graph->exec_idx - 1) : 0;
	if (p_node_info && (p_node_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE))
		// Go over the while inputs as well.
		for (i = 0; i < p_node_info->p_while.input_size; i++)
		{
			const int d = p_node_info->p_while.inputs[i];
			if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
			{
				p_node_info->p_while.inputs[i] = refs[d];
				updated_refs = 1;
			}
		}
	return updated_refs;
}
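
// A minimal sketch (illustrative only, not part of the library) of the refs
// contract consumed above: refs[i] == -1 leaves tensor i alone, while
// refs[i] == r folds tensor i into tensor r, leaving i marked dead so graph
// pruning can free it. The fold assumes tensors 0 and 1 are known to be
// interchangeable, which is what the passes below establish before calling in:
static void _ccv_nnc_update_refs_example(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t output)
{
	_ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify); // A folded tensor must have a known writer.
	int* const refs = (int*)ccmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
	int i;
	for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		refs[i] = -1; // -1 everywhere: keep every tensor.
	refs[1] = 0; // Fold tensor 1 into tensor 0; every exec touching 1 gets rewired to 0.
	_ccv_nnc_symbolic_graph_update_refs(simplify, &output, 1, refs, 1 /* Writers of folded tensors may be marked dead too. */);
	ccfree(refs);
}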

// This is a simple common sub-expression elimination implementation. In particular, we only replace the later-computed output
// with the identical earlier-computed output, and leave the "elimination" part to graph pruning.
static void _ccv_nnc_symbolic_graph_common_subexpression_elimination(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
	_ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify);
	// tensor_hash starts with 0s, and each entry is either marked with the tensor index + 1, or the hash of the computation.
	uint64_t* const tensor_hash = (uint64_t*)cccalloc(simplify->tensor_symbol_info_size, sizeof(uint64_t));
	int i;
	ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
		// We cannot support graph / custom commands (we cannot model them properly).
		if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD ||
			node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ||
			node->cmd.cmd == CCV_NNC_CUSTOM_FORWARD ||
			node->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD)
			continue;
		// If already marked as dead, skip.
		if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
			continue;
		uint64_t hashout, hashin[3];
		// First, hash the cmd and the hints with the cmd.
		siphash((uint8_t*)&hashin[0], (const uint8_t*)&node->cmd.info, sizeof(node->cmd.info), key_siphash);
		siphash((uint8_t*)&hashin[1], (const uint8_t*)&node->hint, sizeof(node->hint), key_siphash);
		hashin[2] = node->cmd.cmd; // Now actually hash the cmd name.
		siphash((uint8_t*)&hashout, (const uint8_t*)hashin, sizeof(hashin), key_siphash);
		// Note on aliases: we cannot really generate a proper hash for an alias (yet). Thus, just treat aliases as normal tensors.
		for (i = 0; i < node->input_size; i++)
		{
			assert(node->inputs[i] < simplify->tensor_symbol_info_size);
			// If there is no hash for the input, use the index + 1 as the hash.
			if (node->inputs[i] >= 0 && tensor_hash[node->inputs[i]] == 0)
				tensor_hash[node->inputs[i]] = node->inputs[i] + 1;
			if (node->inputs[i] >= 0)
			{
				// Hash using the tensor hash.
				hashin[0] = hashout;
				hashin[1] = i; // Encode the positional information.
				hashin[2] = tensor_hash[node->inputs[i]];
			} else {
				// Hash using the input integer (could be a special integer).
				hashin[0] = hashout;
				hashin[1] = i; // Encode the positional information.
				hashin[2] = node->inputs[i];
			}
			siphash((uint8_t*)&hashout, (const uint8_t*)hashin, sizeof(hashin), key_siphash);
		}
		for (i = 0; i < node->output_size; i++)
			if (node->outputs[i] >= 0)
			{
				assert(node->outputs[i] < simplify->tensor_symbol_info_size);
				// Assigned only once, especially now that we don't consider aliases.
				assert(tensor_hash[node->outputs[i]] == 0);
				hashin[0] = hashout;
				hashin[1] = i; // Positional information.
				siphash((uint8_t*)&hashin[2], (const uint8_t*)&simplify->tensor_symbol_info[node->outputs[i]].info,
					sizeof(simplify->tensor_symbol_info[node->outputs[i]].info), key_siphash);
				// Generate the hash for the output.
				siphash((uint8_t*)&tensor_hash[node->outputs[i]], (const uint8_t*)hashin, sizeof(hashin), key_siphash);
			}
	} ccv_nnc_graph_visit_endfor
	// Allocate 3 / 2 as much space, for the simple robin-hood open-address hash map.
	const int map_size = (simplify->tensor_symbol_info_size * 3 + 1) / 2;
	int* const refs = (int*)ccmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
	for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		refs[i] = -1;
	ccv_nnc_cse_hash_t* const hash_map = (ccv_nnc_cse_hash_t*)cccalloc(map_size, sizeof(ccv_nnc_cse_hash_t));
	// Now all tensors are hashed: identify tensors with the same hash code, and replace the ones that are accessed later.
	ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
		// If already marked as dead, skip.
		if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
			continue;
		for (i = 0; i < node->input_size; i++)
			if (node->inputs[i] >= 0)
			{
				const int d = node->inputs[i];
				assert(tensor_hash[d]);
				const int new_d = _ccv_nnc_cse_hash_find(hash_map, tensor_hash[d], map_size);
				if (new_d >= 0 && new_d != d)
				{
					// Check whether this can be replaced.
					assert(refs[d] == -1 || refs[d] == new_d);
					assert(!simplify->tensor_symbol_info[d].assign_ref);
					assert(!simplify->tensor_symbol_info[d].r_assign_ref);
					assert(!simplify->tensor_symbol_info[d].bypass_ref);
					assert(!simplify->tensor_symbol_info[new_d].assign_ref);
					assert(!simplify->tensor_symbol_info[new_d].r_assign_ref);
					assert(!simplify->tensor_symbol_info[new_d].bypass_ref);
					// Ignore if there is a pair_ref (again, pair_ref has a deeper side effect (using tape)).
					if (simplify->tensor_symbol_info[d].pair_ref)
						continue;
					// If both have p_ref, we cannot merge.
					if (simplify->tensor_symbol_info[d].p_ref && simplify->tensor_symbol_info[new_d].p_ref)
						continue;
					// Merge s_refs from ref[d] later.
					if (refs[d] != new_d)
						refs[d] = new_d;
					assert(simplify->output_execs[new_d] >= 0);
					// Establish the new dependency.
					ccv_nnc_graph_exec_symbol_concat(simplify->graph, (ccv_nnc_graph_exec_symbol_t){
						.d = simplify->output_execs[new_d],
						.graph = simplify->graph,
					}, (ccv_nnc_graph_exec_symbol_t){
						.d = idx,
						.graph = simplify->graph,
					});
				}
			}
		// We can reuse the inputs, but we cannot do that for the outputs of these commands.
		if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD ||
			node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ||
			node->cmd.cmd == CCV_NNC_CUSTOM_FORWARD ||
			node->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD)
			continue;
		for (i = 0; i < node->output_size; i++)
			if (node->outputs[i] >= 0) // This tensor can be reused by others.
				_ccv_nnc_cse_hash_add(hash_map, tensor_hash[node->outputs[i]], node->outputs[i], map_size);
	} ccv_nnc_graph_visit_endfor
	_ccv_nnc_symbolic_graph_update_refs(simplify, outputs, output_size, refs, 1 /* The execs that generated these refs are no longer needed. */);
	ccfree(tensor_hash);
	ccfree(hash_map);
	ccfree(refs);
}
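
// A sketch of how this pass is reached from the public API, assuming the
// ccv_nnc_symbolic_graph_simplify() entry point and the CCV_NNC_SIMPLIFY_*
// pass enums declared in ccv_nnc.h; treat the parameter order as illustrative
// and check the header before relying on it:
static void _ccv_nnc_cse_usage_example(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t output, const ccv_nnc_graph_exec_symbol_t source, const ccv_nnc_graph_exec_symbol_t destination)
{
	const int passes[] = {
		CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION, // Fold identical computations (this pass).
		CCV_NNC_SIMPLIFY_GRAPH_PRUNING, // Then free the dead execs / tensors that CSE left behind.
	};
	ccv_nnc_symbolic_graph_simplify(graph, passes, 2, 0, 0 /* no binds */, &output, 1, &source, 1, &destination, 1);
}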

static void _ccv_nnc_symbolic_graph_data_transfer_opt(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const binds, const int bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
	_ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify);
	uint32_t* const exec_dead = simplify->exec_dead;
	const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = simplify->tensor_symbol_info;
	int i, j;
	uint32_t* const has_alias = ccmalloc(sizeof(uint32_t) * ((simplify->tensor_symbol_info_size + 31) >> 5));
	int* const refs = (int*)ccmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
	int updated_refs;
	do {
		memset(has_alias, 0, sizeof(uint32_t) * ((simplify->tensor_symbol_info_size + 31) >> 5));
		// Go through until no update is possible. This won't result in an infinite loop because
		// each iteration eliminates a tensor.
		for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		{
			refs[i] = -1;
			if (tensor_symbol_info[i].alias_ref)
			{
				const int alias_ref = tensor_symbol_info[i].alias_ref - 1;
				has_alias[alias_ref >> 5] |= (1u << (alias_ref & 0x1f));
			}
		}
		ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
			// If already marked as dead, skip.
			if (exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
				continue;
			if (node->cmd.cmd != CCV_NNC_DATA_TRANSFER_FORWARD &&
				node->cmd.cmd != CCV_NNC_DATA_TRANSFER_BACKWARD &&
				// Conversion: if the datatype is the same, it is a data transfer op.
				node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_FORWARD &&
				node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_BACKWARD &&
				// Format transform: if the format is the same, it is a data transfer op.
				node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_FORWARD &&
				node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_BACKWARD)
				continue;
			for (i = 0; i < node->output_size; i++) // For data transfer, we only respect the output size.
				if (node->inputs[i] >= 0 && node->outputs[i] >= 0)
				{
					assert(node->inputs[i] < simplify->tensor_symbol_info_size);
					assert(node->outputs[i] < simplify->tensor_symbol_info_size);
					const ccv_nnc_tensor_symbol_info_t* const input = tensor_symbol_info + node->inputs[i];
					const ccv_nnc_tensor_symbol_info_t* const output = tensor_symbol_info + node->outputs[i];
					// If they are not the same datatype, skip. (Likely a data conversion op.)
					if (input->info.datatype != output->info.datatype)
						continue;
					// If they are not the same format, skip. (Likely a format transform op.)
					if (input->info.format != output->info.format)
						continue;
					// If they are not on the same device (even for NUMA), skip.
					if (input->info.type != output->info.type)
						continue;
					// If both are aliases, we cannot consolidate this.
					if (input->alias_ref && output->alias_ref)
						continue;
					// If the input is an alias, and the output has an alias referencing it, the output cannot be the same as the input.
					if (input->alias_ref && (has_alias[node->outputs[i] >> 5] & (1u << (node->outputs[i] & 0x1f))))
						continue;
					// If the output is an alias, and the input has an alias referencing it, the input cannot be the same as the output.
					if (output->alias_ref && (has_alias[node->inputs[i] >> 5] & (1u << (node->inputs[i] & 0x1f))))
						continue;
					// If either are carry-overs (for while), we cannot do anything.
					if (input->assign_ref || output->assign_ref ||
						input->r_assign_ref || output->r_assign_ref)
						continue;
					// If either are bypasses (for case..of), we cannot do anything.
					if (input->bypass_ref || output->bypass_ref ||
						input->r_bypass_ref || output->r_bypass_ref)
						continue;
					// If either are inputs / outputs connecting the parent graph, we cannot do anything.
					if (input->p_ref || output->p_ref)
						continue;
					int flag = 0;
					for (j = 0; !flag && j < bind_size; j++)
						flag = (binds[j].d == node->inputs[i] || binds[j].d == node->outputs[i]);
					for (j = 0; !flag && j < output_size; j++)
						flag = (outputs[j].d == node->inputs[i] || outputs[j].d == node->outputs[i]);
					// Neither the input nor the output can be in the outputs or the binds.
					if (flag)
						continue;
					// If the type is the same, check which one is the alias.
					// We always prefer the alias.
					if (output->alias_ref)
						refs[node->inputs[i]] = node->outputs[i];
					else // if (input->alias_ref), else
						refs[node->outputs[i]] = node->inputs[i];
				}
		} ccv_nnc_graph_visit_endfor
		// Make sure each ref points to the end of its chain.
		for (i = 0; i < simplify->tensor_symbol_info_size; i++)
			if (refs[i] >= 0)
			{
				int ref = refs[i];
				while (refs[ref] >= 0)
					ref = refs[ref];
				refs[i] = ref;
			}
		updated_refs = _ccv_nnc_symbolic_graph_update_refs(simplify, outputs, output_size, refs, 0 /* We still need the execs that generated the refs. */);
	} while (updated_refs);
	ccfree(refs);
	ccfree(has_alias);
	// Now that all references are updated, remove data transfers whose source and destination are the same.
	ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
		// If already marked as dead, skip.
		if (exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
			continue;
		if (node->cmd.cmd != CCV_NNC_DATA_TRANSFER_FORWARD &&
			node->cmd.cmd != CCV_NNC_DATA_TRANSFER_BACKWARD &&
			// Conversion: if the datatype is the same, it is a data transfer op.
			node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_FORWARD &&
			node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_BACKWARD &&
			// Format transform: if the format is the same, it is a data transfer op.
			node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_FORWARD &&
			node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_BACKWARD)
			continue;
		for (i = 0; i < node->output_size; i++) // For data transfer, we only respect the output size.
			if (node->inputs[i] == node->outputs[i])
			{
				// Shift all the remaining input / output pairs down to keep the lists compact.
				for (j = i + 1; j < node->output_size; j++)
				{
					node->inputs[j - 1] = node->inputs[j];
					node->outputs[j - 1] = node->outputs[j];
				}
				--node->output_size;
				--i;
			}
		node->input_size = node->output_size;
		((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->input_size = node->input_size;
		((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->output_size = node->output_size;
		// Remove this data transfer node if it has no outputs.
		if (node->output_size == 0)
			exec_dead[idx >> 5] |= (1u << (idx & 0x1f));
	} ccv_nnc_graph_visit_endfor
}
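
// A before / after sketch of what the pass above does, assuming x and y share
// datatype, format, and device, and neither is an alias, bind, or output:
//   Before: DATA_TRANSFER(x) -> (y); GEMM(y, w) -> (z)
//   After:  refs[y] = x, so GEMM reads x directly; the transfer node collapses
//           to zero outputs and is marked dead in exec_dead.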
552 | ||||
553 | typedef struct { | |||
554 | uint32_t fused_op; // The final fused op id. | |||
555 | uint32_t ops_seq[2]; // The sequence of commands to identify. | |||
556 | int ops_seq_size; | |||
557 | int ops_info_select; // Which ops' info will be selected. | |||
558 | struct { | |||
559 | int type; // Whether it is input, or output. It doesn't make sense for example, input in ops_seq, but output in fused_op. | |||
560 | int op_idx; // Index into the ops_seq. | |||
561 | int from; // The index in ops_seq. | |||
562 | int to; // The index in fused_op. | |||
563 | } pos[4]; // maps of positions from ops seq to fused_op for inputs (outputs). | |||
564 | int pos_size; | |||
565 | } ccv_nnc_ops_fusion_t; | |||
566 | ||||
567 | enum { | |||
568 | CCV_NNC_OPS_FUSION_INPUT_INDEX, | |||
569 | CCV_NNC_OPS_FUSION_OUTPUT_INDEX, | |||
570 | }; | |||
571 | ||||
572 | const static int ccv_nnc_ops_fusion_io_size = 2; | |||
573 | const static ccv_nnc_ops_fusion_t ccv_nnc_ops_fusions[] = { | |||
574 | { | |||
575 | .fused_op = CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD, | |||
576 | .ops_seq = { | |||
577 | CCV_NNC_SOFTMAX_FORWARD, CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD, | |||
578 | }, | |||
579 | .ops_seq_size = 2, | |||
580 | .ops_info_select = 1, | |||
581 | .pos = { | |||
582 | { | |||
583 | .type = CCV_NNC_OPS_FUSION_INPUT_INDEX, | |||
584 | .op_idx = 0, | |||
585 | .from = 0, | |||
586 | .to = 0, | |||
587 | }, | |||
588 | { | |||
589 | .type = CCV_NNC_OPS_FUSION_INPUT_INDEX, | |||
590 | .op_idx = 1, | |||
591 | .from = 1, | |||
592 | .to = 1, | |||
593 | }, | |||
594 | { | |||
595 | .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX, | |||
596 | .op_idx = 0, | |||
597 | .from = 0, | |||
598 | .to = 1, | |||
599 | }, | |||
600 | { | |||
601 | .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX, | |||
602 | .op_idx = 1, | |||
603 | .from = 0, | |||
604 | .to = 0, | |||
605 | }, | |||
606 | }, | |||
607 | .pos_size = 4, | |||
608 | }, | |||
609 | { | |||
610 | .fused_op = CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_FORWARD, | |||
611 | .ops_seq = { | |||
612 | CCV_NNC_SIGMOID_FORWARD, CCV_NNC_BINARY_CROSSENTROPY_FORWARD, | |||
613 | }, | |||
614 | .ops_seq_size = 2, | |||
615 | .ops_info_select = 1, | |||
616 | .pos = { | |||
617 | { | |||
618 | .type = CCV_NNC_OPS_FUSION_INPUT_INDEX, | |||
619 | .op_idx = 0, | |||
620 | .from = 0, | |||
621 | .to = 0, | |||
622 | }, | |||
623 | { | |||
624 | .type = CCV_NNC_OPS_FUSION_INPUT_INDEX, | |||
625 | .op_idx = 1, | |||
626 | .from = 1, | |||
627 | .to = 1, | |||
628 | }, | |||
629 | { | |||
630 | .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX, | |||
631 | .op_idx = 0, | |||
632 | .from = 0, | |||
633 | .to = 1, | |||
634 | }, | |||
635 | { | |||
636 | .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX, | |||
637 | .op_idx = 1, | |||
638 | .from = 0, | |||
639 | .to = 0, | |||
640 | }, | |||
641 | }, | |||
642 | .pos_size = 4, | |||
643 | } | |||
644 | }; | |||
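/* How to read the first entry above, as a sketch of the intended rewrite
 * (tensor names x, label, y, loss are illustrative, not from this file):
 *
 *   y = SOFTMAX_FORWARD(x)
 *   loss = CATEGORICAL_CROSSENTROPY_FORWARD(y, label)
 *
 * fuses into
 *
 *   (loss, y) = SOFTMAX_CROSSENTROPY_FORWARD(x, label)
 *
 * following the pos[] mappings:
 *   pos[0]: input 0 of op 0 (x)     -> fused input 0
 *   pos[1]: input 1 of op 1 (label) -> fused input 1
 *   pos[2]: output 0 of op 0 (y)    -> fused output 1
 *   pos[3]: output 0 of op 1 (loss) -> fused output 0
 *
 * The intermediate y survives as the fused op's second output, so any other
 * consumer of the softmax activation keeps a valid producer. The sigmoid /
 * binary cross-entropy entry is structured identically. */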
645 | ||||
646 | static int _ccv_nnc_find_ops_for_fusion(const ccv_nnc_ops_fusion_t* const fusion, const int ops_idx, const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info, const uint32_t* const exec_dead, const int exec_idx, int* const fusing_exec_symbols) | |||
647 | { | |||
648 | if (exec_dead[exec_idx >> 5] & (1u << (exec_idx & 0x1f))) | |||
649 | return 0; | |||
650 | const ccv_nnc_graph_exec_symbol_info_t* const node = exec_symbol_info + exec_idx; | |||
651 | // If the command doesn't match the expected op in ops_seq, return 0. | |||
652 | if (fusion->ops_seq[ops_idx] != node->cmd.cmd) | |||
653 | return 0; | |||
654 | fusing_exec_symbols[ops_idx] = exec_idx; | |||
655 | // If already reached the end, we are good. | |||
656 | if (ops_idx == fusion->ops_seq_size - 1) | |||
657 | return 1; | |||
658 | // Otherwise we need to keep matching, but there are no outgoing nodes to follow. | |||
659 | if (!node->outgoings || !node->outgoings->rnum) | |||
660 | return 0; | |||
661 | int i; | |||
662 | for (i = 0; i < node->outgoings->rnum; i++) | |||
663 | if (_ccv_nnc_find_ops_for_fusion(fusion, ops_idx + 1, exec_symbol_info, exec_dead, *(int*)ccv_array_get(node->outgoings, i), fusing_exec_symbols)) | |||
664 | return 1; | |||
665 | return 0; | |||
666 | } | |||
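/* The matcher above is a depth-first walk: ops_seq[ops_idx] must match the
 * current exec's command, then each outgoing exec is tried for
 * ops_seq[ops_idx + 1], backtracking on failure. A tiny hypothetical trace for
 * the softmax / cross-entropy sequence (exec indices invented for
 * illustration):
 *
 *   exec 3 (SOFTMAX_FORWARD), outgoings = { 7, 9 }
 *     exec 7 (RELU_FORWARD)                     -> command mismatch, backtrack
 *     exec 9 (CATEGORICAL_CROSSENTROPY_FORWARD) -> match, done
 *   fusing_exec_symbols = { 3, 9 }
 *
 * Note that, as written, it matches on command identity and reachability only;
 * the pos[] mapping applied afterwards assumes the canonical wiring between
 * the two ops rather than re-verifying tensor connectivity. */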
667 | ||||
668 | static void _ccv_nnc_symbolic_graph_ops_fusion(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size) | |||
669 | { | |||
670 | uint32_t* const exec_dead = simplify->exec_dead; | |||
671 | ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = simplify->exec_symbol_info; | |||
672 | int i, j; | |||
673 | int fusing_exec_symbols[sizeof(ccv_nnc_ops_fusions->ops_seq)]; | |||
674 | int fused_inputs[ccv_nnc_ops_fusion_io_size]; // The size 2 simply matches the largest input / output mapping in ccv_nnc_ops_fusions. | |||
675 | int fused_outputs[ccv_nnc_ops_fusion_io_size]; | |||
676 | ccv_nnc_graph_visit_for(simplify->visit, exec_symbol_info, node, idx) { | |||
677 | // If already marked as dead, skip. | |||
678 | if (exec_dead[idx >> 5] & (1u << (idx & 0x1f))) | |||
679 | continue; | |||
680 | // Run through rudimentary pattern matching for ops_seq. There are better ways to do this (Boyer-Moore immediately comes to mind), but this is simpler to code. | |||
681 | // If I ever run into performance issues with this, I would be very happy. | |||
682 | for (i = 0; i < sizeof(ccv_nnc_ops_fusions) / sizeof(ccv_nnc_ops_fusion_t); i++) | |||
683 | { | |||
684 | const ccv_nnc_ops_fusion_t* const ops_fusion = ccv_nnc_ops_fusions + i; | |||
685 | // Check to see if a matching chain of exec symbols is possible. | |||
686 | if (ops_fusion->ops_seq[0] == node->cmd.cmd && | |||
687 | _ccv_nnc_find_ops_for_fusion(ops_fusion, 0, exec_symbol_info, exec_dead, idx, fusing_exec_symbols)) | |||
688 | { | |||
689 | // Go through all the inputs and outputs, and check that they exist and are mapped. | |||
690 | // TODO: the mapping can be more sophisticated than what we have here. | |||
691 | // Also, if some inputs / outputs cannot be mapped, we cannot continue. | |||
692 | for (j = 0; j < ccv_nnc_ops_fusion_io_size; j++) | |||
693 | fused_inputs[j] = fused_outputs[j] = CCV_NNC_NO_TENSOR_SYMBOL; | |||
694 | int input_size = 0, output_size = 0; | |||
695 | for (j = 0; j < ops_fusion->pos_size; j++) | |||
696 | { | |||
697 | ccv_nnc_graph_exec_symbol_info_t* const fusing_op = exec_symbol_info + fusing_exec_symbols[ops_fusion->pos[j].op_idx]; | |||
698 | switch (ops_fusion->pos[j].type) | |||
699 | { | |||
700 | case CCV_NNC_OPS_FUSION_INPUT_INDEX: | |||
701 | fused_inputs[ops_fusion->pos[j].to] = ops_fusion->pos[j].from < fusing_op->input_size ? fusing_op->inputs[ops_fusion->pos[j].from] : CCV_NNC_NO_TENSOR_SYMBOL; | |||
702 | input_size = ccv_max(input_size, ops_fusion->pos[j].to + 1); | |||
703 | break; | |||
704 | case CCV_NNC_OPS_FUSION_OUTPUT_INDEX: | |||
705 | fused_outputs[ops_fusion->pos[j].to] = ops_fusion->pos[j].from < fusing_op->output_size ? fusing_op->outputs[ops_fusion->pos[j].from] : CCV_NNC_NO_TENSOR_SYMBOL; | |||
706 | output_size = ccv_max(output_size, ops_fusion->pos[j].to + 1); | |||
707 | break; | |||
708 | } | |||
709 | } | |||
710 | const ccv_nnc_cmd_param_t info = exec_symbol_info[fusing_exec_symbols[ops_fusion->ops_info_select]].cmd.info; | |||
711 | // Modify the first node so it carries the correct command type and parameters. We need to reach back into the actual graph to update its info. | |||
712 | ccv_nnc_graph_exec_symbol_info_t* const actual_node = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx); | |||
713 | actual_node->cmd.cmd = node->cmd.cmd = ops_fusion->fused_op; | |||
714 | actual_node->cmd.info = node->cmd.info = info; | |||
715 | if (node->input_size + node->output_size < input_size + output_size) | |||
716 | actual_node->inputs = node->inputs = node->inputs ? ccrealloc(node->inputs, sizeof(int) * (input_size + output_size)) : ccmalloc(sizeof(int) * (input_size + output_size)); | |||
717 | actual_node->outputs = node->outputs = node->inputs + input_size; | |||
718 | actual_node->input_size = node->input_size = input_size; | |||
719 | actual_node->output_size = node->output_size = output_size; | |||
720 | memcpy(node->inputs, fused_inputs, sizeof(int) * input_size); | |||
721 | memcpy(node->outputs, fused_outputs, sizeof(int) * output_size); | |||
722 | // Afterwards, mark the rest as dead. | |||
723 | for (j = 1; j < ops_fusion->ops_seq_size; j++) | |||
724 | exec_dead[fusing_exec_symbols[j] >> 5] |= (1u << (fusing_exec_symbols[j] & 0x1f)); | |||
725 | break; | |||
726 | } | |||
727 | } | |||
728 | } ccv_nnc_graph_visit_endfor | |||
729 | } | |||
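/* A note on the in-place rewrite above: a node's inputs and outputs live in
 * one allocation, with outputs aliasing inputs + input_size. That is why a
 * single ccrealloc on inputs is enough to grow both, and why outputs must be
 * re-derived from the (possibly moved) inputs pointer before the memcpy
 * calls. The same pointers are mirrored into the actual graph's
 * exec_symbol_info so the visit copy and the graph stay consistent. */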
730 | ||||
731 | static void _ccv_nnc_symbolic_graph_pruning_undead_exec(ccv_nnc_symbolic_graph_simplify_t* const simplify, const int exec_idx, uint32_t* const tensor_visited, ccv_array_t* const next) | |||
732 | { | |||
733 | assert(exec_idx >= 0); | |||
734 | uint32_t* const exec_dead = simplify->exec_dead; | |||
735 | uint32_t* const tensor_dead = simplify->tensor_dead; | |||
736 | exec_dead[exec_idx >> 5] &= ~(1u << (exec_idx & 0x1f)); // Undead the exec. | |||
737 | ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = simplify->exec_symbol_info + exec_idx; | |||
738 | int i; | |||
739 | if (exec_symbol_info->cmd.cmd == CCV_NNC_GRAPH_FORWARD || | |||
740 | exec_symbol_info->cmd.cmd == CCV_NNC_GRAPH_BACKWARD || | |||
741 | exec_symbol_info->cmd.cmd == CCV_NNC_CUSTOM_FORWARD || | |||
742 | exec_symbol_info->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD) | |||
743 | { | |||
744 | // All of its inputs / outputs need to be undead for these commands. | |||
745 | for (i = 0; i < exec_symbol_info->input_size; i++) | |||
746 | { | |||
747 | const int d = exec_symbol_info->inputs[i]; | |||
748 | if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f)))) | |||
749 | { | |||
750 | ccv_array_push(next, &d); // Push to the next round to be undead. | |||
751 | tensor_visited[d >> 5] |= (1u << (d & 0x1f)); | |||
752 | } | |||
753 | } | |||
754 | for (i = 0; i < exec_symbol_info->output_size; i++) | |||
755 | { | |||
756 | const int d = exec_symbol_info->outputs[i]; | |||
757 | if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f)))) | |||
758 | { | |||
759 | ccv_array_push(next, &d); // Push to the next round to be undead. | |||
760 | tensor_visited[d >> 5] |= (1u << (d & 0x1f)); | |||
761 | } | |||
762 | } | |||
763 | return; | |||
764 | } | |||
765 | // Go through the inputs / outputs to make sure all of them are available. | |||
766 | const int input_bitmask_size = (exec_symbol_info->input_size + 63) >> 6; | |||
767 | const int output_bitmask_size = (exec_symbol_info->output_size + 63) >> 6; | |||
768 | uint64_t input_bitmasks[ccv_max(1, input_bitmask_size)]; | |||
769 | for (i = 0; i < input_bitmask_size; i++) | |||
770 | input_bitmasks[i] = 0; | |||
771 | uint64_t output_bitmasks[ccv_max(1, output_bitmask_size)]; | |||
772 | for (i = 0; i < output_bitmask_size; i++) | |||
773 | output_bitmasks[i] = 0; | |||
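/* The two VLAs above are padded to at least one element via ccv_max(1, size),
 * but the loops only zero the first input_bitmask_size / output_bitmask_size
 * elements, so the padding element stays uninitialized when a size is 0. The
 * set / test loops below never touch that element either, so it is inert; a
 * hypothetical, behavior-preserving alternative that zeroes the whole array
 * (sizeof on a VLA is evaluated at run time in C99) would be:
 *
 *   memset(input_bitmasks, 0, sizeof(input_bitmasks));
 *   memset(output_bitmasks, 0, sizeof(output_bitmasks));
 */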
774 | for (i = 0; i < exec_symbol_info->input_size; i++) | |||
775 | if (exec_symbol_info->inputs[i] >= 0) | |||
776 | input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
777 | for (i = 0; i < exec_symbol_info->output_size; i++) | |||
778 | if (exec_symbol_info->outputs[i] >= 0) | |||
779 | output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
780 | // First, mark everything with bitmasks, and verify it works. | |||
781 | assert(ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size)); | |||
782 | int flag; | |||
783 | do { | |||
784 | flag = 0; | |||
785 | // Try to eliminate one at a time. Go over outputs first. | |||
786 | for (i = 0; i < exec_symbol_info->output_size; i++) | |||
787 | { | |||
788 | const int d = exec_symbol_info->outputs[i]; | |||
789 | // If this tensor is currently marked as dead, try to see whether it works when we don't have this tensor at all. | |||
790 | if (d >= 0 && (tensor_dead[d >> 5] & (1u << (d & 0x1f))) && | |||
791 | (output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))) | |||
792 | { | |||
793 | output_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63)); | |||
794 | if (ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size)) | |||
795 | flag = 1; | |||
796 | else // Reset the bitmask. | |||
797 | output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
798 | } | |||
799 | } | |||
800 | // For inputs, no matter whether the tensor is dead or not, we try to limit the inputs to the smallest set. | |||
801 | for (i = 0; i < exec_symbol_info->input_size; i++) | |||
802 | { | |||
803 | const int d = exec_symbol_info->inputs[i]; | |||
804 | // Regardless of liveness, try to see whether the command still works when we don't have this input at all. | |||
805 | if (d >= 0 && (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))) | |||
806 | { | |||
807 | input_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63)); | |||
808 | if (ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size)) | |||
809 | flag = 1; | |||
810 | else // Reset the bitmask. | |||
811 | input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
812 | } | |||
813 | } | |||
814 | } while (flag); | |||
815 | // Now we know which ones to keep and which ones to undead. | |||
816 | for (i = 0; i < exec_symbol_info->input_size; i++) | |||
817 | if (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) | |||
818 | { | |||
819 | const int d = exec_symbol_info->inputs[i]; | |||
820 | if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f)))) | |||
821 | { | |||
822 | ccv_array_push(next, &d); // Push to the next round to be undead. | |||
823 | tensor_visited[d >> 5] |= (1u << (d & 0x1f)); | |||
824 | } | |||
825 | } else { | |||
826 | // Clean up the inputs. | |||
827 | exec_symbol_info->inputs[i] = CCV_NNC_NO_TENSOR_SYMBOL; | |||
828 | assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->inputs == exec_symbol_info->inputs); | |||
829 | } | |||
830 | for (i = 0; i < exec_symbol_info->output_size; i++) | |||
831 | if (output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) | |||
832 | { | |||
833 | const int d = exec_symbol_info->outputs[i]; | |||
834 | if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f)))) | |||
835 | { | |||
836 | ccv_array_push(next, &d); // Push to the next round to be undead. | |||
837 | tensor_visited[d >> 5] |= (1u << (d & 0x1f)); | |||
838 | } | |||
839 | } else { | |||
840 | // Clean up the outputs. | |||
841 | exec_symbol_info->outputs[i] = CCV_NNC_NO_TENSOR_SYMBOL; | |||
842 | assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->outputs == exec_symbol_info->outputs); | |||
843 | } | |||
844 | } | |||
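/* A hypothetical run of the elimination loop above: suppose a backward exec
 * has outputs (dw, dbias, dx) and only dx was revived, so dw and dbias are
 * still marked dead. Starting from output bits 0b111, the loop clears bit 0
 * (dw) and asks ccv_nnc_cmd_bitmask whether the command still validates; if
 * so the drop sticks, otherwise the bit is restored. It then tries dbias, and
 * the do / while keeps sweeping until a full pass changes nothing (flag == 0).
 * Inputs are minimized the same way except liveness is ignored: any input the
 * command can validate without is dropped. Only tensors whose bits survive
 * are pushed onto `next` for the following BFS round; the rest are reset to
 * CCV_NNC_NO_TENSOR_SYMBOL on the node. */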
845 | ||||
846 | static void _ccv_nnc_symbolic_graph_pruning(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size) | |||
847 | { | |||
848 | uint32_t* const tensor_visited = (uint32_t*)cccalloc(sizeof(uint32_t), (simplify->tensor_symbol_info_size + 31) >> 5); | |||
849 | ccv_array_t* const preserve[2] = { | |||
850 | ccv_array_new(sizeof(int), output_size, 0), | |||
851 | ccv_array_new(sizeof(int), 0, 0), | |||
852 | }; | |||
853 | int i, j; | |||
854 | ccv_array_t** const r_alias_refs = (ccv_array_t**)cccalloc(sizeof(ccv_array_t*), simplify->tensor_symbol_info_size); | |||
855 | for (i = 0; i < simplify->tensor_symbol_info_size; i++) | |||
856 | if (simplify->tensor_symbol_info[i].alias_ref) | |||
857 | { | |||
858 | const int alias_ref = simplify->tensor_symbol_info[i].alias_ref - 1; | |||
859 | assert(alias_ref < simplify->tensor_symbol_info_size); | |||
860 | if (!r_alias_refs[alias_ref]) | |||
861 | r_alias_refs[alias_ref] = ccv_array_new(sizeof(int), 1, 0); | |||
862 | ccv_array_push(r_alias_refs[alias_ref], &i); | |||
863 | } | |||
864 | uint32_t* const exec_dead = simplify->exec_dead; | |||
865 | uint32_t* const tensor_dead = simplify->tensor_dead; | |||
866 | int* const output_execs = simplify->output_execs; | |||
867 | _ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify); | |||
868 | // Mark everything visited as dead. | |||
869 | ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) { | |||
870 | exec_dead[idx >> 5] |= (1u << (idx & 0x1f)); | |||
871 | for (i = 0; i < node->input_size; i++) | |||
872 | { | |||
873 | const int d = node->inputs[i]; | |||
874 | if (d >= 0) | |||
875 | tensor_dead[d >> 5] |= (1u << (d & 0x1f)); | |||
876 | } | |||
877 | for (i = 0; i < node->output_size; i++) | |||
878 | { | |||
879 | const int d = node->outputs[i]; | |||
880 | if (d >= 0) | |||
881 | tensor_dead[d >> 5] |= (1u << (d & 0x1f)); | |||
882 | } | |||
883 | } ccv_nnc_graph_visit_endfor | |||
884 | // If the tensor symbol is used by another exec that is not visited, unmark it. | |||
885 | for (i = 0; i < simplify->exec_symbol_info_size; i++) | |||
886 | { | |||
887 | if (exec_dead[i >> 5] & (1u << (i & 0x1f))) | |||
888 | continue; | |||
889 | const ccv_nnc_graph_exec_symbol_info_t* const node = simplify->exec_symbol_info + i; | |||
890 | for (j = 0; j < node->input_size; j++) | |||
891 | { | |||
892 | const int d = node->inputs[j]; | |||
893 | // Undead it. | |||
894 | if (d >= 0) | |||
895 | tensor_dead[d >> 5] &= ~(1u << (d & 0x1f)); | |||
896 | } | |||
897 | for (j = 0; j < node->output_size; j++) | |||
898 | { | |||
899 | const int d = node->outputs[j]; | |||
900 | // Undead it. | |||
901 | if (d >= 0) | |||
902 | tensor_dead[d >> 5] &= ~(1u << (d & 0x1f)); | |||
903 | } | |||
904 | } | |||
905 | for (i = 0; i < output_size; i++) | |||
906 | ccv_array_push(preserve[0], &outputs[i].d); | |||
907 | int p = 0, q = 1; | |||
908 | // BFS to mark execs / tensors as not dead. | |||
909 | while (preserve[p]->rnum > 0) | |||
910 | { | |||
911 | ccv_array_clear(preserve[q]); | |||
912 | // First, undead all relevant tensors. | |||
913 | for (i = 0; i < preserve[p]->rnum; i++) | |||
914 | { | |||
915 | const int d = *(int*)ccv_array_get(preserve[p], i); | |||
916 | // Undead the outputs. | |||
917 | tensor_dead[d >> 5] &= ~(1u << (d & 0x1f)); | |||
918 | int alias_ref = d; | |||
919 | if (simplify->tensor_symbol_info[d].alias_ref) | |||
920 | { | |||
921 | alias_ref = simplify->tensor_symbol_info[d].alias_ref - 1; | |||
922 | tensor_dead[alias_ref >> 5] &= ~(1u << (alias_ref & 0x1f)); | |||
923 | assert(r_alias_refs[alias_ref]); | |||
924 | } | |||
925 | if (r_alias_refs[alias_ref]) | |||
926 | for (j = 0; j < r_alias_refs[alias_ref]->rnum; j++) | |||
927 | { | |||
928 | const int b = *(int*)ccv_array_get(r_alias_refs[alias_ref], j); | |||
929 | if (output_execs[b] >= 0) // Only revive the alias if it is actually written to. | |||
930 | tensor_dead[b >> 5] &= ~(1u << (b & 0x1f)); | |||
931 | } | |||
932 | } | |||
933 | for (i = 0; i < preserve[p]->rnum; i++) | |||
934 | { | |||
935 | const int d = *(int*)ccv_array_get(preserve[p], i); | |||
936 | const int output_exec = output_execs[d]; | |||
937 | // Undead the exec. | |||
938 | if (output_exec >= 0) | |||
939 | _ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]); | |||
940 | int alias_ref = d; | |||
941 | if (simplify->tensor_symbol_info[d].alias_ref) | |||
942 | { | |||
943 | alias_ref = simplify->tensor_symbol_info[d].alias_ref - 1; | |||
944 | const int output_exec = output_execs[alias_ref]; | |||
945 | if (output_exec >= 0) | |||
946 | _ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]); | |||
947 | } | |||
948 | if (r_alias_refs[alias_ref]) | |||
949 | for (j = 0; j < r_alias_refs[alias_ref]->rnum; j++) | |||
950 | { | |||
951 | const int b = *(int*)ccv_array_get(r_alias_refs[alias_ref], j); | |||
952 | const int output_exec = output_execs[b]; | |||
953 | if (output_exec >= 0) | |||
954 | _ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]); | |||
955 | } | |||
956 | } | |||
957 | CCV_SWAP(p, q, i); | |||
958 | } | |||
959 | ccfree(tensor_visited); | |||
960 | ccv_array_free(preserve[0]); | |||
961 | ccv_array_free(preserve[1]); | |||
962 | for (i = 0; i < simplify->tensor_symbol_info_size; i++) | |||
963 | if (r_alias_refs[i]) | |||
964 | ccv_array_free(r_alias_refs[i]); | |||
965 | ccfree(r_alias_refs); | |||
966 | } | |||
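/* The traversal above is a double-buffered BFS. preserve[p] holds the current
 * frontier of tensor indices, preserve[q] collects tensors newly required by
 * the execs revived this round, and CCV_SWAP flips the two roles until the
 * frontier drains. tensor_visited ensures each tensor enters a frontier at
 * most once, so every exec / tensor is processed a bounded number of times.
 * Aliases need both directions: reviving an alias revives its alias_ref, and
 * reviving a tensor revives any written aliases found through r_alias_refs,
 * since a write through an alias is a write to the underlying tensor. */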
967 | ||||
968 | void ccv_nnc_symbolic_graph_simplify(ccv_nnc_symbolic_graph_t* const graph, const int* const passes, const int pass_size, const ccv_nnc_tensor_symbol_t* const binds, const int bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size) | |||
969 | { | |||
970 | ccv_nnc_symbolic_graph_simplify_t* const simplify = _ccv_nnc_symbolic_graph_simplify_new(graph, sources, source_size, destinations, destination_size); | |||
971 | int i; | |||
972 | for (i = 0; i < pass_size; i++) | |||
973 | switch (passes[i]) | |||
974 | { | |||
975 | case CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION: | |||
976 | _ccv_nnc_symbolic_graph_common_subexpression_elimination(simplify, outputs, output_size); | |||
977 | break; | |||
978 | case CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT: | |||
979 | _ccv_nnc_symbolic_graph_data_transfer_opt(simplify, binds, bind_size, outputs, output_size); | |||
980 | break; | |||
981 | case CCV_NNC_SIMPLIFY_GRAPH_PRUNING: | |||
982 | _ccv_nnc_symbolic_graph_pruning(simplify, outputs, output_size); | |||
983 | break; | |||
984 | case CCV_NNC_SIMPLIFY_OPS_FUSION: | |||
985 | _ccv_nnc_symbolic_graph_ops_fusion(simplify, outputs, output_size); | |||
986 | break; | |||
987 | } | |||
988 | _ccv_nnc_symbolic_graph_simplify_apply(simplify); | |||
989 | _ccv_nnc_symbolic_graph_simplify_free(simplify); | |||
990 | } |
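/* A sketch of a typical call site (assumes the SYMBOLIC_GRAPH_PASSES,
 * TENSOR_SYMBOL_LIST, SYMBOLIC_GRAPH_SOURCES and SYMBOLIC_GRAPH_DESTINATIONS
 * convenience macros; check ccv_nnc.h / ccv_nnc_easy.h for the exact forms):
 *
 *   ccv_nnc_symbolic_graph_simplify(symbolic_graph,
 *   	SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_OPS_FUSION,
 *   		CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
 *   		CCV_NNC_SIMPLIFY_GRAPH_PRUNING),
 *   	0, 0, // no binds
 *   	TENSOR_SYMBOL_LIST(loss),
 *   	SYMBOLIC_GRAPH_SOURCES(symbolic_graph),
 *   	SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph));
 *
 * Passes run in the order given, mutating the shared dead-exec / dead-tensor
 * marks; the accumulated edits are applied to the graph once at the end by
 * _ccv_nnc_symbolic_graph_simplify_apply. */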