File: | nnc/ccv_nnc_symbolic_graph_simplify.c |
Warning: | line 800, column 27 The left expression of the compound assignment is an uninitialized value. The computed value will also be garbage |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | #include "ccv_nnc.h" | |||
2 | #include "ccv_nnc_easy.h" | |||
3 | #include "ccv_nnc_internal.h" | |||
4 | #include "ccv_internal.h" | |||
5 | #include "_ccv_nnc_symbolic_graph.h" | |||
6 | #include "3rdparty/siphash/siphash24.h" | |||
7 | ||||
8 | // MARK - Level-3.5 API | |||
9 | ||||
10 | static uint8_t key_siphash[16] = "graphcsekvlibnnc"; | |||
11 | ||||
12 | typedef struct { | |||
13 | int tensor_symbol_info_size; | |||
14 | int exec_symbol_info_size; | |||
15 | ccv_nnc_symbolic_graph_t* graph; | |||
16 | ccv_nnc_graph_visit_t* visit; | |||
17 | ccv_nnc_tensor_symbol_info_t* tensor_symbol_info; | |||
18 | ccv_nnc_graph_exec_symbol_info_t* exec_symbol_info; | |||
19 | uint32_t* exec_dead; // Mark a exec is dead and need to be cleared, each bit represent a exec. | |||
20 | uint32_t* tensor_dead; // Mark a tensor is dead and need to be cleared, each bit represent a tensor. | |||
21 | int* output_execs; // Mapping from tensor to the exec that generates this tensor. | |||
22 | } ccv_nnc_symbolic_graph_simplify_t; | |||
23 | ||||
24 | static void _ccv_nnc_symbolic_graph_simplify_update_output_execs(ccv_nnc_symbolic_graph_simplify_t* const simplify) | |||
25 | { | |||
26 | int i; | |||
27 | for (i = 0; i < simplify->tensor_symbol_info_size; i++) | |||
28 | simplify->output_execs[i] = -1; | |||
29 | ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size ; _i_++) { const int idx __attribute__((unused)) = (simplify-> visit)->node[_i_].index; const int _node_unused_ __attribute__ ((unused)) = (simplify->visit)->node[_i_].term; typeof ( (simplify->exec_symbol_info)) const node __attribute__((unused )) = (simplify->exec_symbol_info) + idx; { | |||
30 | if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f))) | |||
31 | continue; | |||
32 | for (i = 0; i < node->output_size; i++) | |||
33 | if (node->outputs[i] >= 0) | |||
34 | simplify->output_execs[node->outputs[i]] = idx; // A tensor can only be written once. | |||
35 | } ccv_nnc_graph_visit_endfor} } | |||
36 | } | |||
37 | ||||
38 | static ccv_nnc_symbolic_graph_simplify_t* _ccv_nnc_symbolic_graph_simplify_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size) | |||
39 | { | |||
40 | ccv_nnc_symbolic_graph_simplify_t* const simplify = (ccv_nnc_symbolic_graph_simplify_t*)ccmallocmalloc(sizeof(ccv_nnc_symbolic_graph_simplify_t)); | |||
41 | simplify->graph = graph; | |||
42 | simplify->visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0), graph->exec_symbol_info->rnum, sources, source_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc (sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) * ((graph->exec_symbol_info->rnum) - 1)); _visit_->size = 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_ = 0; for (_i_ = 0; _i_ < (graph->exec_symbol_info-> rnum); _i_++) _incoming_edges_ += (((ccv_nnc_graph_exec_symbol_info_t *)((void*)(((char*)((graph->exec_symbol_info)->data)) + (size_t)(graph->exec_symbol_info)->rsize * (size_t)(0) )))[_i_].outgoings) ? ((ccv_nnc_graph_exec_symbol_info_t*)((void *)(((char*)((graph->exec_symbol_info)->data)) + (size_t )(graph->exec_symbol_info)->rsize * (size_t)(0))))[_i_] .outgoings->rnum : 0; const int _heap_mem_ = ((graph->exec_symbol_info ->rnum) + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t *)malloc(sizeof(ccv_nnc_incoming_t) * (graph->exec_symbol_info ->rnum) + sizeof(int32_t) * ((graph->exec_symbol_info-> rnum) * 2 + _incoming_edges_)); else _incomings_ = (ccv_nnc_incoming_t *)__builtin_alloca (sizeof(ccv_nnc_incoming_t) * (graph->exec_symbol_info ->rnum) + sizeof(int32_t) * ((graph->exec_symbol_info-> rnum) * 2 + _incoming_edges_)); memset(_incomings_, 0, sizeof (ccv_nnc_incoming_t) * (graph->exec_symbol_info->rnum)) ; int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (graph-> exec_symbol_info->rnum)), (int32_t*)(_incomings_ + (graph-> exec_symbol_info->rnum)) + (graph->exec_symbol_info-> rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_symbol_info ->rnum); for (_i_ = 0; _i_ < (source_size); _i_++) { (( void) sizeof (((sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources)[_i_].graph == graph) ; else __assert_fail ( "(sources)[_i_].graph == graph", "ccv_nnc_symbolic_graph_simplify.c" , 42, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(sources )[_i_].d].r = 1; _exists_[0][_i_] = (sources)[_i_].d; } int _exist_size_ [2] = { (source_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 1) continue; _incomings_[ _idx_].r = 2; if (((ccv_nnc_graph_exec_symbol_info_t*)((void* )(((char*)((graph->exec_symbol_info)->data)) + (size_t) (graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_ ].outgoings) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_symbol_info_t *)((void*)(((char*)((graph->exec_symbol_info)->data)) + (size_t)(graph->exec_symbol_info)->rsize * (size_t)(0) )))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*) ((void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void *)(((char*)((graph->exec_symbol_info)->data)) + (size_t )(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_ ].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t *)((void*)(((char*)((graph->exec_symbol_info)->data)) + (size_t)(graph->exec_symbol_info)->rsize * (size_t)(0) )))[_idx_].outgoings)->rsize * (size_t)(_j_))); ++_incomings_ [d].c; if (_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void) sizeof ((_exist_size_[_q_] < (graph->exec_symbol_info ->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph->exec_symbol_info->rnum)) ; else __assert_fail ( "_exist_size_[_q_] < (graph->exec_symbol_info->rnum)" , "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (( (sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ( (sources)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph" , "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__ ); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_] = (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_ [1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_ ][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[ _idx_].r = 4; if (((ccv_nnc_graph_exec_symbol_info_t*)((void* )(((char*)((graph->exec_symbol_info)->data)) + (size_t) (graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_ ].outgoings) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_symbol_info_t *)((void*)(((char*)((graph->exec_symbol_info)->data)) + (size_t)(graph->exec_symbol_info)->rsize * (size_t)(0) )))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*) ((void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void *)(((char*)((graph->exec_symbol_info)->data)) + (size_t )(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_ ].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t *)((void*)(((char*)((graph->exec_symbol_info)->data)) + (size_t)(graph->exec_symbol_info)->rsize * (size_t)(0) )))[_idx_].outgoings)->rsize * (size_t)(_j_))); if (_incomings_ [d].edges == 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_ [d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges - 1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; if (_incomings_ [d].r != 2) continue; _incomings_[d].r = 3; ((void) sizeof (( _exist_size_[_q_] < (graph->exec_symbol_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph ->exec_symbol_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_symbol_info->rnum)" , "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph", "ccv_nnc_symbolic_graph_simplify.c" , 42, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(destinations )[_i_].d].r = 5; _exists_[0][_i_] = (destinations)[_i_].d; } _exist_size_ [0] = (destination_size); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 5) continue ; _incomings_[_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_ [d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof (( _exist_size_[_q_] < (graph->exec_symbol_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph ->exec_symbol_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_symbol_info->rnum)" , "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof (((destinations)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph", "ccv_nnc_symbolic_graph_simplify.c" , 42, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(destinations )[_i_].d].d = 1; } for (_i_ = 0; _i_ < (source_size); _i_++ ) { ((void) sizeof (((sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph", "ccv_nnc_symbolic_graph_simplify.c" , 42, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_ ] = (sources)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = ( source_size); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_ [_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_ ]; _visit_->node[_visit_->size].index = ((_idx_)); _visit_ ->node[_visit_->size].term = ((_incomings_[_idx_].d)); ++ _visit_->size;; if (_incomings_[_idx_].d) { ++_d_; _incomings_ [_idx_].r = 7; } if (((ccv_nnc_graph_exec_symbol_info_t*)((void *)(((char*)((graph->exec_symbol_info)->data)) + (size_t )(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_ ].outgoings) { if (((ccv_nnc_graph_exec_symbol_info_t*)((void *)(((char*)((graph->exec_symbol_info)->data)) + (size_t )(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_ ].outgoings->rnum == 1) { const int d = *(int*)((void*)((( char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void*)(((char*) ((graph->exec_symbol_info)->data)) + (size_t)(graph-> exec_symbol_info)->rsize * (size_t)(0))))[_idx_].outgoings )->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t*)( (void*)(((char*)((graph->exec_symbol_info)->data)) + (size_t )(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_ ].outgoings)->rsize * (size_t)(0))); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (destination_size)) { _exists_[_p_][_i_] = d; continue ; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_symbol_info_t *)((void*)(((char*)((graph->exec_symbol_info)->data)) + (size_t)(graph->exec_symbol_info)->rsize * (size_t)(0) )))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*) ((void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void *)(((char*)((graph->exec_symbol_info)->data)) + (size_t )(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_ ].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t *)((void*)(((char*)((graph->exec_symbol_info)->data)) + (size_t)(graph->exec_symbol_info)->rsize * (size_t)(0) )))[_idx_].outgoings)->rsize * (size_t)(_j_))); --_incomings_ [d].c; if (_incomings_[d].c == 0 && _incomings_[d].r == 6 && _d_ < (destination_size)) { ((void) sizeof ( (_exist_size_[_q_] < (graph->exec_symbol_info->rnum) ) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph ->exec_symbol_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_symbol_info->rnum)" , "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__ ); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[ _q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = ( _i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { ( (void) sizeof (((destinations)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph", "ccv_nnc_symbolic_graph_simplify.c" , 42, __extension__ __PRETTY_FUNCTION__); })); if (_incomings_ [(destinations)[_i_].d].r == 7) continue; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_].d].c == 0) ? 1 : 0) , __extension__ ({ if (_incomings_[(destinations)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0" , "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__ ); })); } else if (_incomings_[(destinations)[_i_].d].c > 0 ) continue; _visit_->node[_visit_->size].index = (((destinations )[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_ [(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_ ) free(_incomings_); } while (0);; ((void) sizeof ((_visit_-> size <= (graph->exec_symbol_info->rnum)) ? 1 : 0), __extension__ ({ if (_visit_->size <= (graph->exec_symbol_info-> rnum)) ; else __assert_fail ("_visit_->size <= (graph->exec_symbol_info->rnum)" , "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__ ); })); _visit_; }); | |||
43 | simplify->tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_info_t) * graph->tensor_symbol_info->rnum); | |||
44 | simplify->exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_info_t) * graph->exec_symbol_info->rnum); | |||
45 | ccv_nnc_symbolic_graph_symbol_infer(graph, simplify->visit, sources, source_size, destinations, destination_size, 0, 0, simplify->tensor_symbol_info, simplify->exec_symbol_info); | |||
46 | simplify->tensor_symbol_info_size = graph->tensor_symbol_info->rnum; | |||
47 | simplify->exec_symbol_info_size = graph->exec_symbol_info->rnum; | |||
48 | simplify->exec_dead = cccalloccalloc(((simplify->exec_symbol_info_size + 31) >> 5) + ((simplify->tensor_symbol_info_size + 31) >> 5), sizeof(uint32_t)); | |||
49 | simplify->tensor_dead = simplify->exec_dead + ((simplify->exec_symbol_info_size + 31) >> 5); | |||
50 | simplify->output_execs = (int*)ccmallocmalloc(sizeof(int) * simplify->tensor_symbol_info_size); | |||
51 | return simplify; | |||
52 | } | |||
53 | ||||
54 | static void _ccv_nnc_symbolic_graph_simplify_apply(ccv_nnc_symbolic_graph_simplify_t* const simplify) | |||
55 | { | |||
56 | int i, j; | |||
57 | for (i = 0; i < simplify->exec_symbol_info_size; i++) | |||
58 | if (simplify->exec_dead[i >> 5] & (1u << (i & 0x1f))) | |||
59 | ccv_nnc_graph_exec_symbol_free(simplify->graph, (ccv_nnc_graph_exec_symbol_t){ | |||
60 | .d = i, | |||
61 | .graph = simplify->graph, | |||
62 | }); | |||
63 | else // If it is not marked as dead, go through to unmark tensor | |||
64 | for (j = 0; j < simplify->exec_symbol_info[i].output_size; j++) | |||
65 | { | |||
66 | const int d = simplify->exec_symbol_info[i].outputs[j]; | |||
67 | if (d >= 0) | |||
68 | simplify->tensor_dead[d >> 5] &= ~(1u << (d & 0x1f)); | |||
69 | } | |||
70 | for (i = 0; i < simplify->tensor_symbol_info_size; i++) | |||
71 | if (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))) | |||
72 | ccv_nnc_tensor_symbol_free(simplify->graph, (ccv_nnc_tensor_symbol_t){ | |||
73 | .d = i, | |||
74 | .graph = simplify->graph, | |||
75 | }); | |||
76 | } | |||
77 | ||||
78 | static void _ccv_nnc_symbolic_graph_simplify_free(ccv_nnc_symbolic_graph_simplify_t* const simplify) | |||
79 | { | |||
80 | ccv_nnc_graph_visit_free(simplify->visit); | |||
81 | ccfreefree(simplify->tensor_symbol_info); | |||
82 | ccfreefree(simplify->exec_symbol_info); | |||
83 | ccfreefree(simplify->exec_dead); | |||
84 | ccfreefree(simplify->output_execs); | |||
85 | ccfreefree(simplify); | |||
86 | } | |||
87 | ||||
88 | typedef struct { | |||
89 | int d; | |||
90 | int ifbit; | |||
91 | uint64_t hash; | |||
92 | } ccv_nnc_cse_hash_t; | |||
93 | ||||
94 | static int _ccv_nnc_cse_hash_find(ccv_nnc_cse_hash_t* const hash_map, const uint64_t hash, const int map_size) | |||
95 | { | |||
96 | assert(hash > 0)((void) sizeof ((hash > 0) ? 1 : 0), __extension__ ({ if ( hash > 0) ; else __assert_fail ("hash > 0", "ccv_nnc_symbolic_graph_simplify.c" , 96, __extension__ __PRETTY_FUNCTION__); })); | |||
97 | int i, j; | |||
98 | i = (hash - 1) % map_size; | |||
99 | for (j = 0; ; j++, i++) | |||
100 | { | |||
101 | if (i >= map_size) | |||
102 | i = 0; | |||
103 | if (j > hash_map[i].ifbit) | |||
104 | return -1; | |||
105 | if (hash_map[i].hash == hash) | |||
106 | return hash_map[i].d; | |||
107 | } | |||
108 | } | |||
109 | ||||
110 | static void _ccv_nnc_cse_hash_add(ccv_nnc_cse_hash_t* const hash_map, uint64_t hash, int d, const int map_size) | |||
111 | { | |||
112 | assert(hash > 0)((void) sizeof ((hash > 0) ? 1 : 0), __extension__ ({ if ( hash > 0) ; else __assert_fail ("hash > 0", "ccv_nnc_symbolic_graph_simplify.c" , 112, __extension__ __PRETTY_FUNCTION__); })); | |||
113 | int i, j; | |||
114 | i = (hash - 1) % map_size; | |||
115 | for (j = 0; ; j++, i++) | |||
116 | { | |||
117 | if (i >= map_size) | |||
118 | i = 0; | |||
119 | if (hash_map[i].hash == hash) // Already exists, do nothing. | |||
120 | return; | |||
121 | if (hash_map[i].hash == 0) | |||
122 | { | |||
123 | // Insert. | |||
124 | hash_map[i].d = d; | |||
125 | hash_map[i].ifbit = j; | |||
126 | hash_map[i].hash = hash; | |||
127 | return; | |||
128 | } | |||
129 | if (j > hash_map[i].ifbit) | |||
130 | { | |||
131 | const ccv_nnc_cse_hash_t old_hash = hash_map[i]; | |||
132 | // Swap, and continue, until find an empty slot. | |||
133 | hash_map[i].d = d; | |||
134 | hash_map[i].ifbit = j; | |||
135 | hash_map[i].hash = hash; | |||
136 | d = old_hash.d; | |||
137 | j = old_hash.ifbit; | |||
138 | hash = old_hash.hash; | |||
139 | } | |||
140 | } | |||
141 | } | |||
142 | ||||
143 | static int _ccv_nnc_symbolic_graph_update_refs(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const int* const refs, const int output_exec_ref_dead) | |||
144 | { | |||
145 | int i, j; | |||
146 | // Go over refs, if a tensor is an alias, mark it reference to the new one. | |||
147 | for (i = 0; i < simplify->tensor_symbol_info_size; i++) | |||
148 | if (refs[i] >= 0) | |||
149 | // Mark this tensor as dead. | |||
150 | simplify->tensor_dead[i >> 5] |= (1u << (i & 0x1f)); | |||
151 | else if (simplify->tensor_symbol_info[i].alias_ref && refs[simplify->tensor_symbol_info[i].alias_ref - 1] >= 0) { | |||
152 | const int alias_ref = simplify->tensor_symbol_info[i].alias_ref - 1; | |||
153 | simplify->tensor_symbol_info[i].alias_ref = refs[alias_ref] + 1; | |||
154 | ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i)((void*)(((char*)((simplify->graph->tensor_symbol_info) ->data)) + (size_t)(simplify->graph->tensor_symbol_info )->rsize * (size_t)(i))))->alias_ref = refs[alias_ref] + 1; | |||
155 | } | |||
156 | for (i = 0; i < output_size; i++) | |||
157 | // If the output is an alias, that's fine, because if the alias is re-binded, we are good. | |||
158 | simplify->tensor_dead[outputs[i].d >> 5] &= ~(1u << (outputs[i].d & 0x1f)); // Undead for output tensor symbols. | |||
159 | // Merge s_refs if the tensor is dead. | |||
160 | for (i = 0; i < simplify->tensor_symbol_info_size; i++) | |||
161 | if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f)))) | |||
162 | { | |||
163 | const int ref = refs[i]; | |||
164 | if (simplify->tensor_symbol_info[i].s_ref && simplify->tensor_symbol_info[i].s_ref->rnum) | |||
165 | { | |||
166 | if (!simplify->tensor_symbol_info[ref].s_ref) // If there is no s_ref, simple, just assign the pointer and set the old one to nil. | |||
167 | { | |||
168 | simplify->tensor_symbol_info[ref].s_ref = simplify->tensor_symbol_info[i].s_ref; | |||
169 | simplify->tensor_symbol_info[i].s_ref = 0; | |||
170 | ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i)((void*)(((char*)((simplify->graph->tensor_symbol_info) ->data)) + (size_t)(simplify->graph->tensor_symbol_info )->rsize * (size_t)(i))))->s_ref = 0; | |||
171 | } else { | |||
172 | ccv_array_t* const ref_s_ref = simplify->tensor_symbol_info[ref].s_ref; | |||
173 | ccv_array_t* const i_s_ref = simplify->tensor_symbol_info[i].s_ref; | |||
174 | const int ref_s_ref_rnum = ref_s_ref->rnum; | |||
175 | int flag = 0; | |||
176 | // Detect conflict, if there is, undead. | |||
177 | for (j = 0; !flag && j < ccv_min(ref_s_ref_rnum, i_s_ref->rnum)({ typeof (ref_s_ref_rnum) _a = (ref_s_ref_rnum); typeof (i_s_ref ->rnum) _b = (i_s_ref->rnum); (_a < _b) ? _a : _b; } ); j++) | |||
178 | { | |||
179 | const int ref_s_ref_k = *(int*)ccv_array_get(ref_s_ref, j)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref )->rsize * (size_t)(j))); | |||
180 | const int i_s_ref_k = *(int*)ccv_array_get(i_s_ref, j)((void*)(((char*)((i_s_ref)->data)) + (size_t)(i_s_ref)-> rsize * (size_t)(j))); | |||
181 | // If for the same sub-graph, they have different tensors linked, we cannot merge these two. | |||
182 | flag = (ref_s_ref_k > 0 && i_s_ref_k > 0 && ref_s_ref_k != i_s_ref_k); | |||
183 | } | |||
184 | if (flag) | |||
185 | { | |||
186 | simplify->tensor_dead[i >> 5] &= ~(1u << (i & 0x1f)); // Undead | |||
187 | continue; | |||
188 | } | |||
189 | if (ref_s_ref_rnum < i_s_ref->rnum) | |||
190 | { | |||
191 | ccv_array_resize(ref_s_ref, i_s_ref->rnum); | |||
192 | memcpy(ccv_array_get(ref_s_ref, ref_s_ref_rnum)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref )->rsize * (size_t)(ref_s_ref_rnum))), ccv_array_get(i_s_ref, ref_s_ref_rnum)((void*)(((char*)((i_s_ref)->data)) + (size_t)(i_s_ref)-> rsize * (size_t)(ref_s_ref_rnum))), sizeof(int) * (i_s_ref->rnum - ref_s_ref_rnum)); | |||
193 | } | |||
194 | for (j = 0; j < ccv_min(ref_s_ref_rnum, i_s_ref->rnum)({ typeof (ref_s_ref_rnum) _a = (ref_s_ref_rnum); typeof (i_s_ref ->rnum) _b = (i_s_ref->rnum); (_a < _b) ? _a : _b; } ); j++) | |||
195 | { | |||
196 | const int ref_s_ref_k = *(int*)ccv_array_get(ref_s_ref, j)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref )->rsize * (size_t)(j))); | |||
197 | const int i_s_ref_k = *(int*)ccv_array_get(i_s_ref, j)((void*)(((char*)((i_s_ref)->data)) + (size_t)(i_s_ref)-> rsize * (size_t)(j))); | |||
198 | assert(ref_s_ref_k == 0 || i_s_ref_k == 0)((void) sizeof ((ref_s_ref_k == 0 || i_s_ref_k == 0) ? 1 : 0) , __extension__ ({ if (ref_s_ref_k == 0 || i_s_ref_k == 0) ; else __assert_fail ("ref_s_ref_k == 0 || i_s_ref_k == 0", "ccv_nnc_symbolic_graph_simplify.c" , 198, __extension__ __PRETTY_FUNCTION__); })); | |||
199 | if (i_s_ref_k) | |||
200 | *(int*)ccv_array_get(ref_s_ref, j)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref )->rsize * (size_t)(j))) = i_s_ref_k; | |||
201 | } | |||
202 | ccv_array_free(simplify->tensor_symbol_info[i].s_ref); | |||
203 | simplify->tensor_symbol_info[i].s_ref = 0; | |||
204 | ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i)((void*)(((char*)((simplify->graph->tensor_symbol_info) ->data)) + (size_t)(simplify->graph->tensor_symbol_info )->rsize * (size_t)(i))))->s_ref = 0; | |||
205 | for (j = 0; j < ref_s_ref->rnum; j++) | |||
206 | { | |||
207 | const int ref_k = *(int*)ccv_array_get(ref_s_ref, j)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref )->rsize * (size_t)(j))) - 1; | |||
208 | if (ref_k >= 0) | |||
209 | { | |||
210 | ccv_nnc_symbolic_graph_t* const sub_graph = *(ccv_nnc_symbolic_graph_t**)ccv_array_get(simplify->graph->sub_graphs, j)((void*)(((char*)((simplify->graph->sub_graphs)->data )) + (size_t)(simplify->graph->sub_graphs)->rsize * ( size_t)(j))); | |||
211 | assert(sub_graph)((void) sizeof ((sub_graph) ? 1 : 0), __extension__ ({ if (sub_graph ) ; else __assert_fail ("sub_graph", "ccv_nnc_symbolic_graph_simplify.c" , 211, __extension__ __PRETTY_FUNCTION__); })); | |||
212 | // Update its p_ref. | |||
213 | ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(sub_graph->tensor_symbol_info, ref_k)((void*)(((char*)((sub_graph->tensor_symbol_info)->data )) + (size_t)(sub_graph->tensor_symbol_info)->rsize * ( size_t)(ref_k))))->p_ref = ref + 1; | |||
214 | } | |||
215 | } | |||
216 | } | |||
217 | assert(simplify->tensor_symbol_info[i].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->s_ref)((void) sizeof ((simplify->tensor_symbol_info[i].s_ref == ( (ccv_nnc_tensor_symbol_info_t*)((void*)(((char*)((simplify-> graph->tensor_symbol_info)->data)) + (size_t)(simplify-> graph->tensor_symbol_info)->rsize * (size_t)(i))))-> s_ref) ? 1 : 0), __extension__ ({ if (simplify->tensor_symbol_info [i].s_ref == ((ccv_nnc_tensor_symbol_info_t*)((void*)(((char* )((simplify->graph->tensor_symbol_info)->data)) + (size_t )(simplify->graph->tensor_symbol_info)->rsize * (size_t )(i))))->s_ref) ; else __assert_fail ("simplify->tensor_symbol_info[i].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->s_ref" , "ccv_nnc_symbolic_graph_simplify.c", 217, __extension__ __PRETTY_FUNCTION__ ); })); | |||
218 | assert(simplify->tensor_symbol_info[ref].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref))->s_ref)((void) sizeof ((simplify->tensor_symbol_info[ref].s_ref == ((ccv_nnc_tensor_symbol_info_t*)((void*)(((char*)((simplify-> graph->tensor_symbol_info)->data)) + (size_t)(simplify-> graph->tensor_symbol_info)->rsize * (size_t)(ref))))-> s_ref) ? 1 : 0), __extension__ ({ if (simplify->tensor_symbol_info [ref].s_ref == ((ccv_nnc_tensor_symbol_info_t*)((void*)(((char *)((simplify->graph->tensor_symbol_info)->data)) + ( size_t)(simplify->graph->tensor_symbol_info)->rsize * (size_t)(ref))))->s_ref) ; else __assert_fail ("simplify->tensor_symbol_info[ref].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref))->s_ref" , "ccv_nnc_symbolic_graph_simplify.c", 218, __extension__ __PRETTY_FUNCTION__ ); })); | |||
219 | } | |||
220 | } | |||
221 | // Going through refs that we are updating, going through its p_ref to make sure both are updated. | |||
222 | for (i = 0; i < simplify->tensor_symbol_info_size; i++) | |||
223 | if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))) && simplify->tensor_symbol_info[i].p_ref) | |||
224 | { | |||
225 | const int ref = refs[i]; | |||
226 | const int p_ref = simplify->tensor_symbol_info[i].p_ref - 1; | |||
227 | assert(p_ref >= 0)((void) sizeof ((p_ref >= 0) ? 1 : 0), __extension__ ({ if (p_ref >= 0) ; else __assert_fail ("p_ref >= 0", "ccv_nnc_symbolic_graph_simplify.c" , 227, __extension__ __PRETTY_FUNCTION__); })); | |||
228 | assert(simplify->graph->p)((void) sizeof ((simplify->graph->p) ? 1 : 0), __extension__ ({ if (simplify->graph->p) ; else __assert_fail ("simplify->graph->p" , "ccv_nnc_symbolic_graph_simplify.c", 228, __extension__ __PRETTY_FUNCTION__ ); })); | |||
229 | ccv_array_t* const s_ref = ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->p->tensor_symbol_info, p_ref)((void*)(((char*)((simplify->graph->p->tensor_symbol_info )->data)) + (size_t)(simplify->graph->p->tensor_symbol_info )->rsize * (size_t)(p_ref))))->s_ref; | |||
230 | const int s_idx = simplify->graph->p_idx - 1; | |||
231 | assert(s_idx >= 0)((void) sizeof ((s_idx >= 0) ? 1 : 0), __extension__ ({ if (s_idx >= 0) ; else __assert_fail ("s_idx >= 0", "ccv_nnc_symbolic_graph_simplify.c" , 231, __extension__ __PRETTY_FUNCTION__); })); | |||
232 | assert(s_ref && s_ref->rnum > s_idx)((void) sizeof ((s_ref && s_ref->rnum > s_idx) ? 1 : 0), __extension__ ({ if (s_ref && s_ref->rnum > s_idx) ; else __assert_fail ("s_ref && s_ref->rnum > s_idx" , "ccv_nnc_symbolic_graph_simplify.c", 232, __extension__ __PRETTY_FUNCTION__ ); })); | |||
233 | *(int*)ccv_array_get(s_ref, s_idx)((void*)(((char*)((s_ref)->data)) + (size_t)(s_ref)->rsize * (size_t)(s_idx))) = ref + 1; // Update so it references to the new s_ref. | |||
234 | assert(!simplify->tensor_symbol_info[ref].p_ref)((void) sizeof ((!simplify->tensor_symbol_info[ref].p_ref) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info [ref].p_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[ref].p_ref" , "ccv_nnc_symbolic_graph_simplify.c", 234, __extension__ __PRETTY_FUNCTION__ ); })); | |||
235 | simplify->tensor_symbol_info[ref].p_ref = p_ref + 1; | |||
236 | ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref)((void*)(((char*)((simplify->graph->tensor_symbol_info) ->data)) + (size_t)(simplify->graph->tensor_symbol_info )->rsize * (size_t)(ref))))->p_ref = p_ref + 1; | |||
237 | } | |||
238 | // Now go over exec to mark them as dead because we don't need these to generate refs. | |||
239 | if (output_exec_ref_dead) | |||
240 | for (i = 0; i < simplify->tensor_symbol_info_size; i++) | |||
241 | if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f)))) | |||
242 | { | |||
243 | const int output_exec = simplify->output_execs[i]; | |||
244 | assert(output_exec >= 0)((void) sizeof ((output_exec >= 0) ? 1 : 0), __extension__ ({ if (output_exec >= 0) ; else __assert_fail ("output_exec >= 0" , "ccv_nnc_symbolic_graph_simplify.c", 244, __extension__ __PRETTY_FUNCTION__ ); })); | |||
245 | const ccv_nnc_graph_exec_symbol_info_t* const symbol_info = simplify->exec_symbol_info + output_exec; | |||
246 | int flag = 0; | |||
247 | for (j = 0; !flag && j < symbol_info->output_size; j++) | |||
248 | { | |||
249 | const int d = symbol_info->outputs[j]; | |||
250 | if (d >= 0) | |||
251 | flag = (!(simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f)))); // If some of the output is not dead, we cannot proceed. | |||
252 | } | |||
253 | if (!flag) // If all outputs are dead, mark the exec as dead. | |||
254 | simplify->exec_dead[output_exec >> 5] |= (1u << (output_exec & 0x1f)); | |||
255 | } | |||
256 | int updated_refs = 0; | |||
257 | // Go over replace inputs / outputs. | |||
258 | ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size ; _i_++) { const int idx __attribute__((unused)) = (simplify-> visit)->node[_i_].index; const int _node_unused_ __attribute__ ((unused)) = (simplify->visit)->node[_i_].term; typeof ( (simplify->exec_symbol_info)) const node __attribute__((unused )) = (simplify->exec_symbol_info) + idx; { | |||
259 | for (i = 0; i < node->input_size; i++) | |||
260 | { | |||
261 | const int d = node->inputs[i]; | |||
262 | if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f)))) | |||
263 | { | |||
264 | node->inputs[i] = refs[d]; // It can be replaced. | |||
265 | updated_refs = 1; | |||
266 | } | |||
267 | } | |||
268 | for (i = 0; i < node->output_size; i++) | |||
269 | { | |||
270 | const int d = node->outputs[i]; | |||
271 | if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f)))) | |||
272 | { | |||
273 | node->outputs[i] = refs[d]; // It can be replaced. | |||
274 | updated_refs = 1; | |||
275 | } | |||
276 | } | |||
277 | assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->inputs == node->inputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*) (((char*)((simplify->graph->exec_symbol_info)->data) ) + (size_t)(simplify->graph->exec_symbol_info)->rsize * (size_t)(idx))))->inputs == node->inputs) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_info_t*)((void*)(((char*) ((simplify->graph->exec_symbol_info)->data)) + (size_t )(simplify->graph->exec_symbol_info)->rsize * (size_t )(idx))))->inputs == node->inputs) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->inputs == node->inputs" , "ccv_nnc_symbolic_graph_simplify.c", 277, __extension__ __PRETTY_FUNCTION__ ); })); | |||
278 | assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->outputs == node->outputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*) (((char*)((simplify->graph->exec_symbol_info)->data) ) + (size_t)(simplify->graph->exec_symbol_info)->rsize * (size_t)(idx))))->outputs == node->outputs) ? 1 : 0) , __extension__ ({ if (((ccv_nnc_graph_exec_symbol_info_t*)(( void*)(((char*)((simplify->graph->exec_symbol_info)-> data)) + (size_t)(simplify->graph->exec_symbol_info)-> rsize * (size_t)(idx))))->outputs == node->outputs) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->outputs == node->outputs" , "ccv_nnc_symbolic_graph_simplify.c", 278, __extension__ __PRETTY_FUNCTION__ ); })); | |||
279 | } ccv_nnc_graph_visit_endfor} } | |||
280 | const ccv_nnc_graph_exec_symbol_info_t* const p_node_info = simplify->graph->p ? (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->p->exec_symbol_info, simplify->graph->exec_idx - 1)((void*)(((char*)((simplify->graph->p->exec_symbol_info )->data)) + (size_t)(simplify->graph->p->exec_symbol_info )->rsize * (size_t)(simplify->graph->exec_idx - 1))) : 0; | |||
281 | if (p_node_info && (p_node_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)) | |||
282 | // Go over the while inputs as well. | |||
283 | for (i = 0; i < p_node_info->p_while.input_size; i++) | |||
284 | { | |||
285 | const int d = p_node_info->p_while.inputs[i]; | |||
286 | if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f)))) | |||
287 | { | |||
288 | p_node_info->p_while.inputs[i] = refs[d]; | |||
289 | updated_refs = 1; | |||
290 | } | |||
291 | } | |||
292 | return updated_refs; | |||
293 | } | |||
294 | ||||
295 | // This is a simple common sub-expression elimination implementation, particularly, we only replace the later computed output | |||
296 | // with the identical earlier computed output, and let the "elimination" part to the graph pruning. | |||
297 | static void _ccv_nnc_symbolic_graph_common_subexpression_elimination(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size) | |||
298 | { | |||
299 | _ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify); | |||
300 | // tensor_hash starts with 0s, and it is either marked with the tensor index + 1, or the hash of the computations. | |||
301 | uint64_t* const tensor_hash = (uint64_t*)cccalloccalloc(simplify->tensor_symbol_info_size, sizeof(uint64_t)); | |||
302 | int i; | |||
303 | ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size ; _i_++) { const int idx __attribute__((unused)) = (simplify-> visit)->node[_i_].index; const int _node_unused_ __attribute__ ((unused)) = (simplify->visit)->node[_i_].term; typeof ( (simplify->exec_symbol_info)) const node __attribute__((unused )) = (simplify->exec_symbol_info) + idx; { | |||
304 | // If already marked as dead, skip. | |||
305 | if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f))) | |||
306 | continue; | |||
307 | for (i = 0; i < node->input_size; i++) | |||
308 | { | |||
309 | assert(node->inputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->inputs[i] < simplify->tensor_symbol_info_size ) ? 1 : 0), __extension__ ({ if (node->inputs[i] < simplify ->tensor_symbol_info_size) ; else __assert_fail ("node->inputs[i] < simplify->tensor_symbol_info_size" , "ccv_nnc_symbolic_graph_simplify.c", 309, __extension__ __PRETTY_FUNCTION__ ); })); | |||
310 | // If no hash for the input, use the index + 1 as the hash. | |||
311 | if (node->inputs[i] >= 0 && tensor_hash[node->inputs[i]] == 0) | |||
312 | tensor_hash[node->inputs[i]] = node->inputs[i] + 1; | |||
313 | } | |||
314 | // We cannot support graph / custom command (we cannot model them properly). | |||
315 | if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD || | |||
316 | node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD || | |||
317 | node->cmd.cmd == CCV_NNC_CUSTOM_FORWARD || | |||
318 | node->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD || | |||
319 | // No need to process a opt disabled node. | |||
320 | (node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT)) | |||
321 | continue; | |||
322 | uint64_t hashout, hashin[3]; | |||
323 | siphash((uint8_t*)&hashin[0], (const uint8_t*)&node->cmd.info, sizeof(node->cmd.info), key_siphash); | |||
324 | siphash((uint8_t*)&hashin[1], (const uint8_t*)&node->hint, sizeof(node->hint), key_siphash); | |||
325 | hashin[2] = node->cmd.cmd; // Now actually hash the cmd name. | |||
326 | siphash((uint8_t*)&hashout, (const uint8_t*)hashin, sizeof(hashin), key_siphash); | |||
327 | // First, hash the cmd and the hints with the cmd. | |||
328 | // Note on alias, we cannot really generate proper hash for alias (yet). Thus, just treat alias as normal tensors. | |||
329 | for (i = 0; i < node->input_size; i++) | |||
330 | { | |||
331 | assert(node->inputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->inputs[i] < simplify->tensor_symbol_info_size ) ? 1 : 0), __extension__ ({ if (node->inputs[i] < simplify ->tensor_symbol_info_size) ; else __assert_fail ("node->inputs[i] < simplify->tensor_symbol_info_size" , "ccv_nnc_symbolic_graph_simplify.c", 331, __extension__ __PRETTY_FUNCTION__ ); })); | |||
332 | if (node->inputs[i] >= 0) | |||
333 | { | |||
334 | // Hash using the tensor hash. | |||
335 | hashin[0] = hashout; | |||
336 | hashin[1] = i; // Encode the positional information. | |||
337 | hashin[2] = tensor_hash[node->inputs[i]]; | |||
338 | } else { | |||
339 | // Hash using the input integer (could be special integer). | |||
340 | hashin[0] = hashout; | |||
341 | hashin[1] = i; // Encode the positional information. | |||
342 | hashin[2] = node->inputs[i]; | |||
343 | } | |||
344 | siphash((uint8_t*)&hashout, (const uint8_t*)hashin, sizeof(hashin), key_siphash); | |||
345 | } | |||
346 | for (i = 0; i < node->output_size; i++) | |||
347 | if (node->outputs[i] >= 0) | |||
348 | { | |||
349 | assert(node->outputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->outputs[i] < simplify->tensor_symbol_info_size ) ? 1 : 0), __extension__ ({ if (node->outputs[i] < simplify ->tensor_symbol_info_size) ; else __assert_fail ("node->outputs[i] < simplify->tensor_symbol_info_size" , "ccv_nnc_symbolic_graph_simplify.c", 349, __extension__ __PRETTY_FUNCTION__ ); })); | |||
350 | // Assigning once, especially now we don't consider aliases. | |||
351 | assert(tensor_hash[node->outputs[i]] == 0)((void) sizeof ((tensor_hash[node->outputs[i]] == 0) ? 1 : 0), __extension__ ({ if (tensor_hash[node->outputs[i]] == 0) ; else __assert_fail ("tensor_hash[node->outputs[i]] == 0" , "ccv_nnc_symbolic_graph_simplify.c", 351, __extension__ __PRETTY_FUNCTION__ ); })); | |||
352 | hashin[0] = hashout; | |||
353 | hashin[1] = i; // Positional information. | |||
354 | siphash((uint8_t*)&hashin[2], (const uint8_t*)&simplify->tensor_symbol_info[node->outputs[i]].info, | |||
355 | sizeof(simplify->tensor_symbol_info[node->outputs[i]].info), key_siphash); | |||
356 | // Generate hash for the output. | |||
357 | siphash((uint8_t*)&tensor_hash[node->outputs[i]], (const uint8_t*)hashin, sizeof(hashin), key_siphash); | |||
358 | } | |||
359 | } ccv_nnc_graph_visit_endfor} } | |||
360 | // Allocate 3 / 2 as much space, for the simple robin-hood open address hash map. | |||
361 | const int map_size = (simplify->tensor_symbol_info_size * 3 + 1) / 2; | |||
362 | int* const refs = (int*)ccmallocmalloc(sizeof(int) * simplify->tensor_symbol_info_size); | |||
363 | for (i = 0; i < simplify->tensor_symbol_info_size; i++) | |||
364 | refs[i] = -1; | |||
365 | ccv_nnc_cse_hash_t* const hash_map = (ccv_nnc_cse_hash_t*)cccalloccalloc(map_size, sizeof(ccv_nnc_cse_hash_t)); | |||
366 | // Now, all tensors are hashed, identify tensors with the same hash code, replace the ones that accessed later. | |||
367 | ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size ; _i_++) { const int idx __attribute__((unused)) = (simplify-> visit)->node[_i_].index; const int _node_unused_ __attribute__ ((unused)) = (simplify->visit)->node[_i_].term; typeof ( (simplify->exec_symbol_info)) const node __attribute__((unused )) = (simplify->exec_symbol_info) + idx; { | |||
368 | // If already marked as dead, skip. | |||
369 | if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f))) | |||
370 | continue; | |||
371 | // No need to visit a opt disabled node. | |||
372 | if (node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT) | |||
373 | continue; | |||
374 | for (i = 0; i < node->input_size; i++) | |||
375 | if (node->inputs[i] >= 0) | |||
376 | { | |||
377 | const int d = node->inputs[i]; | |||
378 | assert(tensor_hash[d])((void) sizeof ((tensor_hash[d]) ? 1 : 0), __extension__ ({ if (tensor_hash[d]) ; else __assert_fail ("tensor_hash[d]", "ccv_nnc_symbolic_graph_simplify.c" , 378, __extension__ __PRETTY_FUNCTION__); })); | |||
379 | const int new_d = _ccv_nnc_cse_hash_find(hash_map, tensor_hash[d], map_size); | |||
380 | if (new_d >= 0 && new_d != d) | |||
381 | { | |||
382 | // Check whether this can be replaced. | |||
383 | assert(refs[d] == -1 || refs[d] == new_d)((void) sizeof ((refs[d] == -1 || refs[d] == new_d) ? 1 : 0), __extension__ ({ if (refs[d] == -1 || refs[d] == new_d) ; else __assert_fail ("refs[d] == -1 || refs[d] == new_d", "ccv_nnc_symbolic_graph_simplify.c" , 383, __extension__ __PRETTY_FUNCTION__); })); | |||
384 | assert(!simplify->tensor_symbol_info[d].assign_ref)((void) sizeof ((!simplify->tensor_symbol_info[d].assign_ref ) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info [d].assign_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[d].assign_ref" , "ccv_nnc_symbolic_graph_simplify.c", 384, __extension__ __PRETTY_FUNCTION__ ); })); | |||
385 | assert(!simplify->tensor_symbol_info[d].r_assign_ref)((void) sizeof ((!simplify->tensor_symbol_info[d].r_assign_ref ) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info [d].r_assign_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[d].r_assign_ref" , "ccv_nnc_symbolic_graph_simplify.c", 385, __extension__ __PRETTY_FUNCTION__ ); })); | |||
386 | assert(!simplify->tensor_symbol_info[d].bypass_ref)((void) sizeof ((!simplify->tensor_symbol_info[d].bypass_ref ) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info [d].bypass_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[d].bypass_ref" , "ccv_nnc_symbolic_graph_simplify.c", 386, __extension__ __PRETTY_FUNCTION__ ); })); | |||
387 | assert(!simplify->tensor_symbol_info[new_d].assign_ref)((void) sizeof ((!simplify->tensor_symbol_info[new_d].assign_ref ) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info [new_d].assign_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[new_d].assign_ref" , "ccv_nnc_symbolic_graph_simplify.c", 387, __extension__ __PRETTY_FUNCTION__ ); })); | |||
388 | assert(!simplify->tensor_symbol_info[new_d].r_assign_ref)((void) sizeof ((!simplify->tensor_symbol_info[new_d].r_assign_ref ) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info [new_d].r_assign_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[new_d].r_assign_ref" , "ccv_nnc_symbolic_graph_simplify.c", 388, __extension__ __PRETTY_FUNCTION__ ); })); | |||
389 | assert(!simplify->tensor_symbol_info[new_d].bypass_ref)((void) sizeof ((!simplify->tensor_symbol_info[new_d].bypass_ref ) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info [new_d].bypass_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[new_d].bypass_ref" , "ccv_nnc_symbolic_graph_simplify.c", 389, __extension__ __PRETTY_FUNCTION__ ); })); | |||
390 | // Ignore if there is a pair_ref (again, pair_ref has side effect that is deeper (using tape)) | |||
391 | if (simplify->tensor_symbol_info[d].pair_ref) | |||
392 | continue; | |||
393 | // If both have p_ref, we cannot merge. | |||
394 | if (simplify->tensor_symbol_info[d].p_ref && simplify->tensor_symbol_info[new_d].p_ref) | |||
395 | continue; | |||
396 | // Merge s_refs from ref[d] later. | |||
397 | if (refs[d] != new_d) | |||
398 | refs[d] = new_d; | |||
399 | assert(simplify->output_execs[new_d] >= 0)((void) sizeof ((simplify->output_execs[new_d] >= 0) ? 1 : 0), __extension__ ({ if (simplify->output_execs[new_d] >= 0) ; else __assert_fail ("simplify->output_execs[new_d] >= 0" , "ccv_nnc_symbolic_graph_simplify.c", 399, __extension__ __PRETTY_FUNCTION__ ); })); | |||
400 | // Establish new dependency. | |||
401 | ccv_nnc_graph_exec_symbol_concat(simplify->graph, (ccv_nnc_graph_exec_symbol_t){ | |||
402 | .d = simplify->output_execs[new_d], | |||
403 | .graph = simplify->graph, | |||
404 | }, (ccv_nnc_graph_exec_symbol_t){ | |||
405 | .d = idx, | |||
406 | .graph = simplify->graph, | |||
407 | }); | |||
408 | } | |||
409 | } | |||
410 | // We can reuse the input, but we cannot do that for output of these commands. | |||
411 | if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD || | |||
412 | node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD || | |||
413 | node->cmd.cmd == CCV_NNC_CUSTOM_FORWARD || | |||
414 | node->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD) | |||
415 | continue; | |||
416 | for (i = 0; i < node->output_size; i++) | |||
417 | if (node->outputs[i] >= 0) // This tensor can be reused by others. | |||
418 | _ccv_nnc_cse_hash_add(hash_map, tensor_hash[node->outputs[i]], node->outputs[i], map_size); | |||
419 | } ccv_nnc_graph_visit_endfor} } | |||
420 | _ccv_nnc_symbolic_graph_update_refs(simplify, outputs, output_size, refs, 1 /* For these exec that generates refs, we don't need them any more. */); | |||
421 | ccfreefree(tensor_hash); | |||
422 | ccfreefree(hash_map); | |||
423 | ccfreefree(refs); | |||
424 | } | |||
425 | ||||
426 | static void _ccv_nnc_symbolic_graph_data_transfer_opt(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const binds, const int bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size) | |||
427 | { | |||
428 | _ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify); | |||
429 | uint32_t* const exec_dead = simplify->exec_dead; | |||
430 | const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = simplify->tensor_symbol_info; | |||
431 | int i; | |||
432 | uint32_t* const has_alias = cccalloccalloc(2 * ((simplify->tensor_symbol_info_size + 31) >> 5), sizeof(uint32_t)); | |||
433 | uint32_t* const has_binds = has_alias + ((simplify->tensor_symbol_info_size + 31) >> 5); | |||
434 | for (i = 0; i < bind_size; i++) | |||
435 | has_binds[binds[i].d >> 5] |= (1u << (binds[i].d & 0x1f)); | |||
436 | for (i = 0; i < output_size; i++) | |||
437 | has_binds[outputs[i].d >> 5] |= (1u << (outputs[i].d & 0x1f)); | |||
438 | int* const refs = (int*)ccmallocmalloc(sizeof(int) * simplify->tensor_symbol_info_size); | |||
439 | int updated_refs; | |||
440 | do { | |||
441 | memset(has_alias, 0, sizeof(uint32_t) * ((simplify->tensor_symbol_info_size + 31) >> 5)); | |||
442 | // Go through until no updates is possible. This won't result an infinite loop because every time, | |||
443 | // a tensor is eliminated. | |||
444 | for (i = 0; i < simplify->tensor_symbol_info_size; i++) | |||
445 | { | |||
446 | refs[i] = -1; | |||
447 | if (tensor_symbol_info[i].alias_ref) | |||
448 | { | |||
449 | const int alias_ref = tensor_symbol_info[i].alias_ref - 1; | |||
450 | has_alias[alias_ref >> 5] |= (1u << (alias_ref & 0x1f)); | |||
451 | } | |||
452 | } | |||
453 | ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size ; _i_++) { const int idx __attribute__((unused)) = (simplify-> visit)->node[_i_].index; const int _node_unused_ __attribute__ ((unused)) = (simplify->visit)->node[_i_].term; typeof ( (simplify->exec_symbol_info)) const node __attribute__((unused )) = (simplify->exec_symbol_info) + idx; { | |||
454 | // If already marked as dead, skip. | |||
455 | if (exec_dead[idx >> 5] & (1u << (idx & 0x1f))) | |||
456 | continue; | |||
457 | if (node->cmd.cmd != CCV_NNC_DATA_TRANSFER_FORWARD && | |||
458 | node->cmd.cmd != CCV_NNC_DATA_TRANSFER_BACKWARD && | |||
459 | // Conversion, if the datatype is the same, it is the data transfer op. | |||
460 | node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_FORWARD && | |||
461 | node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_BACKWARD && | |||
462 | // Format transform, if the format is the same, it is the data transfer op. | |||
463 | node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_FORWARD && | |||
464 | node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_BACKWARD) | |||
465 | continue; | |||
466 | if (node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT) // If optimization pass disabled, skip. | |||
467 | continue; | |||
468 | for (i = 0; i < node->output_size; i++) // For data transfer, we only respect output size. | |||
469 | if (node->inputs[i] >= 0 && node->outputs[i] >= 0) | |||
470 | { | |||
471 | assert(node->inputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->inputs[i] < simplify->tensor_symbol_info_size ) ? 1 : 0), __extension__ ({ if (node->inputs[i] < simplify ->tensor_symbol_info_size) ; else __assert_fail ("node->inputs[i] < simplify->tensor_symbol_info_size" , "ccv_nnc_symbolic_graph_simplify.c", 471, __extension__ __PRETTY_FUNCTION__ ); })); | |||
472 | assert(node->outputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->outputs[i] < simplify->tensor_symbol_info_size ) ? 1 : 0), __extension__ ({ if (node->outputs[i] < simplify ->tensor_symbol_info_size) ; else __assert_fail ("node->outputs[i] < simplify->tensor_symbol_info_size" , "ccv_nnc_symbolic_graph_simplify.c", 472, __extension__ __PRETTY_FUNCTION__ ); })); | |||
473 | int input_ref = node->inputs[i]; | |||
474 | while (refs[input_ref] >= 0) | |||
475 | input_ref = refs[input_ref]; | |||
476 | int output_ref = node->outputs[i]; | |||
477 | while (refs[output_ref] >= 0) | |||
478 | output_ref = refs[output_ref]; | |||
479 | if (input_ref == output_ref) | |||
480 | continue; | |||
481 | const ccv_nnc_tensor_symbol_info_t* const input = tensor_symbol_info + input_ref; | |||
482 | const ccv_nnc_tensor_symbol_info_t* const output = tensor_symbol_info + output_ref; | |||
483 | // If they are not the same data type, skip. (Likely data conversion op). | |||
484 | if (input->info.datatype != output->info.datatype) | |||
485 | continue; | |||
486 | // If they are not the same format, skip. (Likely format transform op). | |||
487 | if (input->info.format != output->info.format) | |||
488 | continue; | |||
489 | // If they are not on the same device (even for NUMA), skip. | |||
490 | if (input->info.type != output->info.type) | |||
491 | continue; | |||
492 | // If both are alias, we cannot consolidate this. | |||
493 | if (input->alias_ref && output->alias_ref) | |||
494 | continue; | |||
495 | // If input is alias, and output has alias reference to it, output cannot be the same as input. | |||
496 | if (input->alias_ref && (has_alias[output_ref >> 5] & (1u << (output_ref & 0x1f)))) | |||
497 | continue; | |||
498 | // If output is alias, and input has alias reference to it, input cannot be the same as output. | |||
499 | if (output->alias_ref && (has_alias[input_ref >> 5] & (1u << (input_ref & 0x1f)))) | |||
500 | continue; | |||
501 | // If either are carry overs (for while), we cannot do anything. | |||
502 | if (input->assign_ref || output->assign_ref || | |||
503 | input->r_assign_ref || output->r_assign_ref) | |||
504 | continue; | |||
505 | // If either are bypasses (for case..of), we cannot do anything. | |||
506 | if (input->bypass_ref || output->bypass_ref || | |||
507 | input->r_bypass_ref || output->r_bypass_ref) | |||
508 | continue; | |||
509 | // If either are inputs / outputs connecting the parent graph, we cannot do anything. | |||
510 | if (input->p_ref || output->p_ref) | |||
511 | continue; | |||
512 | // If the type is the same, check which one is the alias. | |||
513 | // We always prefer alias. | |||
514 | if (output->alias_ref) | |||
515 | { | |||
516 | // Input cannot be binds. | |||
517 | if (has_binds[input_ref >> 5] & (1u << (input_ref & 0x1f))) | |||
518 | continue; | |||
519 | refs[input_ref] = output_ref; | |||
520 | } else { // if (input->alias_ref), else | |||
521 | // Output cannot be binds. | |||
522 | if (has_binds[output_ref >> 5] & (1u << (output_ref & 0x1f))) | |||
523 | continue; | |||
524 | refs[output_ref] = input_ref; | |||
525 | } | |||
526 | } | |||
527 | } ccv_nnc_graph_visit_endfor} } | |||
528 | // Make sure refs reference to the end. | |||
529 | for (i = 0; i < simplify->tensor_symbol_info_size; i++) | |||
530 | if (refs[i] >= 0) | |||
531 | { | |||
532 | int ref = refs[i]; | |||
533 | while (refs[ref] >= 0) | |||
534 | ref = refs[ref]; | |||
535 | refs[i] = ref; | |||
536 | } | |||
537 | updated_refs = _ccv_nnc_symbolic_graph_update_refs(simplify, outputs, output_size, refs, 0 /* We still need these exec that generates the refs. */); | |||
538 | } while (updated_refs); | |||
539 | ccfreefree(refs); | |||
540 | ccfreefree(has_alias); | |||
541 | // Now, all references updated, remove data transfers that sources and destinations are the same. | |||
542 | ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size ; _i_++) { const int idx __attribute__((unused)) = (simplify-> visit)->node[_i_].index; const int _node_unused_ __attribute__ ((unused)) = (simplify->visit)->node[_i_].term; typeof ( (simplify->exec_symbol_info)) const node __attribute__((unused )) = (simplify->exec_symbol_info) + idx; { | |||
543 | // If already marked as dead, skip. | |||
544 | if (exec_dead[idx >> 5] & (1u << (idx & 0x1f))) | |||
545 | continue; | |||
546 | if (node->cmd.cmd != CCV_NNC_DATA_TRANSFER_FORWARD && | |||
547 | node->cmd.cmd != CCV_NNC_DATA_TRANSFER_BACKWARD && | |||
548 | // Conversion, if the datatype is the same, it is the data transfer op. | |||
549 | node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_FORWARD && | |||
550 | node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_BACKWARD && | |||
551 | // Format transform, if the format is the same, it is the data transfer op. | |||
552 | node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_FORWARD && | |||
553 | node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_BACKWARD) | |||
554 | continue; | |||
555 | for (i = 0; i < node->output_size; i++) // For data transfer, we only respect output size. | |||
556 | if (node->inputs[i] == node->outputs[i]) | |||
557 | { | |||
558 | if (i + 1 < node->output_size) | |||
559 | { | |||
560 | node->inputs[i] = node->inputs[node->output_size - 1]; | |||
561 | node->outputs[i] = node->outputs[node->output_size - 1]; | |||
562 | } | |||
563 | --node->output_size; | |||
564 | --i; | |||
565 | } | |||
566 | node->input_size = node->output_size; | |||
567 | ((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx)((void*)(((char*)((simplify->graph->exec_symbol_info)-> data)) + (size_t)(simplify->graph->exec_symbol_info)-> rsize * (size_t)(idx))))->input_size = node->input_size; | |||
568 | ((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx)((void*)(((char*)((simplify->graph->exec_symbol_info)-> data)) + (size_t)(simplify->graph->exec_symbol_info)-> rsize * (size_t)(idx))))->output_size = node->output_size; | |||
569 | // Remove this data transfer node if it has no outputs. | |||
570 | if (node->output_size == 0) | |||
571 | exec_dead[idx >> 5] |= (1u << (idx & 0x1f)); | |||
572 | } ccv_nnc_graph_visit_endfor} } | |||
573 | } | |||
574 | ||||
575 | typedef struct { | |||
576 | uint32_t fused_op; // The final fused op id. | |||
577 | uint32_t ops_seq[2]; // The sequence of commands to identify. | |||
578 | int ops_seq_size; | |||
579 | int ops_info_select; // Which ops' info will be selected. | |||
580 | struct { | |||
581 | int type; // Whether it is input, or output. It doesn't make sense for example, input in ops_seq, but output in fused_op. | |||
582 | int op_idx; // Index into the ops_seq. | |||
583 | int from; // The index in ops_seq. | |||
584 | int to; // The index in fused_op. | |||
585 | } pos[4]; // maps of positions from ops seq to fused_op for inputs (outputs). | |||
586 | int pos_size; | |||
587 | } ccv_nnc_ops_fusion_t; | |||
588 | ||||
589 | enum { | |||
590 | CCV_NNC_OPS_FUSION_INPUT_INDEX, | |||
591 | CCV_NNC_OPS_FUSION_OUTPUT_INDEX, | |||
592 | }; | |||
593 | ||||
594 | const static int ccv_nnc_ops_fusion_io_size = 2; | |||
595 | const static ccv_nnc_ops_fusion_t ccv_nnc_ops_fusions[] = { | |||
596 | { | |||
597 | .fused_op = CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD, | |||
598 | .ops_seq = { | |||
599 | CCV_NNC_SOFTMAX_FORWARD, CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD, | |||
600 | }, | |||
601 | .ops_seq_size = 2, | |||
602 | .ops_info_select = 1, | |||
603 | .pos = { | |||
604 | { | |||
605 | .type = CCV_NNC_OPS_FUSION_INPUT_INDEX, | |||
606 | .op_idx = 0, | |||
607 | .from = 0, | |||
608 | .to = 0, | |||
609 | }, | |||
610 | { | |||
611 | .type = CCV_NNC_OPS_FUSION_INPUT_INDEX, | |||
612 | .op_idx = 1, | |||
613 | .from = 1, | |||
614 | .to = 1, | |||
615 | }, | |||
616 | { | |||
617 | .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX, | |||
618 | .op_idx = 0, | |||
619 | .from = 0, | |||
620 | .to = 1, | |||
621 | }, | |||
622 | { | |||
623 | .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX, | |||
624 | .op_idx = 1, | |||
625 | .from = 0, | |||
626 | .to = 0, | |||
627 | }, | |||
628 | }, | |||
629 | .pos_size = 4, | |||
630 | }, | |||
631 | { | |||
632 | .fused_op = CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_FORWARD, | |||
633 | .ops_seq = { | |||
634 | CCV_NNC_SIGMOID_FORWARD, CCV_NNC_BINARY_CROSSENTROPY_FORWARD, | |||
635 | }, | |||
636 | .ops_seq_size = 2, | |||
637 | .ops_info_select = 1, | |||
638 | .pos = { | |||
639 | { | |||
640 | .type = CCV_NNC_OPS_FUSION_INPUT_INDEX, | |||
641 | .op_idx = 0, | |||
642 | .from = 0, | |||
643 | .to = 0, | |||
644 | }, | |||
645 | { | |||
646 | .type = CCV_NNC_OPS_FUSION_INPUT_INDEX, | |||
647 | .op_idx = 1, | |||
648 | .from = 1, | |||
649 | .to = 1, | |||
650 | }, | |||
651 | { | |||
652 | .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX, | |||
653 | .op_idx = 0, | |||
654 | .from = 0, | |||
655 | .to = 1, | |||
656 | }, | |||
657 | { | |||
658 | .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX, | |||
659 | .op_idx = 1, | |||
660 | .from = 0, | |||
661 | .to = 0, | |||
662 | }, | |||
663 | }, | |||
664 | .pos_size = 4, | |||
665 | } | |||
666 | }; | |||
667 | ||||
668 | static int _ccv_nnc_find_ops_for_fusion(const ccv_nnc_ops_fusion_t* const fusion, const int ops_idx, const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info, const uint32_t* const exec_dead, const int exec_idx, int* const fusing_exec_symbols) | |||
669 | { | |||
670 | if (exec_dead[exec_idx >> 5] & (1u << (exec_idx & 0x1f))) | |||
671 | return 0; | |||
672 | const ccv_nnc_graph_exec_symbol_info_t* const node = exec_symbol_info + exec_idx; | |||
673 | // Doesn't match the ops_seq, return 0. | |||
674 | if (fusion->ops_seq[ops_idx] != node->cmd.cmd) | |||
675 | return 0; | |||
676 | fusing_exec_symbols[ops_idx] = exec_idx; | |||
677 | // If already reached the end, we are good. | |||
678 | if (ops_idx == fusion->ops_seq_size - 1) | |||
679 | return 1; | |||
680 | // Otherwise, we need to go on, but don't have any to follow-up. | |||
681 | if (!node->outgoings || !node->outgoings->rnum) | |||
682 | return 0; | |||
683 | int i; | |||
684 | for (i = 0; i < node->outgoings->rnum; i++) | |||
685 | if (_ccv_nnc_find_ops_for_fusion(fusion, ops_idx + 1, exec_symbol_info, exec_dead, *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)( node->outgoings)->rsize * (size_t)(i))), fusing_exec_symbols)) | |||
686 | return 1; | |||
687 | return 0; | |||
688 | } | |||
689 | ||||
690 | static void _ccv_nnc_symbolic_graph_ops_fusion(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size) | |||
691 | { | |||
692 | uint32_t* const exec_dead = simplify->exec_dead; | |||
693 | ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = simplify->exec_symbol_info; | |||
694 | int i, j; | |||
695 | int fusing_exec_symbols[sizeof(ccv_nnc_ops_fusions->ops_seq)]; | |||
696 | int fused_inputs[ccv_nnc_ops_fusion_io_size]; // 2 is just a number based on the ops_fusion. | |||
697 | int fused_outputs[ccv_nnc_ops_fusion_io_size]; | |||
698 | ccv_nnc_graph_visit_for(simplify->visit, exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size ; _i_++) { const int idx __attribute__((unused)) = (simplify-> visit)->node[_i_].index; const int _node_unused_ __attribute__ ((unused)) = (simplify->visit)->node[_i_].term; typeof ( (exec_symbol_info)) const node __attribute__((unused)) = (exec_symbol_info ) + idx; { | |||
699 | // If already marked as dead, skip. | |||
700 | if (exec_dead[idx >> 5] & (1u << (idx & 0x1f))) | |||
701 | continue; | |||
702 | if (node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT) // If optimization pass disabled, skip. | |||
703 | continue; | |||
704 | // Run through rudimentary pattern matching for ops_seq. There are better ways to do this (immediately come to mind, Boyer-Moore). However, this is simpler to code. | |||
705 | // If I am running into performance issues with this, I would be very happy. | |||
706 | for (i = 0; i < sizeof(ccv_nnc_ops_fusions) / sizeof(ccv_nnc_ops_fusion_t); i++) | |||
707 | { | |||
708 | const ccv_nnc_ops_fusion_t* const ops_fusion = ccv_nnc_ops_fusions + i; | |||
709 | // Check to see if a list of symbols are possible. | |||
710 | if (ops_fusion->ops_seq[0] == node->cmd.cmd && | |||
711 | _ccv_nnc_find_ops_for_fusion(ops_fusion, 0, exec_symbol_info, exec_dead, idx, fusing_exec_symbols)) | |||
712 | { | |||
713 | // Go through all the inputs and outputs, check if they exists and are mapped. | |||
714 | // TODO: the mapping can be more sophisticated than what we have here. | |||
715 | // Also, I need to check if some inputs / outputs cannot be mapped, then we cannot continue. | |||
716 | for (j = 0; j < ccv_nnc_ops_fusion_io_size; j++) | |||
717 | fused_inputs[j] = fused_outputs[j] = CCV_NNC_NO_TENSOR_SYMBOL; | |||
718 | int input_size = 0, output_size = 0; | |||
719 | for (j = 0; j < ops_fusion->pos_size; j++) | |||
720 | { | |||
721 | ccv_nnc_graph_exec_symbol_info_t* const fusing_op = exec_symbol_info + fusing_exec_symbols[ops_fusion->pos[j].op_idx]; | |||
722 | switch (ops_fusion->pos[j].type) | |||
723 | { | |||
724 | case CCV_NNC_OPS_FUSION_INPUT_INDEX: | |||
725 | fused_inputs[ops_fusion->pos[j].to] = ops_fusion->pos[j].from < fusing_op->input_size ? fusing_op->inputs[ops_fusion->pos[j].from] : CCV_NNC_NO_TENSOR_SYMBOL; | |||
726 | input_size = ccv_max(input_size, ops_fusion->pos[j].to + 1)({ typeof (input_size) _a = (input_size); typeof (ops_fusion-> pos[j].to + 1) _b = (ops_fusion->pos[j].to + 1); (_a > _b ) ? _a : _b; }); | |||
727 | break; | |||
728 | case CCV_NNC_OPS_FUSION_OUTPUT_INDEX: | |||
729 | fused_outputs[ops_fusion->pos[j].to] = ops_fusion->pos[j].from < fusing_op->output_size ? fusing_op->outputs[ops_fusion->pos[j].from] : CCV_NNC_NO_TENSOR_SYMBOL; | |||
730 | output_size = ccv_max(output_size, ops_fusion->pos[j].to + 1)({ typeof (output_size) _a = (output_size); typeof (ops_fusion ->pos[j].to + 1) _b = (ops_fusion->pos[j].to + 1); (_a > _b) ? _a : _b; }); | |||
731 | break; | |||
732 | } | |||
733 | } | |||
734 | const ccv_nnc_cmd_param_t info = exec_symbol_info[fusing_exec_symbols[ops_fusion->ops_info_select]].cmd.info; | |||
735 | // Modify the first node so it is the correct type and value. I need to look back to the actual graph to get the info. | |||
736 | ccv_nnc_graph_exec_symbol_info_t* const actual_node = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx)((void*)(((char*)((simplify->graph->exec_symbol_info)-> data)) + (size_t)(simplify->graph->exec_symbol_info)-> rsize * (size_t)(idx))); | |||
737 | actual_node->cmd.cmd = node->cmd.cmd = ops_fusion->fused_op; | |||
738 | actual_node->cmd.info = node->cmd.info = info; | |||
739 | if (node->input_size + node->output_size < input_size + output_size) | |||
740 | actual_node->inputs = node->inputs = node->inputs ? ccreallocrealloc(node->inputs, sizeof(int) * (input_size + output_size)) : ccmallocmalloc(sizeof(int) * (input_size + output_size)); | |||
741 | actual_node->outputs = node->outputs = node->inputs + input_size; | |||
742 | actual_node->input_size = node->input_size = input_size; | |||
743 | actual_node->output_size = node->output_size = output_size; | |||
744 | memcpy(node->inputs, fused_inputs, sizeof(int) * input_size); | |||
745 | memcpy(node->outputs, fused_outputs, sizeof(int) * output_size); | |||
746 | // Afterwards, mark the rest as dead. | |||
747 | for (j = 1; j < ops_fusion->ops_seq_size; j++) | |||
748 | exec_dead[fusing_exec_symbols[j] >> 5] |= (1u << (fusing_exec_symbols[j] & 0x1f)); | |||
749 | break; | |||
750 | } | |||
751 | } | |||
752 | } ccv_nnc_graph_visit_endfor} } | |||
753 | } | |||
754 | ||||
755 | static void _ccv_nnc_symbolic_graph_pruning_undead_exec(ccv_nnc_symbolic_graph_simplify_t* const simplify, const int exec_idx, uint32_t* const tensor_visited, ccv_array_t* const next) | |||
756 | { | |||
757 | assert(exec_idx >= 0)((void) sizeof ((exec_idx >= 0) ? 1 : 0), __extension__ ({ if (exec_idx >= 0) ; else __assert_fail ("exec_idx >= 0" , "ccv_nnc_symbolic_graph_simplify.c", 757, __extension__ __PRETTY_FUNCTION__ ); })); | |||
758 | uint32_t* const exec_dead = simplify->exec_dead; | |||
759 | uint32_t* const tensor_dead = simplify->tensor_dead; | |||
760 | exec_dead[exec_idx >> 5] &= ~(1u << (exec_idx & 0x1f)); // Undead the exec. | |||
761 | ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = simplify->exec_symbol_info + exec_idx; | |||
762 | int i; | |||
763 | if (exec_symbol_info->cmd.cmd == CCV_NNC_GRAPH_FORWARD || | |||
764 | exec_symbol_info->cmd.cmd == CCV_NNC_GRAPH_BACKWARD || | |||
765 | exec_symbol_info->cmd.cmd == CCV_NNC_CUSTOM_FORWARD || | |||
766 | exec_symbol_info->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD) | |||
767 | { | |||
768 | // All of its inputs / outputs need to be undead for these commands. | |||
769 | for (i = 0; i < exec_symbol_info->input_size; i++) | |||
770 | { | |||
771 | const int d = exec_symbol_info->inputs[i]; | |||
772 | if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f)))) | |||
773 | { | |||
774 | ccv_array_push(next, &d); // Push to the next round to be undead. | |||
775 | tensor_visited[d >> 5] |= (1u << (d & 0x1f)); | |||
776 | } | |||
777 | } | |||
778 | for (i = 0; i < exec_symbol_info->output_size; i++) | |||
779 | { | |||
780 | const int d = exec_symbol_info->outputs[i]; | |||
781 | if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f)))) | |||
782 | { | |||
783 | ccv_array_push(next, &d); // Push to the next round to be undead. | |||
784 | tensor_visited[d >> 5] |= (1u << (d & 0x1f)); | |||
785 | } | |||
786 | } | |||
787 | return; | |||
788 | } | |||
789 | // Go through the input / output, to make sure that all of them can be available. | |||
790 | const int input_bitmask_size = (exec_symbol_info->input_size + 63) >> 6; | |||
791 | const int output_bitmask_size = (exec_symbol_info->output_size + 63) >> 6; | |||
792 | uint64_t input_bitmasks[ccv_max(1, input_bitmask_size)({ typeof (1) _a = (1); typeof (input_bitmask_size) _b = (input_bitmask_size ); (_a > _b) ? _a : _b; })]; | |||
793 | for (i = 0; i
| |||
794 | input_bitmasks[i] = 0; | |||
795 | uint64_t output_bitmasks[ccv_max(1, output_bitmask_size)({ typeof (1) _a = (1); typeof (output_bitmask_size) _b = (output_bitmask_size ); (_a > _b) ? _a : _b; })]; | |||
796 | for (i = 0; i
| |||
797 | output_bitmasks[i] = 0; | |||
798 | for (i = 0; i < exec_symbol_info->input_size; i++) | |||
799 | if (exec_symbol_info->inputs[i] >= 0) | |||
800 | input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
| ||||
801 | for (i = 0; i < exec_symbol_info->output_size; i++) | |||
802 | if (exec_symbol_info->outputs[i] >= 0) | |||
803 | output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
804 | // First, mark everything with bitmasks, and verify it works. | |||
805 | assert(ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size))((void) sizeof ((ccv_nnc_cmd_bitmask(exec_symbol_info->cmd , exec_symbol_info->input_size, exec_symbol_info->output_size , input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size )) ? 1 : 0), __extension__ ({ if (ccv_nnc_cmd_bitmask(exec_symbol_info ->cmd, exec_symbol_info->input_size, exec_symbol_info-> output_size, input_bitmasks, input_bitmask_size, output_bitmasks , output_bitmask_size)) ; else __assert_fail ("ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size)" , "ccv_nnc_symbolic_graph_simplify.c", 805, __extension__ __PRETTY_FUNCTION__ ); })); | |||
806 | int flag; | |||
807 | do { | |||
808 | flag = 0; | |||
809 | // Try to eliminate one at a time. Go over output first. | |||
810 | for (i = 0; i < exec_symbol_info->output_size; i++) | |||
811 | { | |||
812 | const int d = exec_symbol_info->outputs[i]; | |||
813 | // If this tensor currently is marked as dead, try to see whether it works when we don't have this tensor at all. | |||
814 | if (d >= 0 && (tensor_dead[d >> 5] & (1u << (d & 0x1f))) && | |||
815 | (output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))) | |||
816 | { | |||
817 | output_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63)); | |||
818 | if (ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size)) | |||
819 | flag = 1; | |||
820 | else // Reset the bitmask. | |||
821 | output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
822 | } | |||
823 | } | |||
824 | // Only check if there are inputs we can remove if it is backward. | |||
825 | if (!ccv_nnc_cmd_is_forward(exec_symbol_info->cmd)) | |||
826 | // For inputs, no matter if it s dead or not, we try to limit our input to the smallest number. | |||
827 | for (i = 0; i < exec_symbol_info->input_size; i++) | |||
828 | { | |||
829 | const int d = exec_symbol_info->inputs[i]; | |||
830 | // If this tensor currently is marked as dead, try to see whether it works when we don't have this tensor at all. | |||
831 | if (d >= 0 && (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))) | |||
832 | { | |||
833 | input_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63)); | |||
834 | if (ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size)) | |||
835 | flag = 1; | |||
836 | else // Reset the bitmask. | |||
837 | input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
838 | } | |||
839 | } | |||
840 | } while (flag); | |||
841 | // Now we know which one to keep, which one to undead. | |||
842 | for (i = 0; i < exec_symbol_info->input_size; i++) | |||
843 | if (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) | |||
844 | { | |||
845 | const int d = exec_symbol_info->inputs[i]; | |||
846 | if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f)))) | |||
847 | { | |||
848 | ccv_array_push(next, &d); // Push to the next round to be undead. | |||
849 | tensor_visited[d >> 5] |= (1u << (d & 0x1f)); | |||
850 | } | |||
851 | } else { | |||
852 | // Clean up the inputs. | |||
853 | exec_symbol_info->inputs[i] = CCV_NNC_NO_TENSOR_SYMBOL; | |||
854 | assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->inputs == exec_symbol_info->inputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*) (((char*)((simplify->graph->exec_symbol_info)->data) ) + (size_t)(simplify->graph->exec_symbol_info)->rsize * (size_t)(exec_idx))))->inputs == exec_symbol_info->inputs ) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_info_t *)((void*)(((char*)((simplify->graph->exec_symbol_info) ->data)) + (size_t)(simplify->graph->exec_symbol_info )->rsize * (size_t)(exec_idx))))->inputs == exec_symbol_info ->inputs) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->inputs == exec_symbol_info->inputs" , "ccv_nnc_symbolic_graph_simplify.c", 854, __extension__ __PRETTY_FUNCTION__ ); })); | |||
855 | } | |||
856 | for (i = 0; i < exec_symbol_info->output_size; i++) | |||
857 | if (output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) | |||
858 | { | |||
859 | const int d = exec_symbol_info->outputs[i]; | |||
860 | if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f)))) | |||
861 | { | |||
862 | ccv_array_push(next, &d); // Push to the next round to be undead. | |||
863 | tensor_visited[d >> 5] |= (1u << (d & 0x1f)); | |||
864 | } | |||
865 | } else { | |||
866 | // Clean up the outputs. | |||
867 | exec_symbol_info->outputs[i] = CCV_NNC_NO_TENSOR_SYMBOL; | |||
868 | assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->outputs == exec_symbol_info->outputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*) (((char*)((simplify->graph->exec_symbol_info)->data) ) + (size_t)(simplify->graph->exec_symbol_info)->rsize * (size_t)(exec_idx))))->outputs == exec_symbol_info-> outputs) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_info_t *)((void*)(((char*)((simplify->graph->exec_symbol_info) ->data)) + (size_t)(simplify->graph->exec_symbol_info )->rsize * (size_t)(exec_idx))))->outputs == exec_symbol_info ->outputs) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->outputs == exec_symbol_info->outputs" , "ccv_nnc_symbolic_graph_simplify.c", 868, __extension__ __PRETTY_FUNCTION__ ); })); | |||
869 | } | |||
870 | } | |||
871 | ||||
872 | static void _ccv_nnc_symbolic_graph_pruning(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size) | |||
873 | { | |||
874 | uint32_t* const tensor_visited = (uint32_t*)cccalloccalloc(sizeof(uint32_t), (simplify->tensor_symbol_info_size + 31) >> 5); | |||
875 | ccv_array_t* const preserve[2] = { | |||
876 | ccv_array_new(sizeof(int), output_size, 0), | |||
877 | ccv_array_new(sizeof(int), 0, 0), | |||
878 | }; | |||
879 | int i, j; | |||
880 | ccv_array_t** const r_alias_refs = (ccv_array_t**)cccalloccalloc(sizeof(ccv_array_t*), simplify->tensor_symbol_info_size); | |||
881 | for (i = 0; i < simplify->tensor_symbol_info_size; i++) | |||
882 | if (simplify->tensor_symbol_info[i].alias_ref) | |||
883 | { | |||
884 | const int alias_ref = simplify->tensor_symbol_info[i].alias_ref - 1; | |||
885 | assert(alias_ref < simplify->tensor_symbol_info_size)((void) sizeof ((alias_ref < simplify->tensor_symbol_info_size ) ? 1 : 0), __extension__ ({ if (alias_ref < simplify-> tensor_symbol_info_size) ; else __assert_fail ("alias_ref < simplify->tensor_symbol_info_size" , "ccv_nnc_symbolic_graph_simplify.c", 885, __extension__ __PRETTY_FUNCTION__ ); })); | |||
886 | if (!r_alias_refs[alias_ref]) | |||
887 | r_alias_refs[alias_ref] = ccv_array_new(sizeof(int), 1, 0); | |||
888 | ccv_array_push(r_alias_refs[alias_ref], &i); | |||
889 | } | |||
890 | uint32_t* const exec_dead = simplify->exec_dead; | |||
891 | uint32_t* const tensor_dead = simplify->tensor_dead; | |||
892 | int* const output_execs = simplify->output_execs; | |||
893 | _ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify); | |||
894 | // Mark everything visited as dead. | |||
895 | ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size ; _i_++) { const int idx __attribute__((unused)) = (simplify-> visit)->node[_i_].index; const int _node_unused_ __attribute__ ((unused)) = (simplify->visit)->node[_i_].term; typeof ( (simplify->exec_symbol_info)) const node __attribute__((unused )) = (simplify->exec_symbol_info) + idx; { | |||
896 | if (node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT) // If optimization pass disabled, skip. | |||
897 | continue; | |||
898 | exec_dead[idx >> 5] |= (1u << (idx & 0x1f)); | |||
899 | for (i = 0; i < node->input_size; i++) | |||
900 | { | |||
901 | const int d = node->inputs[i]; | |||
902 | if (d >= 0) | |||
903 | tensor_dead[d >> 5] |= (1u << (d & 0x1f)); | |||
904 | } | |||
905 | for (i = 0; i < node->output_size; i++) | |||
906 | { | |||
907 | const int d = node->outputs[i]; | |||
908 | if (d >= 0) | |||
909 | tensor_dead[d >> 5] |= (1u << (d & 0x1f)); | |||
910 | } | |||
911 | } ccv_nnc_graph_visit_endfor} } | |||
912 | // If the tensor symbol is used by other exec that is not visited, unmark it. | |||
913 | for (i = 0; i < simplify->exec_symbol_info_size; i++) | |||
914 | { | |||
915 | if (exec_dead[i >> 5] & (1u << (i & 0x1f))) | |||
916 | continue; | |||
917 | const ccv_nnc_graph_exec_symbol_info_t* const node = simplify->exec_symbol_info + i; | |||
918 | for (j = 0; j < node->input_size; j++) | |||
919 | { | |||
920 | const int d = node->inputs[j]; | |||
921 | // Undead it. | |||
922 | if (d >= 0) | |||
923 | tensor_dead[d >> 5] &= ~(1u << (d & 0x1f)); | |||
924 | } | |||
925 | for (j = 0; j < node->output_size; j++) | |||
926 | { | |||
927 | const int d = node->outputs[j]; | |||
928 | // Undead it. | |||
929 | if (d >= 0) | |||
930 | tensor_dead[d >> 5] &= ~(1u << (d & 0x1f)); | |||
931 | } | |||
932 | } | |||
933 | for (i = 0; i < output_size; i++) | |||
934 | ccv_array_push(preserve[0], &outputs[i].d); | |||
935 | int p = 0, q = 1; | |||
936 | // BFS to mark execs / tensors as not dead. | |||
937 | while (preserve[p]->rnum > 0) | |||
938 | { | |||
939 | ccv_array_clear(preserve[q]); | |||
940 | // First, undead all relevant tensors. | |||
941 | for (i = 0; i
| |||
942 | { | |||
943 | const int d = *(int*)ccv_array_get(preserve[p], i)((void*)(((char*)((preserve[p])->data)) + (size_t)(preserve [p])->rsize * (size_t)(i))); | |||
944 | // Undead the outputs. | |||
945 | tensor_dead[d >> 5] &= ~(1u << (d & 0x1f)); | |||
946 | int alias_ref = d; | |||
947 | if (simplify->tensor_symbol_info[d].alias_ref) | |||
948 | { | |||
949 | alias_ref = simplify->tensor_symbol_info[d].alias_ref - 1; | |||
950 | tensor_dead[alias_ref >> 5] &= ~(1u << (alias_ref & 0x1f)); | |||
951 | assert(r_alias_refs[alias_ref])((void) sizeof ((r_alias_refs[alias_ref]) ? 1 : 0), __extension__ ({ if (r_alias_refs[alias_ref]) ; else __assert_fail ("r_alias_refs[alias_ref]" , "ccv_nnc_symbolic_graph_simplify.c", 951, __extension__ __PRETTY_FUNCTION__ ); })); | |||
952 | } | |||
953 | if (r_alias_refs[alias_ref]) | |||
954 | for (j = 0; j < r_alias_refs[alias_ref]->rnum; j++) | |||
955 | { | |||
956 | const int b = *(int*)ccv_array_get(r_alias_refs[alias_ref], j)((void*)(((char*)((r_alias_refs[alias_ref])->data)) + (size_t )(r_alias_refs[alias_ref])->rsize * (size_t)(j))); | |||
957 | if (output_execs[b] >= 0) // Only revive if it is written alias. | |||
958 | tensor_dead[b >> 5] &= ~(1u << (b & 0x1f)); | |||
959 | } | |||
960 | } | |||
961 | for (i = 0; i < preserve[p]->rnum; i++) | |||
962 | { | |||
963 | const int d = *(int*)ccv_array_get(preserve[p], i)((void*)(((char*)((preserve[p])->data)) + (size_t)(preserve [p])->rsize * (size_t)(i))); | |||
964 | const int output_exec = output_execs[d]; | |||
965 | // Undead the exec. | |||
966 | if (output_exec >= 0) | |||
967 | _ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]); | |||
968 | int alias_ref = d; | |||
969 | if (simplify->tensor_symbol_info[d].alias_ref) | |||
970 | { | |||
971 | alias_ref = simplify->tensor_symbol_info[d].alias_ref - 1; | |||
972 | const int output_exec = output_execs[alias_ref]; | |||
973 | if (output_exec >= 0) | |||
974 | _ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]); | |||
975 | } | |||
976 | if (r_alias_refs[alias_ref]) | |||
977 | for (j = 0; j < r_alias_refs[alias_ref]->rnum; j++) | |||
978 | { | |||
979 | const int b = *(int*)ccv_array_get(r_alias_refs[alias_ref], j)((void*)(((char*)((r_alias_refs[alias_ref])->data)) + (size_t )(r_alias_refs[alias_ref])->rsize * (size_t)(j))); | |||
980 | const int output_exec = output_execs[b]; | |||
981 | if (output_exec >= 0) | |||
982 | _ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]); | |||
983 | } | |||
984 | } | |||
985 | CCV_SWAP(p, q, i)((i) = (p), (p) = (q), (q) = (i)); | |||
986 | } | |||
987 | ccfreefree(tensor_visited); | |||
988 | ccv_array_free(preserve[0]); | |||
989 | ccv_array_free(preserve[1]); | |||
990 | for (i = 0; i < simplify->tensor_symbol_info_size; i++) | |||
991 | if (r_alias_refs[i]) | |||
992 | ccv_array_free(r_alias_refs[i]); | |||
993 | ccfreefree(r_alias_refs); | |||
994 | } | |||
995 | ||||
996 | void ccv_nnc_symbolic_graph_simplify(ccv_nnc_symbolic_graph_t* const graph, const int* const passes, const int pass_size, const ccv_nnc_tensor_symbol_t* const binds, const int bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size) | |||
997 | { | |||
998 | ccv_nnc_symbolic_graph_simplify_t* const simplify = _ccv_nnc_symbolic_graph_simplify_new(graph, sources, source_size, destinations, destination_size); | |||
999 | int i; | |||
1000 | for (i = 0; i < pass_size; i++) | |||
| ||||
1001 | switch (passes[i]) | |||
1002 | { | |||
1003 | case CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION: | |||
1004 | _ccv_nnc_symbolic_graph_common_subexpression_elimination(simplify, outputs, output_size); | |||
1005 | break; | |||
1006 | case CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT: | |||
1007 | _ccv_nnc_symbolic_graph_data_transfer_opt(simplify, binds, bind_size, outputs, output_size); | |||
1008 | break; | |||
1009 | case CCV_NNC_SIMPLIFY_GRAPH_PRUNING: | |||
1010 | _ccv_nnc_symbolic_graph_pruning(simplify, outputs, output_size); | |||
1011 | break; | |||
1012 | case CCV_NNC_SIMPLIFY_OPS_FUSION: | |||
1013 | _ccv_nnc_symbolic_graph_ops_fusion(simplify, outputs, output_size); | |||
1014 | break; | |||
1015 | } | |||
1016 | _ccv_nnc_symbolic_graph_simplify_apply(simplify); | |||
1017 | _ccv_nnc_symbolic_graph_simplify_free(simplify); | |||
1018 | } |