| File: | nnc/ccv_nnc_symbolic_graph_simplify.c |
| Warning: | line 800, column 27: The left expression of the compound assignment is an uninitialized value. The computed value will also be garbage |
#include "ccv_nnc.h"
#include "ccv_nnc_easy.h"
#include "ccv_nnc_internal.h"
#include "ccv_internal.h"
#include "_ccv_nnc_symbolic_graph.h"
#include "3rdparty/siphash/siphash24.h"

// MARK - Level-3.5 API

static uint8_t key_siphash[16] = "graphcsekvlibnnc";

typedef struct {
	int tensor_symbol_info_size;
	int exec_symbol_info_size;
	ccv_nnc_symbolic_graph_t* graph;
	ccv_nnc_graph_visit_t* visit;
	ccv_nnc_tensor_symbol_info_t* tensor_symbol_info;
	ccv_nnc_graph_exec_symbol_info_t* exec_symbol_info;
	uint32_t* exec_dead; // Marks an exec as dead and needing to be cleared; each bit represents one exec.
	uint32_t* tensor_dead; // Marks a tensor as dead and needing to be cleared; each bit represents one tensor.
	int* output_execs; // Mapping from a tensor to the exec that generates it.
} ccv_nnc_symbolic_graph_simplify_t;
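
// The exec_dead / tensor_dead words above pack one flag per symbol, 32 per
// uint32_t: index >> 5 selects the word, index & 0x1f the bit within it.
// A minimal sketch of the idiom this file open-codes everywhere (these helper
// names are hypothetical, for illustration only):
static inline void dead_mark(uint32_t* const mask, const int d)
{
	mask[d >> 5] |= (1u << (d & 0x1f)); // Set: mark symbol d as dead.
}

static inline void dead_unmark(uint32_t* const mask, const int d)
{
	mask[d >> 5] &= ~(1u << (d & 0x1f)); // Clear: un-dead symbol d.
}

static inline int dead_marked(const uint32_t* const mask, const int d)
{
	return (mask[d >> 5] & (1u << (d & 0x1f))) != 0; // Test the bit.
}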

static void _ccv_nnc_symbolic_graph_simplify_update_output_execs(ccv_nnc_symbolic_graph_simplify_t* const simplify)
{
	int i;
	for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		simplify->output_execs[i] = -1;
	ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
		if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
			continue;
		for (i = 0; i < node->output_size; i++)
			if (node->outputs[i] >= 0)
				simplify->output_execs[node->outputs[i]] = idx; // A tensor can only be written once.
	} ccv_nnc_graph_visit_endfor
}

static ccv_nnc_symbolic_graph_simplify_t* _ccv_nnc_symbolic_graph_simplify_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
{
	ccv_nnc_symbolic_graph_simplify_t* const simplify = (ccv_nnc_symbolic_graph_simplify_t*)ccmalloc(sizeof(ccv_nnc_symbolic_graph_simplify_t));
	simplify->graph = graph;
	simplify->visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0), graph->exec_symbol_info->rnum, sources, source_size, destinations, destination_size, 0);
	simplify->tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_info_t) * graph->tensor_symbol_info->rnum);
	simplify->exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_info_t) * graph->exec_symbol_info->rnum);
	ccv_nnc_symbolic_graph_symbol_infer(graph, simplify->visit, sources, source_size, destinations, destination_size, 0, 0, simplify->tensor_symbol_info, simplify->exec_symbol_info);
	simplify->tensor_symbol_info_size = graph->tensor_symbol_info->rnum;
	simplify->exec_symbol_info_size = graph->exec_symbol_info->rnum;
	// exec_dead and tensor_dead share one allocation; tensor_dead starts right after the exec words.
	simplify->exec_dead = cccalloc(((simplify->exec_symbol_info_size + 31) >> 5) + ((simplify->tensor_symbol_info_size + 31) >> 5), sizeof(uint32_t));
	simplify->tensor_dead = simplify->exec_dead + ((simplify->exec_symbol_info_size + 31) >> 5);
	simplify->output_execs = (int*)ccmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
	return simplify;
}

static void _ccv_nnc_symbolic_graph_simplify_apply(ccv_nnc_symbolic_graph_simplify_t* const simplify)
{
	int i, j;
	for (i = 0; i < simplify->exec_symbol_info_size; i++)
		if (simplify->exec_dead[i >> 5] & (1u << (i & 0x1f)))
			ccv_nnc_graph_exec_symbol_free(simplify->graph, (ccv_nnc_graph_exec_symbol_t){
				.d = i,
				.graph = simplify->graph,
			});
		else // If it is not marked as dead, go through its outputs to unmark those tensors.
			for (j = 0; j < simplify->exec_symbol_info[i].output_size; j++)
			{
				const int d = simplify->exec_symbol_info[i].outputs[j];
				if (d >= 0)
					simplify->tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
			}
	for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		if (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f)))
			ccv_nnc_tensor_symbol_free(simplify->graph, (ccv_nnc_tensor_symbol_t){
				.d = i,
				.graph = simplify->graph,
			});
}

static void _ccv_nnc_symbolic_graph_simplify_free(ccv_nnc_symbolic_graph_simplify_t* const simplify)
{
	ccv_nnc_graph_visit_free(simplify->visit);
	ccfree(simplify->tensor_symbol_info);
	ccfree(simplify->exec_symbol_info);
	ccfree(simplify->exec_dead);
	ccfree(simplify->output_execs);
	ccfree(simplify);
}
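
// Taken together, the helpers above form the pass lifecycle. A hypothetical
// driver (the real public entry point is outside this excerpt), assuming a
// CSE-only run: passes only mark dead bits / rewrite references, and nothing
// is removed from the graph until _apply runs once at the end.
static void example_simplify_once(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
{
	ccv_nnc_symbolic_graph_simplify_t* const simplify = _ccv_nnc_symbolic_graph_simplify_new(graph, sources, source_size, destinations, destination_size);
	// Defined later in this file: marks duplicated work as dead.
	_ccv_nnc_symbolic_graph_common_subexpression_elimination(simplify, outputs, output_size);
	_ccv_nnc_symbolic_graph_simplify_apply(simplify); // Frees the symbols marked dead.
	_ccv_nnc_symbolic_graph_simplify_free(simplify);
}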

typedef struct {
	int d; // The tensor symbol index this hash maps to.
	int ifbit; // The displacement of this entry from its home slot (robin-hood probe distance).
	uint64_t hash;
} ccv_nnc_cse_hash_t;

static int _ccv_nnc_cse_hash_find(ccv_nnc_cse_hash_t* const hash_map, const uint64_t hash, const int map_size)
{
	assert(hash > 0);
	int i, j;
	i = (hash - 1) % map_size;
	for (j = 0; ; j++, i++)
	{
		if (i >= map_size)
			i = 0;
		if (j > hash_map[i].ifbit)
			return -1;
		if (hash_map[i].hash == hash)
			return hash_map[i].d;
	}
}

static void _ccv_nnc_cse_hash_add(ccv_nnc_cse_hash_t* const hash_map, uint64_t hash, int d, const int map_size)
{
	assert(hash > 0);
	int i, j;
	i = (hash - 1) % map_size;
	for (j = 0; ; j++, i++)
	{
		if (i >= map_size)
			i = 0;
		if (hash_map[i].hash == hash) // Already exists, do nothing.
			return;
		if (hash_map[i].hash == 0)
		{
			// Insert.
			hash_map[i].d = d;
			hash_map[i].ifbit = j;
			hash_map[i].hash = hash;
			return;
		}
		if (j > hash_map[i].ifbit)
		{
			const ccv_nnc_cse_hash_t old_hash = hash_map[i];
			// Swap, and continue, until we find an empty slot.
			hash_map[i].d = d;
			hash_map[i].ifbit = j;
			hash_map[i].hash = hash;
			d = old_hash.d;
			j = old_hash.ifbit;
			hash = old_hash.hash;
		}
	}
}
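
// A small usage sketch of the map above (values hypothetical). Keys are
// non-zero hashes; ifbit records how far an entry sits from its home slot,
// which is what lets _ccv_nnc_cse_hash_find stop early on a miss.
static void example_cse_hash_usage(void)
{
	enum { map_size = 7 }; // The real code sizes this at 3 / 2 of the tensor count.
	ccv_nnc_cse_hash_t hash_map[map_size] = {{0}};
	_ccv_nnc_cse_hash_add(hash_map, 42, 3, map_size); // Map hash 42 to tensor symbol 3.
	assert(_ccv_nnc_cse_hash_find(hash_map, 42, map_size) == 3); // Hit.
	assert(_ccv_nnc_cse_hash_find(hash_map, 57, map_size) == -1); // Miss: the probe distance exceeds the slot's ifbit.
}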

static int _ccv_nnc_symbolic_graph_update_refs(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const int* const refs, const int output_exec_ref_dead)
{
	int i, j;
	// Go over refs; if a tensor is an alias, mark it as referencing the new one.
	for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		if (refs[i] >= 0)
			// Mark this tensor as dead.
			simplify->tensor_dead[i >> 5] |= (1u << (i & 0x1f));
		else if (simplify->tensor_symbol_info[i].alias_ref && refs[simplify->tensor_symbol_info[i].alias_ref - 1] >= 0) {
			const int alias_ref = simplify->tensor_symbol_info[i].alias_ref - 1;
			simplify->tensor_symbol_info[i].alias_ref = refs[alias_ref] + 1;
			((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->alias_ref = refs[alias_ref] + 1;
		}
	for (i = 0; i < output_size; i++)
		// If the output is an alias, that's fine, because if the alias is re-bound, we are good.
		simplify->tensor_dead[outputs[i].d >> 5] &= ~(1u << (outputs[i].d & 0x1f)); // Un-dead the output tensor symbols.
	// Merge s_refs if the tensor is dead.
	for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))))
		{
			const int ref = refs[i];
			if (simplify->tensor_symbol_info[i].s_ref && simplify->tensor_symbol_info[i].s_ref->rnum)
			{
				if (!simplify->tensor_symbol_info[ref].s_ref) // If there is no s_ref, it is simple: just assign the pointer and set the old one to nil.
				{
					simplify->tensor_symbol_info[ref].s_ref = simplify->tensor_symbol_info[i].s_ref;
					simplify->tensor_symbol_info[i].s_ref = 0;
					((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->s_ref = 0;
				} else {
					ccv_array_t* const ref_s_ref = simplify->tensor_symbol_info[ref].s_ref;
					ccv_array_t* const i_s_ref = simplify->tensor_symbol_info[i].s_ref;
					const int ref_s_ref_rnum = ref_s_ref->rnum;
					int flag = 0;
					// Detect a conflict; if there is one, un-dead the tensor.
					for (j = 0; !flag && j < ccv_min(ref_s_ref_rnum, i_s_ref->rnum); j++)
					{
						const int ref_s_ref_k = *(int*)ccv_array_get(ref_s_ref, j);
						const int i_s_ref_k = *(int*)ccv_array_get(i_s_ref, j);
						// If they have different tensors linked for the same sub-graph, we cannot merge the two.
						flag = (ref_s_ref_k > 0 && i_s_ref_k > 0 && ref_s_ref_k != i_s_ref_k);
					}
					if (flag)
					{
						simplify->tensor_dead[i >> 5] &= ~(1u << (i & 0x1f)); // Un-dead.
						continue;
					}
					if (ref_s_ref_rnum < i_s_ref->rnum)
					{
						ccv_array_resize(ref_s_ref, i_s_ref->rnum);
						memcpy(ccv_array_get(ref_s_ref, ref_s_ref_rnum), ccv_array_get(i_s_ref, ref_s_ref_rnum), sizeof(int) * (i_s_ref->rnum - ref_s_ref_rnum));
					}
					for (j = 0; j < ccv_min(ref_s_ref_rnum, i_s_ref->rnum); j++)
					{
						const int ref_s_ref_k = *(int*)ccv_array_get(ref_s_ref, j);
						const int i_s_ref_k = *(int*)ccv_array_get(i_s_ref, j);
						assert(ref_s_ref_k == 0 || i_s_ref_k == 0);
						if (i_s_ref_k)
							*(int*)ccv_array_get(ref_s_ref, j) = i_s_ref_k;
					}
					ccv_array_free(simplify->tensor_symbol_info[i].s_ref);
					simplify->tensor_symbol_info[i].s_ref = 0;
					((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->s_ref = 0;
					for (j = 0; j < ref_s_ref->rnum; j++)
					{
						const int ref_k = *(int*)ccv_array_get(ref_s_ref, j) - 1;
						if (ref_k >= 0)
						{
							ccv_nnc_symbolic_graph_t* const sub_graph = *(ccv_nnc_symbolic_graph_t**)ccv_array_get(simplify->graph->sub_graphs, j);
							assert(sub_graph);
							// Update its p_ref.
							((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(sub_graph->tensor_symbol_info, ref_k))->p_ref = ref + 1;
						}
					}
				}
				assert(simplify->tensor_symbol_info[i].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->s_ref);
				assert(simplify->tensor_symbol_info[ref].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref))->s_ref);
			}
		}
	// Go through the refs we are updating and their p_refs to make sure both sides are updated.
	for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))) && simplify->tensor_symbol_info[i].p_ref)
		{
			const int ref = refs[i];
			const int p_ref = simplify->tensor_symbol_info[i].p_ref - 1;
			assert(p_ref >= 0);
			assert(simplify->graph->p);
			ccv_array_t* const s_ref = ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->p->tensor_symbol_info, p_ref))->s_ref;
			const int s_idx = simplify->graph->p_idx - 1;
			assert(s_idx >= 0);
			assert(s_ref && s_ref->rnum > s_idx);
			*(int*)ccv_array_get(s_ref, s_idx) = ref + 1; // Update so it references the new s_ref.
			assert(!simplify->tensor_symbol_info[ref].p_ref);
			simplify->tensor_symbol_info[ref].p_ref = p_ref + 1;
			((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref))->p_ref = p_ref + 1;
		}
	// Now go over the execs and mark as dead the ones we no longer need to generate refs.
	if (output_exec_ref_dead)
		for (i = 0; i < simplify->tensor_symbol_info_size; i++)
			if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))))
			{
				const int output_exec = simplify->output_execs[i];
				assert(output_exec >= 0);
				const ccv_nnc_graph_exec_symbol_info_t* const symbol_info = simplify->exec_symbol_info + output_exec;
				int flag = 0;
				for (j = 0; !flag && j < symbol_info->output_size; j++)
				{
					const int d = symbol_info->outputs[j];
					if (d >= 0)
						flag = (!(simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f)))); // If some of the outputs are not dead, we cannot proceed.
				}
				if (!flag) // If all outputs are dead, mark the exec as dead.
					simplify->exec_dead[output_exec >> 5] |= (1u << (output_exec & 0x1f));
			}
	int updated_refs = 0;
	// Go over and replace inputs / outputs.
	ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
		for (i = 0; i < node->input_size; i++)
		{
			const int d = node->inputs[i];
			if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
			{
				node->inputs[i] = refs[d]; // It can be replaced.
				updated_refs = 1;
			}
		}
		for (i = 0; i < node->output_size; i++)
		{
			const int d = node->outputs[i];
			if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
			{
				node->outputs[i] = refs[d]; // It can be replaced.
				updated_refs = 1;
			}
		}
		assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->inputs == node->inputs);
		assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->outputs == node->outputs);
	} ccv_nnc_graph_visit_endfor
	const ccv_nnc_graph_exec_symbol_info_t* const p_node_info = simplify->graph->p ? (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->p->exec_symbol_info, simplify->graph->exec_idx - 1) : 0;
	if (p_node_info && (p_node_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE))
		// Go over the while inputs as well.
		for (i = 0; i < p_node_info->p_while.input_size; i++)
		{
			const int d = p_node_info->p_while.inputs[i];
			if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
			{
				p_node_info->p_while.inputs[i] = refs[d];
				updated_refs = 1;
			}
		}
	return updated_refs;
}

// This is a simple common sub-expression elimination implementation. In particular, we only replace a later computed output
// with an identical, earlier computed output, and leave the "elimination" part to graph pruning.
static void _ccv_nnc_symbolic_graph_common_subexpression_elimination(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
	_ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify);
	// tensor_hash starts with 0s, and each entry is either marked with the tensor index + 1, or the hash of the computation that produced it.
	uint64_t* const tensor_hash = (uint64_t*)cccalloc(simplify->tensor_symbol_info_size, sizeof(uint64_t));
	int i;
	ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
		// If already marked as dead, skip.
		if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
			continue;
		for (i = 0; i < node->input_size; i++)
		{
			assert(node->inputs[i] < simplify->tensor_symbol_info_size);
			// If there is no hash for the input, use the index + 1 as the hash.
			if (node->inputs[i] >= 0 && tensor_hash[node->inputs[i]] == 0)
				tensor_hash[node->inputs[i]] = node->inputs[i] + 1;
		}
		// We cannot support graph / custom commands (we cannot model them properly).
		if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD ||
			node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ||
			node->cmd.cmd == CCV_NNC_CUSTOM_FORWARD ||
			node->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD ||
			// No need to process an opt-disabled node.
			(node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT))
			continue;
		uint64_t hashout, hashin[3];
		siphash((uint8_t*)&hashin[0], (const uint8_t*)&node->cmd.info, sizeof(node->cmd.info), key_siphash);
		siphash((uint8_t*)&hashin[1], (const uint8_t*)&node->hint, sizeof(node->hint), key_siphash);
		hashin[2] = node->cmd.cmd; // Now actually hash the cmd name.
		siphash((uint8_t*)&hashout, (const uint8_t*)hashin, sizeof(hashin), key_siphash);
		// First, hash the cmd and the hints together with the cmd.
		// A note on aliases: we cannot really generate a proper hash for an alias (yet). Thus, just treat aliases as normal tensors.
		for (i = 0; i < node->input_size; i++)
		{
			assert(node->inputs[i] < simplify->tensor_symbol_info_size);
			if (node->inputs[i] >= 0)
			{
				// Hash using the tensor hash.
				hashin[0] = hashout;
				hashin[1] = i; // Encode the positional information.
				hashin[2] = tensor_hash[node->inputs[i]];
			} else {
				// Hash using the input integer (which could be a special integer).
				hashin[0] = hashout;
				hashin[1] = i; // Encode the positional information.
				hashin[2] = node->inputs[i];
			}
			siphash((uint8_t*)&hashout, (const uint8_t*)hashin, sizeof(hashin), key_siphash);
		}
		for (i = 0; i < node->output_size; i++)
			if (node->outputs[i] >= 0)
			{
				assert(node->outputs[i] < simplify->tensor_symbol_info_size);
				// Assigned only once, especially now that we don't consider aliases.
				assert(tensor_hash[node->outputs[i]] == 0);
				hashin[0] = hashout;
				hashin[1] = i; // Positional information.
				siphash((uint8_t*)&hashin[2], (const uint8_t*)&simplify->tensor_symbol_info[node->outputs[i]].info,
					sizeof(simplify->tensor_symbol_info[node->outputs[i]].info), key_siphash);
				// Generate the hash for the output.
				siphash((uint8_t*)&tensor_hash[node->outputs[i]], (const uint8_t*)hashin, sizeof(hashin), key_siphash);
			}
	} ccv_nnc_graph_visit_endfor
	// Allocate 3 / 2 as much space for the simple robin-hood open-addressing hash map.
	const int map_size = (simplify->tensor_symbol_info_size * 3 + 1) / 2;
	int* const refs = (int*)ccmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
	for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		refs[i] = -1;
	ccv_nnc_cse_hash_t* const hash_map = (ccv_nnc_cse_hash_t*)cccalloc(map_size, sizeof(ccv_nnc_cse_hash_t));
	// Now all tensors are hashed; identify tensors with the same hash code, and replace the ones that are computed later.
	ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
		// If already marked as dead, skip.
		if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
			continue;
		// No need to visit an opt-disabled node.
		if (node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT)
			continue;
		for (i = 0; i < node->input_size; i++)
			if (node->inputs[i] >= 0)
			{
				const int d = node->inputs[i];
				assert(tensor_hash[d]);
				const int new_d = _ccv_nnc_cse_hash_find(hash_map, tensor_hash[d], map_size);
				if (new_d >= 0 && new_d != d)
				{
					// Check whether this can be replaced.
					assert(refs[d] == -1 || refs[d] == new_d);
					assert(!simplify->tensor_symbol_info[d].assign_ref);
					assert(!simplify->tensor_symbol_info[d].r_assign_ref);
					assert(!simplify->tensor_symbol_info[d].bypass_ref);
					assert(!simplify->tensor_symbol_info[new_d].assign_ref);
					assert(!simplify->tensor_symbol_info[new_d].r_assign_ref);
					assert(!simplify->tensor_symbol_info[new_d].bypass_ref);
					// Ignore if there is a pair_ref (again, pair_ref has a side effect that runs deeper (using tape)).
					if (simplify->tensor_symbol_info[d].pair_ref)
						continue;
					// If both have a p_ref, we cannot merge.
					if (simplify->tensor_symbol_info[d].p_ref && simplify->tensor_symbol_info[new_d].p_ref)
						continue;
					// Merge s_refs from ref[d] later.
					if (refs[d] != new_d)
						refs[d] = new_d;
					assert(simplify->output_execs[new_d] >= 0);
					// Establish the new dependency.
					ccv_nnc_graph_exec_symbol_concat(simplify->graph, (ccv_nnc_graph_exec_symbol_t){
						.d = simplify->output_execs[new_d],
						.graph = simplify->graph,
					}, (ccv_nnc_graph_exec_symbol_t){
						.d = idx,
						.graph = simplify->graph,
					});
				}
			}
		// We can reuse the input, but we cannot do that for the output of these commands.
		if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD ||
			node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ||
			node->cmd.cmd == CCV_NNC_CUSTOM_FORWARD ||
			node->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD)
			continue;
		for (i = 0; i < node->output_size; i++)
			if (node->outputs[i] >= 0) // This tensor can be reused by others.
				_ccv_nnc_cse_hash_add(hash_map, tensor_hash[node->outputs[i]], node->outputs[i], map_size);
	} ccv_nnc_graph_visit_endfor
	_ccv_nnc_symbolic_graph_update_refs(simplify, outputs, output_size, refs, 1 /* For the execs that generate refs, we don't need them any more. */);
	ccfree(tensor_hash);
	ccfree(hash_map);
	ccfree(refs);
}
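
// The hashing scheme above, in isolation (a hypothetical helper mirroring the
// chaining in the pass): start from the command, then fold in each input hash
// together with its position, so two nodes hash alike exactly when they run
// the same command over the same inputs in the same order.
static uint64_t example_structural_hash(const uint32_t cmd, const uint64_t* const input_hashes, const int input_size)
{
	int i;
	uint64_t hashout, hashin[3];
	hashin[0] = 0;
	hashin[1] = 0;
	hashin[2] = cmd; // The real pass also folds in cmd.info and the hint here.
	siphash((uint8_t*)&hashout, (const uint8_t*)hashin, sizeof(hashin), key_siphash);
	for (i = 0; i < input_size; i++)
	{
		hashin[0] = hashout; // Chain the running hash,
		hashin[1] = i; // the positional information,
		hashin[2] = input_hashes[i]; // and the input tensor's hash.
		siphash((uint8_t*)&hashout, (const uint8_t*)hashin, sizeof(hashin), key_siphash);
	}
	return hashout;
}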
| 425 | ||||
| 426 | static void _ccv_nnc_symbolic_graph_data_transfer_opt(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const binds, const int bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size) | |||
| 427 | { | |||
| 428 | _ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify); | |||
| 429 | uint32_t* const exec_dead = simplify->exec_dead; | |||
| 430 | const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = simplify->tensor_symbol_info; | |||
| 431 | int i; | |||
| 432 | uint32_t* const has_alias = cccalloccalloc(2 * ((simplify->tensor_symbol_info_size + 31) >> 5), sizeof(uint32_t)); | |||
| 433 | uint32_t* const has_binds = has_alias + ((simplify->tensor_symbol_info_size + 31) >> 5); | |||
| 434 | for (i = 0; i < bind_size; i++) | |||
| 435 | has_binds[binds[i].d >> 5] |= (1u << (binds[i].d & 0x1f)); | |||
| 436 | for (i = 0; i < output_size; i++) | |||
| 437 | has_binds[outputs[i].d >> 5] |= (1u << (outputs[i].d & 0x1f)); | |||
| 438 | int* const refs = (int*)ccmallocmalloc(sizeof(int) * simplify->tensor_symbol_info_size); | |||
| 439 | int updated_refs; | |||
| 440 | do { | |||
| 441 | memset(has_alias, 0, sizeof(uint32_t) * ((simplify->tensor_symbol_info_size + 31) >> 5)); | |||
| 442 | // Go through until no updates is possible. This won't result an infinite loop because every time, | |||
| 443 | // a tensor is eliminated. | |||
| 444 | for (i = 0; i < simplify->tensor_symbol_info_size; i++) | |||
| 445 | { | |||
| 446 | refs[i] = -1; | |||
| 447 | if (tensor_symbol_info[i].alias_ref) | |||
| 448 | { | |||
| 449 | const int alias_ref = tensor_symbol_info[i].alias_ref - 1; | |||
| 450 | has_alias[alias_ref >> 5] |= (1u << (alias_ref & 0x1f)); | |||
| 451 | } | |||
| 452 | } | |||
| 453 | ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size ; _i_++) { const int idx __attribute__((unused)) = (simplify-> visit)->node[_i_].index; const int _node_unused_ __attribute__ ((unused)) = (simplify->visit)->node[_i_].term; typeof ( (simplify->exec_symbol_info)) const node __attribute__((unused )) = (simplify->exec_symbol_info) + idx; { | |||
| 454 | // If already marked as dead, skip. | |||
| 455 | if (exec_dead[idx >> 5] & (1u << (idx & 0x1f))) | |||
| 456 | continue; | |||
| 457 | if (node->cmd.cmd != CCV_NNC_DATA_TRANSFER_FORWARD && | |||
| 458 | node->cmd.cmd != CCV_NNC_DATA_TRANSFER_BACKWARD && | |||
| 459 | // Conversion, if the datatype is the same, it is the data transfer op. | |||
| 460 | node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_FORWARD && | |||
| 461 | node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_BACKWARD && | |||
| 462 | // Format transform, if the format is the same, it is the data transfer op. | |||
| 463 | node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_FORWARD && | |||
| 464 | node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_BACKWARD) | |||
| 465 | continue; | |||
| 466 | if (node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT) // If optimization pass disabled, skip. | |||
| 467 | continue; | |||
| 468 | for (i = 0; i < node->output_size; i++) // For data transfer, we only respect output size. | |||
| 469 | if (node->inputs[i] >= 0 && node->outputs[i] >= 0) | |||
| 470 | { | |||
| 471 | assert(node->inputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->inputs[i] < simplify->tensor_symbol_info_size ) ? 1 : 0), __extension__ ({ if (node->inputs[i] < simplify ->tensor_symbol_info_size) ; else __assert_fail ("node->inputs[i] < simplify->tensor_symbol_info_size" , "ccv_nnc_symbolic_graph_simplify.c", 471, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 472 | assert(node->outputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->outputs[i] < simplify->tensor_symbol_info_size ) ? 1 : 0), __extension__ ({ if (node->outputs[i] < simplify ->tensor_symbol_info_size) ; else __assert_fail ("node->outputs[i] < simplify->tensor_symbol_info_size" , "ccv_nnc_symbolic_graph_simplify.c", 472, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 473 | int input_ref = node->inputs[i]; | |||
| 474 | while (refs[input_ref] >= 0) | |||
| 475 | input_ref = refs[input_ref]; | |||
| 476 | int output_ref = node->outputs[i]; | |||
| 477 | while (refs[output_ref] >= 0) | |||
| 478 | output_ref = refs[output_ref]; | |||
| 479 | if (input_ref == output_ref) | |||
| 480 | continue; | |||
| 481 | const ccv_nnc_tensor_symbol_info_t* const input = tensor_symbol_info + input_ref; | |||
| 482 | const ccv_nnc_tensor_symbol_info_t* const output = tensor_symbol_info + output_ref; | |||
| 483 | // If they are not the same data type, skip. (Likely data conversion op). | |||
| 484 | if (input->info.datatype != output->info.datatype) | |||
| 485 | continue; | |||
| 486 | // If they are not the same format, skip. (Likely format transform op). | |||
| 487 | if (input->info.format != output->info.format) | |||
| 488 | continue; | |||
| 489 | // If they are not on the same device (even for NUMA), skip. | |||
| 490 | if (input->info.type != output->info.type) | |||
| 491 | continue; | |||
| 492 | // If both are alias, we cannot consolidate this. | |||
| 493 | if (input->alias_ref && output->alias_ref) | |||
| 494 | continue; | |||
| 495 | // If input is alias, and output has alias reference to it, output cannot be the same as input. | |||
| 496 | if (input->alias_ref && (has_alias[output_ref >> 5] & (1u << (output_ref & 0x1f)))) | |||
| 497 | continue; | |||
| 498 | // If output is alias, and input has alias reference to it, input cannot be the same as output. | |||
| 499 | if (output->alias_ref && (has_alias[input_ref >> 5] & (1u << (input_ref & 0x1f)))) | |||
| 500 | continue; | |||
| 501 | // If either are carry overs (for while), we cannot do anything. | |||
| 502 | if (input->assign_ref || output->assign_ref || | |||
| 503 | input->r_assign_ref || output->r_assign_ref) | |||
| 504 | continue; | |||
| 505 | // If either are bypasses (for case..of), we cannot do anything. | |||
| 506 | if (input->bypass_ref || output->bypass_ref || | |||
| 507 | input->r_bypass_ref || output->r_bypass_ref) | |||
| 508 | continue; | |||
| 509 | // If either are inputs / outputs connecting the parent graph, we cannot do anything. | |||
| 510 | if (input->p_ref || output->p_ref) | |||
| 511 | continue; | |||
| 512 | // If the type is the same, check which one is the alias. | |||
| 513 | // We always prefer alias. | |||
| 514 | if (output->alias_ref) | |||
| 515 | { | |||
| 516 | // Input cannot be binds. | |||
| 517 | if (has_binds[input_ref >> 5] & (1u << (input_ref & 0x1f))) | |||
| 518 | continue; | |||
| 519 | refs[input_ref] = output_ref; | |||
| 520 | } else { // if (input->alias_ref), else | |||
| 521 | // Output cannot be binds. | |||
| 522 | if (has_binds[output_ref >> 5] & (1u << (output_ref & 0x1f))) | |||
| 523 | continue; | |||
| 524 | refs[output_ref] = input_ref; | |||
| 525 | } | |||
| 526 | } | |||
| 527 | } ccv_nnc_graph_visit_endfor} } | |||
| 528 | // Make sure refs reference to the end. | |||
| 529 | for (i = 0; i < simplify->tensor_symbol_info_size; i++) | |||
| 530 | if (refs[i] >= 0) | |||
| 531 | { | |||
| 532 | int ref = refs[i]; | |||
| 533 | while (refs[ref] >= 0) | |||
| 534 | ref = refs[ref]; | |||
| 535 | refs[i] = ref; | |||
| 536 | } | |||
| 537 | updated_refs = _ccv_nnc_symbolic_graph_update_refs(simplify, outputs, output_size, refs, 0 /* We still need these exec that generates the refs. */); | |||
| 538 | } while (updated_refs); | |||
| 539 | ccfreefree(refs); | |||
| 540 | ccfreefree(has_alias); | |||
| 541 | // Now, all references updated, remove data transfers that sources and destinations are the same. | |||
| 542 | ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size ; _i_++) { const int idx __attribute__((unused)) = (simplify-> visit)->node[_i_].index; const int _node_unused_ __attribute__ ((unused)) = (simplify->visit)->node[_i_].term; typeof ( (simplify->exec_symbol_info)) const node __attribute__((unused )) = (simplify->exec_symbol_info) + idx; { | |||
| 543 | // If already marked as dead, skip. | |||
| 544 | if (exec_dead[idx >> 5] & (1u << (idx & 0x1f))) | |||
| 545 | continue; | |||
| 546 | if (node->cmd.cmd != CCV_NNC_DATA_TRANSFER_FORWARD && | |||
| 547 | node->cmd.cmd != CCV_NNC_DATA_TRANSFER_BACKWARD && | |||
| 548 | // Conversion, if the datatype is the same, it is the data transfer op. | |||
| 549 | node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_FORWARD && | |||
| 550 | node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_BACKWARD && | |||
| 551 | // Format transform, if the format is the same, it is the data transfer op. | |||
| 552 | node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_FORWARD && | |||
| 553 | node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_BACKWARD) | |||
| 554 | continue; | |||
| 555 | for (i = 0; i < node->output_size; i++) // For data transfer, we only respect output size. | |||
| 556 | if (node->inputs[i] == node->outputs[i]) | |||
| 557 | { | |||
| 558 | if (i + 1 < node->output_size) | |||
| 559 | { | |||
| 560 | node->inputs[i] = node->inputs[node->output_size - 1]; | |||
| 561 | node->outputs[i] = node->outputs[node->output_size - 1]; | |||
| 562 | } | |||
| 563 | --node->output_size; | |||
| 564 | --i; | |||
| 565 | } | |||
| 566 | node->input_size = node->output_size; | |||
| 567 | ((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx)((void*)(((char*)((simplify->graph->exec_symbol_info)-> data)) + (size_t)(simplify->graph->exec_symbol_info)-> rsize * (size_t)(idx))))->input_size = node->input_size; | |||
| 568 | ((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx)((void*)(((char*)((simplify->graph->exec_symbol_info)-> data)) + (size_t)(simplify->graph->exec_symbol_info)-> rsize * (size_t)(idx))))->output_size = node->output_size; | |||
| 569 | // Remove this data transfer node if it has no outputs. | |||
| 570 | if (node->output_size == 0) | |||
| 571 | exec_dead[idx >> 5] |= (1u << (idx & 0x1f)); | |||
| 572 | } ccv_nnc_graph_visit_endfor} } | |||
| 573 | } | |||
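The removal loop above uses a classic swap-with-last compaction: when inputs[i] == outputs[i], the last input / output pair is copied into slot i and the count shrinks, so nothing beyond a single copy is moved. A minimal sketch of the same pattern, with hypothetical names:

#include <assert.h>

// Drop every index i where in[i] == out[i]; order is not preserved:
// copy the last pair into slot i and shrink the count.
static int compact_identical_pairs(int* const in, int* const out, int size)
{
	int i;
	for (i = 0; i < size; i++)
		if (in[i] == out[i])
		{
			if (i + 1 < size)
			{
				in[i] = in[size - 1];
				out[i] = out[size - 1];
			}
			--size;
			--i; // Re-examine the pair just swapped in.
		}
	return size;
}

int main(void)
{
	int in[3] = { 7, 4, 9 };
	int out[3] = { 7, 5, 9 }; // Pairs 0 and 2 are no-op transfers.
	assert(compact_identical_pairs(in, out, 3) == 1);
	assert(in[0] == 4 && out[0] == 5);
	return 0;
}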
| 574 | ||||
| 575 | typedef struct { | |||
| 576 | uint32_t fused_op; // The final fused op id. | |||
| 577 | uint32_t ops_seq[2]; // The sequence of commands to identify. | |||
| 578 | int ops_seq_size; | |||
| 579 | int ops_info_select; // Which op's info will be selected. | |||
| 580 | struct { | |||
| 581 | int type; // Whether it is an input or an output. Mixing makes no sense, e.g. an input in ops_seq cannot map to an output in fused_op. | |||
| 582 | int op_idx; // Index into the ops_seq. | |||
| 583 | int from; // The input / output index in the op at ops_seq[op_idx]. | |||
| 584 | int to; // The input / output index in the fused_op. | |||
| 585 | } pos[4]; // Maps input / output positions from ops_seq onto fused_op. | |||
| 586 | int pos_size; | |||
| 587 | } ccv_nnc_ops_fusion_t; | |||
| 588 | ||||
| 589 | enum { | |||
| 590 | CCV_NNC_OPS_FUSION_INPUT_INDEX, | |||
| 591 | CCV_NNC_OPS_FUSION_OUTPUT_INDEX, | |||
| 592 | }; | |||
| 593 | ||||
| 594 | const static int ccv_nnc_ops_fusion_io_size = 2; | |||
| 595 | const static ccv_nnc_ops_fusion_t ccv_nnc_ops_fusions[] = { | |||
| 596 | { | |||
| 597 | .fused_op = CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD, | |||
| 598 | .ops_seq = { | |||
| 599 | CCV_NNC_SOFTMAX_FORWARD, CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD, | |||
| 600 | }, | |||
| 601 | .ops_seq_size = 2, | |||
| 602 | .ops_info_select = 1, | |||
| 603 | .pos = { | |||
| 604 | { | |||
| 605 | .type = CCV_NNC_OPS_FUSION_INPUT_INDEX, | |||
| 606 | .op_idx = 0, | |||
| 607 | .from = 0, | |||
| 608 | .to = 0, | |||
| 609 | }, | |||
| 610 | { | |||
| 611 | .type = CCV_NNC_OPS_FUSION_INPUT_INDEX, | |||
| 612 | .op_idx = 1, | |||
| 613 | .from = 1, | |||
| 614 | .to = 1, | |||
| 615 | }, | |||
| 616 | { | |||
| 617 | .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX, | |||
| 618 | .op_idx = 0, | |||
| 619 | .from = 0, | |||
| 620 | .to = 1, | |||
| 621 | }, | |||
| 622 | { | |||
| 623 | .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX, | |||
| 624 | .op_idx = 1, | |||
| 625 | .from = 0, | |||
| 626 | .to = 0, | |||
| 627 | }, | |||
| 628 | }, | |||
| 629 | .pos_size = 4, | |||
| 630 | }, | |||
| 631 | { | |||
| 632 | .fused_op = CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_FORWARD, | |||
| 633 | .ops_seq = { | |||
| 634 | CCV_NNC_SIGMOID_FORWARD, CCV_NNC_BINARY_CROSSENTROPY_FORWARD, | |||
| 635 | }, | |||
| 636 | .ops_seq_size = 2, | |||
| 637 | .ops_info_select = 1, | |||
| 638 | .pos = { | |||
| 639 | { | |||
| 640 | .type = CCV_NNC_OPS_FUSION_INPUT_INDEX, | |||
| 641 | .op_idx = 0, | |||
| 642 | .from = 0, | |||
| 643 | .to = 0, | |||
| 644 | }, | |||
| 645 | { | |||
| 646 | .type = CCV_NNC_OPS_FUSION_INPUT_INDEX, | |||
| 647 | .op_idx = 1, | |||
| 648 | .from = 1, | |||
| 649 | .to = 1, | |||
| 650 | }, | |||
| 651 | { | |||
| 652 | .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX, | |||
| 653 | .op_idx = 0, | |||
| 654 | .from = 0, | |||
| 655 | .to = 1, | |||
| 656 | }, | |||
| 657 | { | |||
| 658 | .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX, | |||
| 659 | .op_idx = 1, | |||
| 660 | .from = 0, | |||
| 661 | .to = 0, | |||
| 662 | }, | |||
| 663 | }, | |||
| 664 | .pos_size = 4, | |||
| 665 | } | |||
| 666 | }; | |||
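To read the pos tables above concretely, take the softmax + cross-entropy entry: the fused op's input 0 is input 0 of the softmax node (conventionally the activations), input 1 is input 1 of the cross-entropy node (the labels), output 0 is output 0 of the cross-entropy node (the loss), and output 1 is output 0 of the softmax node. A hypothetical sanity check over the table, assuming it is visible in the same translation unit and assert.h is included:

// Hypothetical sanity check of the first fusion entry's wiring.
static void check_softmax_xent_wiring(void)
{
	const ccv_nnc_ops_fusion_t* const f = &ccv_nnc_ops_fusions[0];
	// Fused input 0 <- input 0 of ops_seq[0] (the softmax node).
	assert(f->pos[0].op_idx == 0 && f->pos[0].from == 0 && f->pos[0].to == 0);
	// Fused input 1 <- input 1 of ops_seq[1] (the cross-entropy node).
	assert(f->pos[1].op_idx == 1 && f->pos[1].from == 1 && f->pos[1].to == 1);
	// Fused output 1 <- output 0 of ops_seq[0] (the softmax itself).
	assert(f->pos[2].op_idx == 0 && f->pos[2].from == 0 && f->pos[2].to == 1);
	// Fused output 0 <- output 0 of ops_seq[1] (the loss).
	assert(f->pos[3].op_idx == 1 && f->pos[3].from == 0 && f->pos[3].to == 0);
}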
| 667 | ||||
| 668 | static int _ccv_nnc_find_ops_for_fusion(const ccv_nnc_ops_fusion_t* const fusion, const int ops_idx, const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info, const uint32_t* const exec_dead, const int exec_idx, int* const fusing_exec_symbols) | |||
| 669 | { | |||
| 670 | if (exec_dead[exec_idx >> 5] & (1u << (exec_idx & 0x1f))) | |||
| 671 | return 0; | |||
| 672 | const ccv_nnc_graph_exec_symbol_info_t* const node = exec_symbol_info + exec_idx; | |||
| 673 | // Doesn't match the ops_seq, return 0. | |||
| 674 | if (fusion->ops_seq[ops_idx] != node->cmd.cmd) | |||
| 675 | return 0; | |||
| 676 | fusing_exec_symbols[ops_idx] = exec_idx; | |||
| 677 | // If already reached the end, we are good. | |||
| 678 | if (ops_idx == fusion->ops_seq_size - 1) | |||
| 679 | return 1; | |||
| 680 | // Otherwise, we need to go on, but there are no outgoing nodes to follow up with. | |||
| 681 | if (!node->outgoings || !node->outgoings->rnum) | |||
| 682 | return 0; | |||
| 683 | int i; | |||
| 684 | for (i = 0; i < node->outgoings->rnum; i++) | |||
| 685 | if (_ccv_nnc_find_ops_for_fusion(fusion, ops_idx + 1, exec_symbol_info, exec_dead, *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)( node->outgoings)->rsize * (size_t)(i))), fusing_exec_symbols)) | |||
| 686 | return 1; | |||
| 687 | return 0; | |||
| 688 | } | |||
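The function above is a depth-first search that tries to lay ops_seq along a chain of outgoing edges, recording the matched exec at each depth and backtracking on failure. The same control shape reduced to a plain adjacency list (all names hypothetical):

#include <stddef.h>

// Try to match pattern[pat_idx .. pat_len - 1] starting at node `at`,
// following outgoing edges; record matched node ids in `found`.
static int match_chain(const int* const cmds, const int* const* const adj, const int* const adj_size, const int* const pattern, const int pat_idx, const int pat_len, const int at, int* const found)
{
	if (cmds[at] != pattern[pat_idx])
		return 0; // Mismatch at this depth, backtrack.
	found[pat_idx] = at;
	if (pat_idx == pat_len - 1)
		return 1; // Matched the whole sequence.
	int i;
	for (i = 0; i < adj_size[at]; i++)
		if (match_chain(cmds, adj, adj_size, pattern, pat_idx + 1, pat_len, adj[at][i], found))
			return 1;
	return 0;
}

int main(void)
{
	const int cmds[2] = { 100, 200 }; // Stand-ins for two op ids.
	const int edges_of_0[1] = { 1 }; // Node 0 -> node 1.
	const int* const adj[2] = { edges_of_0, NULL };
	const int adj_size[2] = { 1, 0 };
	const int pattern[2] = { 100, 200 };
	int found[2];
	return match_chain(cmds, adj, adj_size, pattern, 0, 2, 0, found) ? 0 : 1;
}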
| 689 | ||||
| 690 | static void _ccv_nnc_symbolic_graph_ops_fusion(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size) | |||
| 691 | { | |||
| 692 | uint32_t* const exec_dead = simplify->exec_dead; | |||
| 693 | ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = simplify->exec_symbol_info; | |||
| 694 | int i, j; | |||
| 695 | int fusing_exec_symbols[sizeof(ccv_nnc_ops_fusions->ops_seq)]; // Note: sizeof counts bytes here (8), which over-allocates, but is always large enough for ops_seq_size entries. | |||
| 696 | int fused_inputs[ccv_nnc_ops_fusion_io_size]; // The size 2 is just large enough for the fusions defined above. | |||
| 697 | int fused_outputs[ccv_nnc_ops_fusion_io_size]; | |||
| 698 | ccv_nnc_graph_visit_for(simplify->visit, exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size ; _i_++) { const int idx __attribute__((unused)) = (simplify-> visit)->node[_i_].index; const int _node_unused_ __attribute__ ((unused)) = (simplify->visit)->node[_i_].term; typeof ( (exec_symbol_info)) const node __attribute__((unused)) = (exec_symbol_info ) + idx; { | |||
| 699 | // If already marked as dead, skip. | |||
| 700 | if (exec_dead[idx >> 5] & (1u << (idx & 0x1f))) | |||
| 701 | continue; | |||
| 702 | if (node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT) // If optimization pass disabled, skip. | |||
| 703 | continue; | |||
| 704 | // Run through rudimentary pattern matching for ops_seq. There are better ways to do this (Boyer-Moore immediately comes to mind). However, this is simpler to code. | |||
| 705 | // If I ever run into performance issues with this, I would be very happy. | |||
| 706 | for (i = 0; i < sizeof(ccv_nnc_ops_fusions) / sizeof(ccv_nnc_ops_fusion_t); i++) | |||
| 707 | { | |||
| 708 | const ccv_nnc_ops_fusion_t* const ops_fusion = ccv_nnc_ops_fusions + i; | |||
| 709 | // Check to see if a matching list of symbols is possible. | |||
| 710 | if (ops_fusion->ops_seq[0] == node->cmd.cmd && | |||
| 711 | _ccv_nnc_find_ops_for_fusion(ops_fusion, 0, exec_symbol_info, exec_dead, idx, fusing_exec_symbols)) | |||
| 712 | { | |||
| 713 | // Go through all the inputs and outputs, and check if they exist and are mapped. | |||
| 714 | // TODO: the mapping can be more sophisticated than what we have here. | |||
| 715 | // Also, if some inputs / outputs cannot be mapped, we cannot continue. | |||
| 716 | for (j = 0; j < ccv_nnc_ops_fusion_io_size; j++) | |||
| 717 | fused_inputs[j] = fused_outputs[j] = CCV_NNC_NO_TENSOR_SYMBOL; | |||
| 718 | int input_size = 0, output_size = 0; | |||
| 719 | for (j = 0; j < ops_fusion->pos_size; j++) | |||
| 720 | { | |||
| 721 | ccv_nnc_graph_exec_symbol_info_t* const fusing_op = exec_symbol_info + fusing_exec_symbols[ops_fusion->pos[j].op_idx]; | |||
| 722 | switch (ops_fusion->pos[j].type) | |||
| 723 | { | |||
| 724 | case CCV_NNC_OPS_FUSION_INPUT_INDEX: | |||
| 725 | fused_inputs[ops_fusion->pos[j].to] = ops_fusion->pos[j].from < fusing_op->input_size ? fusing_op->inputs[ops_fusion->pos[j].from] : CCV_NNC_NO_TENSOR_SYMBOL; | |||
| 726 | input_size = ccv_max(input_size, ops_fusion->pos[j].to + 1)({ typeof (input_size) _a = (input_size); typeof (ops_fusion-> pos[j].to + 1) _b = (ops_fusion->pos[j].to + 1); (_a > _b ) ? _a : _b; }); | |||
| 727 | break; | |||
| 728 | case CCV_NNC_OPS_FUSION_OUTPUT_INDEX: | |||
| 729 | fused_outputs[ops_fusion->pos[j].to] = ops_fusion->pos[j].from < fusing_op->output_size ? fusing_op->outputs[ops_fusion->pos[j].from] : CCV_NNC_NO_TENSOR_SYMBOL; | |||
| 730 | output_size = ccv_max(output_size, ops_fusion->pos[j].to + 1)({ typeof (output_size) _a = (output_size); typeof (ops_fusion ->pos[j].to + 1) _b = (ops_fusion->pos[j].to + 1); (_a > _b) ? _a : _b; }); | |||
| 731 | break; | |||
| 732 | } | |||
| 733 | } | |||
| 734 | const ccv_nnc_cmd_param_t info = exec_symbol_info[fusing_exec_symbols[ops_fusion->ops_info_select]].cmd.info; | |||
| 735 | // Modify the first node so it has the correct type and value. I need to look back at the actual graph to get the info. | |||
| 736 | ccv_nnc_graph_exec_symbol_info_t* const actual_node = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx)((void*)(((char*)((simplify->graph->exec_symbol_info)-> data)) + (size_t)(simplify->graph->exec_symbol_info)-> rsize * (size_t)(idx))); | |||
| 737 | actual_node->cmd.cmd = node->cmd.cmd = ops_fusion->fused_op; | |||
| 738 | actual_node->cmd.info = node->cmd.info = info; | |||
| 739 | if (node->input_size + node->output_size < input_size + output_size) | |||
| 740 | actual_node->inputs = node->inputs = node->inputs ? ccreallocrealloc(node->inputs, sizeof(int) * (input_size + output_size)) : ccmallocmalloc(sizeof(int) * (input_size + output_size)); | |||
| 741 | actual_node->outputs = node->outputs = node->inputs + input_size; | |||
| 742 | actual_node->input_size = node->input_size = input_size; | |||
| 743 | actual_node->output_size = node->output_size = output_size; | |||
| 744 | memcpy(node->inputs, fused_inputs, sizeof(int) * input_size); | |||
| 745 | memcpy(node->outputs, fused_outputs, sizeof(int) * output_size); | |||
| 746 | // Afterwards, mark the rest as dead. | |||
| 747 | for (j = 1; j < ops_fusion->ops_seq_size; j++) | |||
| 748 | exec_dead[fusing_exec_symbols[j] >> 5] |= (1u << (fusing_exec_symbols[j] & 0x1f)); | |||
| 749 | break; | |||
| 750 | } | |||
| 751 | } | |||
| 752 | } ccv_nnc_graph_visit_endfor} } | |||
| 753 | } | |||
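One detail worth noting in the rewrite above: a node's inputs and outputs live in one int buffer, with outputs aliased to the tail at inputs + input_size, so a single realloc resizes both. A sketch of that layout under hypothetical names:

#include <stdlib.h>

typedef struct {
	int* inputs; // Single buffer of input_size + output_size ints.
	int* outputs; // Points into the same buffer, at inputs + input_size.
	int input_size;
	int output_size;
} io_node_t;

// Resize the node's IO to the new sizes, preserving the single-buffer
// layout that the graph rewrite above relies on.
static void io_node_resize(io_node_t* const node, const int input_size, const int output_size)
{
	if (node->input_size + node->output_size < input_size + output_size)
		node->inputs = (int*)realloc(node->inputs, sizeof(int) * (input_size + output_size)); // realloc(NULL, ...) acts as malloc.
	node->outputs = node->inputs + input_size;
	node->input_size = input_size;
	node->output_size = output_size;
}

int main(void)
{
	io_node_t node = { 0 };
	io_node_resize(&node, 2, 2);
	node.inputs[0] = 1; node.outputs[1] = 4; // Distinct regions, one buffer.
	free(node.inputs);
	return 0;
}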
| 754 | ||||
| 755 | static void _ccv_nnc_symbolic_graph_pruning_undead_exec(ccv_nnc_symbolic_graph_simplify_t* const simplify, const int exec_idx, uint32_t* const tensor_visited, ccv_array_t* const next) | |||
| 756 | { | |||
| 757 | assert(exec_idx >= 0)((void) sizeof ((exec_idx >= 0) ? 1 : 0), __extension__ ({ if (exec_idx >= 0) ; else __assert_fail ("exec_idx >= 0" , "ccv_nnc_symbolic_graph_simplify.c", 757, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 758 | uint32_t* const exec_dead = simplify->exec_dead; | |||
| 759 | uint32_t* const tensor_dead = simplify->tensor_dead; | |||
| 760 | exec_dead[exec_idx >> 5] &= ~(1u << (exec_idx & 0x1f)); // Undead the exec. | |||
| 761 | ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = simplify->exec_symbol_info + exec_idx; | |||
| 762 | int i; | |||
| 763 | if (exec_symbol_info->cmd.cmd == CCV_NNC_GRAPH_FORWARD || | |||
| 764 | exec_symbol_info->cmd.cmd == CCV_NNC_GRAPH_BACKWARD || | |||
| 765 | exec_symbol_info->cmd.cmd == CCV_NNC_CUSTOM_FORWARD || | |||
| 766 | exec_symbol_info->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD) | |||
| 767 | { | |||
| 768 | // All of its inputs / outputs need to be undead for these commands. | |||
| 769 | for (i = 0; i < exec_symbol_info->input_size; i++) | |||
| 770 | { | |||
| 771 | const int d = exec_symbol_info->inputs[i]; | |||
| 772 | if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f)))) | |||
| 773 | { | |||
| 774 | ccv_array_push(next, &d); // Push to the next round to be undead. | |||
| 775 | tensor_visited[d >> 5] |= (1u << (d & 0x1f)); | |||
| 776 | } | |||
| 777 | } | |||
| 778 | for (i = 0; i < exec_symbol_info->output_size; i++) | |||
| 779 | { | |||
| 780 | const int d = exec_symbol_info->outputs[i]; | |||
| 781 | if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f)))) | |||
| 782 | { | |||
| 783 | ccv_array_push(next, &d); // Push to the next round to be undead. | |||
| 784 | tensor_visited[d >> 5] |= (1u << (d & 0x1f)); | |||
| 785 | } | |||
| 786 | } | |||
| 787 | return; | |||
| 788 | } | |||
| 789 | // Go through the inputs / outputs to make sure all of them are available. | |||
| 790 | const int input_bitmask_size = (exec_symbol_info->input_size + 63) >> 6; | |||
| 791 | const int output_bitmask_size = (exec_symbol_info->output_size + 63) >> 6; | |||
| 792 | uint64_t input_bitmasks[ccv_max(1, input_bitmask_size)({ typeof (1) _a = (1); typeof (input_bitmask_size) _b = (input_bitmask_size ); (_a > _b) ? _a : _b; })]; | |||
| 793 | for (i = 0; i < input_bitmask_size; i++) | |||
| 794 | input_bitmasks[i] = 0; | |||
| 795 | uint64_t output_bitmasks[ccv_max(1, output_bitmask_size)({ typeof (1) _a = (1); typeof (output_bitmask_size) _b = (output_bitmask_size ); (_a > _b) ? _a : _b; })]; | |||
| 796 | for (i = 0; i < output_bitmask_size; i++) | |||
| 797 | output_bitmasks[i] = 0; | |||
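One subtlety in the two VLA declarations above: they reserve ccv_max(1, …) elements to avoid a zero-length array, but the zeroing loops only cover …_bitmask_size of them, so for a command with no inputs or outputs the single padding element stays uninitialized. A defensive variant (a sketch, not the file's actual code) zeroes the declared length instead:

#include <stdint.h>
#include <string.h>

#define MAX1(n) ((n) > 1 ? (n) : 1) // Stand-in for ccv_max(1, n).

static void zero_bitmask_example(const int input_bitmask_size)
{
	uint64_t input_bitmasks[MAX1(input_bitmask_size)];
	// sizeof on a VLA is evaluated at runtime, so this zeroes the
	// declared length, covering the padding element when the size is 0.
	memset(input_bitmasks, 0, sizeof(input_bitmasks));
	(void)input_bitmasks;
}

int main(void)
{
	zero_bitmask_example(0); // The zero-size edge case.
	return 0;
}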
| 798 | for (i = 0; i < exec_symbol_info->input_size; i++) | |||
| 799 | if (exec_symbol_info->inputs[i] >= 0) | |||
| 800 | input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
| 801 | for (i = 0; i < exec_symbol_info->output_size; i++) | |||
| 802 | if (exec_symbol_info->outputs[i] >= 0) | |||
| 803 | output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
| 804 | // First, mark everything with bitmasks, and verify it works. | |||
| 805 | assert(ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size))((void) sizeof ((ccv_nnc_cmd_bitmask(exec_symbol_info->cmd , exec_symbol_info->input_size, exec_symbol_info->output_size , input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size )) ? 1 : 0), __extension__ ({ if (ccv_nnc_cmd_bitmask(exec_symbol_info ->cmd, exec_symbol_info->input_size, exec_symbol_info-> output_size, input_bitmasks, input_bitmask_size, output_bitmasks , output_bitmask_size)) ; else __assert_fail ("ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size)" , "ccv_nnc_symbolic_graph_simplify.c", 805, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 806 | int flag; | |||
| 807 | do { | |||
| 808 | flag = 0; | |||
| 809 | // Try to eliminate one at a time. Go over outputs first. | |||
| 810 | for (i = 0; i < exec_symbol_info->output_size; i++) | |||
| 811 | { | |||
| 812 | const int d = exec_symbol_info->outputs[i]; | |||
| 813 | // If this tensor is currently marked as dead, see whether the command still works when we don't have this tensor at all. | |||
| 814 | if (d >= 0 && (tensor_dead[d >> 5] & (1u << (d & 0x1f))) && | |||
| 815 | (output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))) | |||
| 816 | { | |||
| 817 | output_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63)); | |||
| 818 | if (ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size)) | |||
| 819 | flag = 1; | |||
| 820 | else // Reset the bitmask. | |||
| 821 | output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
| 822 | } | |||
| 823 | } | |||
| 824 | // Only check whether there are inputs we can remove if it is a backward command. | |||
| 825 | if (!ccv_nnc_cmd_is_forward(exec_symbol_info->cmd)) | |||
| 826 | // For inputs, no matter whether they are dead or not, we try to limit the inputs to the smallest number. | |||
| 827 | for (i = 0; i < exec_symbol_info->input_size; i++) | |||
| 828 | { | |||
| 829 | const int d = exec_symbol_info->inputs[i]; | |||
| 830 | // Whether this tensor is marked as dead or not, see whether the command still works when we don't have this tensor at all. | |||
| 831 | if (d >= 0 && (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))) | |||
| 832 | { | |||
| 833 | input_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63)); | |||
| 834 | if (ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size)) | |||
| 835 | flag = 1; | |||
| 836 | else // Reset the bitmask. | |||
| 837 | input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); | |||
| 838 | } | |||
| 839 | } | |||
| 840 | } while (flag); | |||
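The do / while above is a greedy fixed-point loop: tentatively clear one bit, ask the command's bitmask validator whether the reduced signature is still legal, keep the clear if so, restore it if not, and repeat until a full pass makes no change. The skeleton, abstracted over an arbitrary validity predicate (hypothetical names):

#include <assert.h>
#include <stdint.h>

// Accept any mask that keeps bit 0 set; everything else is optional.
static int needs_bit0(const uint64_t mask)
{
	return !!(mask & 1);
}

// Greedily clear bits of *mask (up to `size` bits) while valid(*mask)
// stays true; reaches a fixed point since cleared bits never return.
static void minimize_mask(uint64_t* const mask, const int size, int (*valid)(uint64_t))
{
	int flag;
	do {
		flag = 0;
		int i;
		for (i = 0; i < size; i++)
			if (*mask & ((uint64_t)1 << i))
			{
				*mask &= ~((uint64_t)1 << i); // Tentatively clear.
				if (valid(*mask))
					flag = 1; // Keep the elimination.
				else
					*mask |= ((uint64_t)1 << i); // Restore and move on.
			}
	} while (flag);
}

int main(void)
{
	uint64_t mask = 0xff;
	minimize_mask(&mask, 8, needs_bit0);
	assert(mask == 1); // Only the required bit survives.
	return 0;
}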
| 841 | // Now we know which ones to keep and which ones to undead. | |||
| 842 | for (i = 0; i < exec_symbol_info->input_size; i++) | |||
| 843 | if (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) | |||
| 844 | { | |||
| 845 | const int d = exec_symbol_info->inputs[i]; | |||
| 846 | if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f)))) | |||
| 847 | { | |||
| 848 | ccv_array_push(next, &d); // Push to the next round to be undead. | |||
| 849 | tensor_visited[d >> 5] |= (1u << (d & 0x1f)); | |||
| 850 | } | |||
| 851 | } else { | |||
| 852 | // Clean up the inputs. | |||
| 853 | exec_symbol_info->inputs[i] = CCV_NNC_NO_TENSOR_SYMBOL; | |||
| 854 | assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->inputs == exec_symbol_info->inputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*) (((char*)((simplify->graph->exec_symbol_info)->data) ) + (size_t)(simplify->graph->exec_symbol_info)->rsize * (size_t)(exec_idx))))->inputs == exec_symbol_info->inputs ) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_info_t *)((void*)(((char*)((simplify->graph->exec_symbol_info) ->data)) + (size_t)(simplify->graph->exec_symbol_info )->rsize * (size_t)(exec_idx))))->inputs == exec_symbol_info ->inputs) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->inputs == exec_symbol_info->inputs" , "ccv_nnc_symbolic_graph_simplify.c", 854, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 855 | } | |||
| 856 | for (i = 0; i < exec_symbol_info->output_size; i++) | |||
| 857 | if (output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))) | |||
| 858 | { | |||
| 859 | const int d = exec_symbol_info->outputs[i]; | |||
| 860 | if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f)))) | |||
| 861 | { | |||
| 862 | ccv_array_push(next, &d); // Push to the next round to be undead. | |||
| 863 | tensor_visited[d >> 5] |= (1u << (d & 0x1f)); | |||
| 864 | } | |||
| 865 | } else { | |||
| 866 | // Clean up the outputs. | |||
| 867 | exec_symbol_info->outputs[i] = CCV_NNC_NO_TENSOR_SYMBOL; | |||
| 868 | assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->outputs == exec_symbol_info->outputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*) (((char*)((simplify->graph->exec_symbol_info)->data) ) + (size_t)(simplify->graph->exec_symbol_info)->rsize * (size_t)(exec_idx))))->outputs == exec_symbol_info-> outputs) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_info_t *)((void*)(((char*)((simplify->graph->exec_symbol_info) ->data)) + (size_t)(simplify->graph->exec_symbol_info )->rsize * (size_t)(exec_idx))))->outputs == exec_symbol_info ->outputs) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->outputs == exec_symbol_info->outputs" , "ccv_nnc_symbolic_graph_simplify.c", 868, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 869 | } | |||
| 870 | } | |||
| 871 | ||||
| 872 | static void _ccv_nnc_symbolic_graph_pruning(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size) | |||
| 873 | { | |||
| 874 | uint32_t* const tensor_visited = (uint32_t*)cccalloccalloc(sizeof(uint32_t), (simplify->tensor_symbol_info_size + 31) >> 5); | |||
| 875 | ccv_array_t* const preserve[2] = { | |||
| 876 | ccv_array_new(sizeof(int), output_size, 0), | |||
| 877 | ccv_array_new(sizeof(int), 0, 0), | |||
| 878 | }; | |||
| 879 | int i, j; | |||
| 880 | ccv_array_t** const r_alias_refs = (ccv_array_t**)cccalloccalloc(sizeof(ccv_array_t*), simplify->tensor_symbol_info_size); | |||
| 881 | for (i = 0; i < simplify->tensor_symbol_info_size; i++) | |||
| 882 | if (simplify->tensor_symbol_info[i].alias_ref) | |||
| 883 | { | |||
| 884 | const int alias_ref = simplify->tensor_symbol_info[i].alias_ref - 1; | |||
| 885 | assert(alias_ref < simplify->tensor_symbol_info_size)((void) sizeof ((alias_ref < simplify->tensor_symbol_info_size ) ? 1 : 0), __extension__ ({ if (alias_ref < simplify-> tensor_symbol_info_size) ; else __assert_fail ("alias_ref < simplify->tensor_symbol_info_size" , "ccv_nnc_symbolic_graph_simplify.c", 885, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 886 | if (!r_alias_refs[alias_ref]) | |||
| 887 | r_alias_refs[alias_ref] = ccv_array_new(sizeof(int), 1, 0); | |||
| 888 | ccv_array_push(r_alias_refs[alias_ref], &i); | |||
| 889 | } | |||
| 890 | uint32_t* const exec_dead = simplify->exec_dead; | |||
| 891 | uint32_t* const tensor_dead = simplify->tensor_dead; | |||
| 892 | int* const output_execs = simplify->output_execs; | |||
| 893 | _ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify); | |||
| 894 | // Mark everything visited as dead. | |||
| 895 | ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size ; _i_++) { const int idx __attribute__((unused)) = (simplify-> visit)->node[_i_].index; const int _node_unused_ __attribute__ ((unused)) = (simplify->visit)->node[_i_].term; typeof ( (simplify->exec_symbol_info)) const node __attribute__((unused )) = (simplify->exec_symbol_info) + idx; { | |||
| 896 | if (node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT) // If optimization pass disabled, skip. | |||
| 897 | continue; | |||
| 898 | exec_dead[idx >> 5] |= (1u << (idx & 0x1f)); | |||
| 899 | for (i = 0; i < node->input_size; i++) | |||
| 900 | { | |||
| 901 | const int d = node->inputs[i]; | |||
| 902 | if (d >= 0) | |||
| 903 | tensor_dead[d >> 5] |= (1u << (d & 0x1f)); | |||
| 904 | } | |||
| 905 | for (i = 0; i < node->output_size; i++) | |||
| 906 | { | |||
| 907 | const int d = node->outputs[i]; | |||
| 908 | if (d >= 0) | |||
| 909 | tensor_dead[d >> 5] |= (1u << (d & 0x1f)); | |||
| 910 | } | |||
| 911 | } ccv_nnc_graph_visit_endfor} } | |||
| 912 | // If the tensor symbol is used by another exec that is not visited, unmark it. | |||
| 913 | for (i = 0; i < simplify->exec_symbol_info_size; i++) | |||
| 914 | { | |||
| 915 | if (exec_dead[i >> 5] & (1u << (i & 0x1f))) | |||
| 916 | continue; | |||
| 917 | const ccv_nnc_graph_exec_symbol_info_t* const node = simplify->exec_symbol_info + i; | |||
| 918 | for (j = 0; j < node->input_size; j++) | |||
| 919 | { | |||
| 920 | const int d = node->inputs[j]; | |||
| 921 | // Undead it. | |||
| 922 | if (d >= 0) | |||
| 923 | tensor_dead[d >> 5] &= ~(1u << (d & 0x1f)); | |||
| 924 | } | |||
| 925 | for (j = 0; j < node->output_size; j++) | |||
| 926 | { | |||
| 927 | const int d = node->outputs[j]; | |||
| 928 | // Undead it. | |||
| 929 | if (d >= 0) | |||
| 930 | tensor_dead[d >> 5] &= ~(1u << (d & 0x1f)); | |||
| 931 | } | |||
| 932 | } | |||
| 933 | for (i = 0; i < output_size; i++) | |||
| 934 | ccv_array_push(preserve[0], &outputs[i].d); | |||
| 935 | int p = 0, q = 1; | |||
| 936 | // BFS to mark execs / tensors as not dead. | |||
| 937 | while (preserve[p]->rnum > 0) | |||
| 938 | { | |||
| 939 | ccv_array_clear(preserve[q]); | |||
| 940 | // First, undead all relevant tensors. | |||
| 941 | for (i = 0; i < preserve[p]->rnum; i++) | |||
| 942 | { | |||
| 943 | const int d = *(int*)ccv_array_get(preserve[p], i)((void*)(((char*)((preserve[p])->data)) + (size_t)(preserve [p])->rsize * (size_t)(i))); | |||
| 944 | // Undead the outputs. | |||
| 945 | tensor_dead[d >> 5] &= ~(1u << (d & 0x1f)); | |||
| 946 | int alias_ref = d; | |||
| 947 | if (simplify->tensor_symbol_info[d].alias_ref) | |||
| 948 | { | |||
| 949 | alias_ref = simplify->tensor_symbol_info[d].alias_ref - 1; | |||
| 950 | tensor_dead[alias_ref >> 5] &= ~(1u << (alias_ref & 0x1f)); | |||
| 951 | assert(r_alias_refs[alias_ref])((void) sizeof ((r_alias_refs[alias_ref]) ? 1 : 0), __extension__ ({ if (r_alias_refs[alias_ref]) ; else __assert_fail ("r_alias_refs[alias_ref]" , "ccv_nnc_symbolic_graph_simplify.c", 951, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 952 | } | |||
| 953 | if (r_alias_refs[alias_ref]) | |||
| 954 | for (j = 0; j < r_alias_refs[alias_ref]->rnum; j++) | |||
| 955 | { | |||
| 956 | const int b = *(int*)ccv_array_get(r_alias_refs[alias_ref], j)((void*)(((char*)((r_alias_refs[alias_ref])->data)) + (size_t )(r_alias_refs[alias_ref])->rsize * (size_t)(j))); | |||
| 957 | if (output_execs[b] >= 0) // Only revive the alias if it is written to. | |||
| 958 | tensor_dead[b >> 5] &= ~(1u << (b & 0x1f)); | |||
| 959 | } | |||
| 960 | } | |||
| 961 | for (i = 0; i < preserve[p]->rnum; i++) | |||
| 962 | { | |||
| 963 | const int d = *(int*)ccv_array_get(preserve[p], i)((void*)(((char*)((preserve[p])->data)) + (size_t)(preserve [p])->rsize * (size_t)(i))); | |||
| 964 | const int output_exec = output_execs[d]; | |||
| 965 | // Undead the exec. | |||
| 966 | if (output_exec >= 0) | |||
| 967 | _ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]); | |||
| 968 | int alias_ref = d; | |||
| 969 | if (simplify->tensor_symbol_info[d].alias_ref) | |||
| 970 | { | |||
| 971 | alias_ref = simplify->tensor_symbol_info[d].alias_ref - 1; | |||
| 972 | const int output_exec = output_execs[alias_ref]; | |||
| 973 | if (output_exec >= 0) | |||
| 974 | _ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]); | |||
| 975 | } | |||
| 976 | if (r_alias_refs[alias_ref]) | |||
| 977 | for (j = 0; j < r_alias_refs[alias_ref]->rnum; j++) | |||
| 978 | { | |||
| 979 | const int b = *(int*)ccv_array_get(r_alias_refs[alias_ref], j)((void*)(((char*)((r_alias_refs[alias_ref])->data)) + (size_t )(r_alias_refs[alias_ref])->rsize * (size_t)(j))); | |||
| 980 | const int output_exec = output_execs[b]; | |||
| 981 | if (output_exec >= 0) | |||
| 982 | _ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]); | |||
| 983 | } | |||
| 984 | } | |||
| 985 | CCV_SWAP(p, q, i)((i) = (p), (p) = (q), (q) = (i)); | |||
| 986 | } | |||
| 987 | ccfreefree(tensor_visited); | |||
| 988 | ccv_array_free(preserve[0]); | |||
| 989 | ccv_array_free(preserve[1]); | |||
| 990 | for (i = 0; i < simplify->tensor_symbol_info_size; i++) | |||
| 991 | if (r_alias_refs[i]) | |||
| 992 | ccv_array_free(r_alias_refs[i]); | |||
| 993 | ccfreefree(r_alias_refs); | |||
| 994 | } | |||
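The pruning driver above is a frontier-at-a-time BFS: preserve[p] holds the tensors revived this round, visiting them pushes newly revived tensors into preserve[q], and CCV_SWAP flips the two roles until a round produces an empty frontier. The control shape in isolation (hypothetical; plain int arrays stand in for the ccv_array_t queues):

#include <stdio.h>

#define SWAP(a, b, t) ((t) = (a), (a) = (b), (b) = (t))

int main(void)
{
	int queue[2][8] = { { 5, 6, 7 } }; // Seed frontier, e.g. the graph outputs.
	int count[2] = { 3, 0 };
	int p = 0, q = 1, t, i;
	while (count[p] > 0)
	{
		count[q] = 0; // Mirrors ccv_array_clear(preserve[q]).
		for (i = 0; i < count[p]; i++)
			if (queue[p][i] > 0) // Pretend positive items revive a predecessor.
				queue[q][count[q]++] = queue[p][i] - 3;
		SWAP(p, q, t);
	}
	printf("no more tensors to revive\n");
	return 0;
}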
| 995 | ||||
| 996 | void ccv_nnc_symbolic_graph_simplify(ccv_nnc_symbolic_graph_t* const graph, const int* const passes, const int pass_size, const ccv_nnc_tensor_symbol_t* const binds, const int bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size) | |||
| 997 | { | |||
| 998 | ccv_nnc_symbolic_graph_simplify_t* const simplify = _ccv_nnc_symbolic_graph_simplify_new(graph, sources, source_size, destinations, destination_size); | |||
| 999 | int i; | |||
| 1000 | for (i = 0; i < pass_size; i++) | |||
| 1001 | switch (passes[i]) | |||
| 1002 | { | |||
| 1003 | case CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION: | |||
| 1004 | _ccv_nnc_symbolic_graph_common_subexpression_elimination(simplify, outputs, output_size); | |||
| 1005 | break; | |||
| 1006 | case CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT: | |||
| 1007 | _ccv_nnc_symbolic_graph_data_transfer_opt(simplify, binds, bind_size, outputs, output_size); | |||
| 1008 | break; | |||
| 1009 | case CCV_NNC_SIMPLIFY_GRAPH_PRUNING: | |||
| 1010 | _ccv_nnc_symbolic_graph_pruning(simplify, outputs, output_size); | |||
| 1011 | break; | |||
| 1012 | case CCV_NNC_SIMPLIFY_OPS_FUSION: | |||
| 1013 | _ccv_nnc_symbolic_graph_ops_fusion(simplify, outputs, output_size); | |||
| 1014 | break; | |||
| 1015 | } | |||
| 1016 | _ccv_nnc_symbolic_graph_simplify_apply(simplify); | |||
| 1017 | _ccv_nnc_symbolic_graph_simplify_free(simplify); | |||
| 1018 | } |
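For reference, a hedged usage sketch of the entry point above: run fusion, CSE, data-transfer optimization, then pruning in one call. The pass constants appear in the switch above; the graph, the `loss` output symbol, and the source / destination accessor functions are assumptions of this sketch rather than anything this file defines.

// Hedged usage sketch: `graph` and `loss` are assumed to exist in the
// caller; the accessors are the usual ccv_nnc ones for a graph's
// default sources / destinations.
static void simplify_for_loss(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t loss)
{
	const int passes[] = {
		CCV_NNC_SIMPLIFY_OPS_FUSION,
		CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
		CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,
		CCV_NNC_SIMPLIFY_GRAPH_PRUNING,
	};
	ccv_nnc_symbolic_graph_simplify(graph, passes, sizeof(passes) / sizeof(int),
		0, 0 /* No binds in this sketch. */, &loss, 1,
		ccv_nnc_symbolic_graph_sources(graph), ccv_nnc_symbolic_graph_source_size(graph),
		ccv_nnc_symbolic_graph_destinations(graph), ccv_nnc_symbolic_graph_destination_size(graph));
}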