Bug Summary

File: nnc/ccv_nnc_symbolic_graph_simplify.c
Warning: line 776, column 27
The left expression of the compound assignment is an uninitialized value. The computed value will also be garbage

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_symbolic_graph_simplify.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc -resource-dir /usr/local/lib/clang/14.0.6 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D USE_DISPATCH -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/14.0.6/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/9/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -fdebug-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc -ferror-limit 19 -fblocks -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o 
/home/liu/buildslave/public_html/analyze/2022-07-27-230511-338087-1 -x c ccv_nnc_symbolic_graph_simplify.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_nnc_symbolic_graph.h"
6#include "3rdparty/siphash/siphash24.h"
7
8// MARK - Level-3.5 API
9
// 16-byte SipHash key used by this file's CSE hashing. The 16-character literal
// exactly fills key_siphash[16]; no NUL terminator is stored, which is intended
// since the key is raw bytes, not a C string.
10static uint8_t key_siphash[16] = "graphcsekvlibnnc";
11
12typedef struct {
13 int tensor_symbol_info_size;
14 int exec_symbol_info_size;
15 ccv_nnc_symbolic_graph_t* graph;
16 ccv_nnc_graph_visit_t* visit;
17 ccv_nnc_tensor_symbol_info_t* tensor_symbol_info;
18 ccv_nnc_graph_exec_symbol_info_t* exec_symbol_info;
19 uint32_t* exec_dead; // Mark a exec is dead and need to be cleared, each bit represent a exec.
20 uint32_t* tensor_dead; // Mark a tensor is dead and need to be cleared, each bit represent a tensor.
21 int* output_execs; // Mapping from tensor to the exec that generates this tensor.
22} ccv_nnc_symbolic_graph_simplify_t;
23
24static void _ccv_nnc_symbolic_graph_simplify_update_output_execs(ccv_nnc_symbolic_graph_simplify_t* const simplify)
25{
26 int i;
27 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
28 simplify->output_execs[i] = -1;
29 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
30 if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
31 continue;
32 for (i = 0; i < node->output_size; i++)
33 if (node->outputs[i] >= 0)
34 simplify->output_execs[node->outputs[i]] = idx; // A tensor can only be written once.
35 } ccv_nnc_graph_visit_endfor} }
36}
37
38static ccv_nnc_symbolic_graph_simplify_t* _ccv_nnc_symbolic_graph_simplify_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
39{
40 ccv_nnc_symbolic_graph_simplify_t* const simplify = (ccv_nnc_symbolic_graph_simplify_t*)ccmallocmalloc(sizeof(ccv_nnc_symbolic_graph_simplify_t));
41 simplify->graph = graph;
42 simplify->visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0), graph->exec_symbol_info->rnum, sources, source_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((graph->exec_symbol_info->rnum) - 1)); _visit_->size
= 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_symbol_info->
rnum); _i_++) _incoming_edges_ += (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_i_].outgoings) ? ((ccv_nnc_graph_exec_symbol_info_t*)((void
*)(((char*)((graph->exec_symbol_info)->data)) + (size_t
)(graph->exec_symbol_info)->rsize * (size_t)(0))))[_i_]
.outgoings->rnum : 0; const int _heap_mem_ = (graph->exec_symbol_info
->rnum + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (graph->exec_symbol_info->rnum) +
sizeof(int32_t) * ((graph->exec_symbol_info->rnum) * 2
+ _incoming_edges_)); else _incomings_ = (ccv_nnc_incoming_t
*)__builtin_alloca (sizeof(ccv_nnc_incoming_t) * (graph->exec_symbol_info
->rnum) + sizeof(int32_t) * ((graph->exec_symbol_info->
rnum) * 2 + _incoming_edges_)); memset(_incomings_, 0, sizeof
(ccv_nnc_incoming_t) * (graph->exec_symbol_info->rnum))
; int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (graph->
exec_symbol_info->rnum)), (int32_t*)(_incomings_ + (graph->
exec_symbol_info->rnum)) + (graph->exec_symbol_info->
rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_symbol_info
->rnum); for (_i_ = 0; _i_ < (source_size); _i_++) { ((
void) sizeof (((sources)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == graph) ; else __assert_fail (
"(sources)[_i_].graph == graph", "ccv_nnc_symbolic_graph_simplify.c"
, 42, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_
] = (sources)[_i_].d; } int _exist_size_[2] = { (source_size)
, 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0
) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r == 1) continue; _incomings_[_idx_].r = 1
; if (((ccv_nnc_graph_exec_symbol_info_t*)((void*)(((char*)((
graph->exec_symbol_info)->data)) + (size_t)(graph->exec_symbol_info
)->rsize * (size_t)(0))))[_idx_].outgoings) for (_j_ = 0; _j_
< ((ccv_nnc_graph_exec_symbol_info_t*)((void*)(((char*)((
graph->exec_symbol_info)->data)) + (size_t)(graph->exec_symbol_info
)->rsize * (size_t)(0))))[_idx_].outgoings->rnum; _j_++
) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings)->rsize * (size_t)(_j_))); ++_incomings_
[d].c; _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources
)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources
)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (sources)[_i_].d; } _exist_size_[0
] = (source_size); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int
_bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[
_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const
int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r
== 2) continue; _incomings_[_idx_].r = 2; if (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)
((void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void
*)(((char*)((graph->exec_symbol_info)->data)) + (size_t
)(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_
].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings)->rsize * (size_t)(_j_))); if (_incomings_
[d].edges == 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_
[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges -
1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; _exists_[
_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) =
(_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(destination_size); _i_++) { ((void) sizeof (((destinations)
[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations
)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (destinations)[_i_].d; } _exist_size_
[0] = (destination_size); _exist_size_[1] = 0; _p_ = 0, _q_ =
1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0;
for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 2) continue
; _incomings_[_idx_].r = 3; if (_incomings_[_idx_].edges >
0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(destination_size); _i_++) { ((void) sizeof (((destinations)
[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations
)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ =
0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources
)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources
)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ =
1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int
_d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_
] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t
_idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size
].index = ((_idx_)); _visit_->node[_visit_->size].term =
((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_
[_idx_].d) { ++_d_; _incomings_[_idx_].r = 4; } if (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings) { if (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings->rnum == 1) { const int d = *(int*)((
void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void*)
(((char*)((graph->exec_symbol_info)->data)) + (size_t)(
graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_]
.outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings)->rsize * (size_t)(0))); --_incomings_
[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r ==
3 && _d_ < (destination_size)) { _exists_[_p_][_i_
] = d; continue; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)
((void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void
*)(((char*)((graph->exec_symbol_info)->data)) + (size_t
)(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_
].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings)->rsize * (size_t)(_j_))); --_incomings_
[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r ==
3 && _d_ < (destination_size)) { _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } } ++_i_; } ((_i_) = (_p_
), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size
); _i_++) { ((void) sizeof (((destinations)[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph ==
graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); if (_incomings_[(destinations)[_i_].d].r == 4) continue
; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_
].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations
)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(destinations)[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = (((destinations
)[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_
[(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_
) free(_incomings_); } while (0);; ((void) sizeof ((_visit_->
size <= (graph->exec_symbol_info->rnum)) ? 1 : 0), __extension__
({ if (_visit_->size <= (graph->exec_symbol_info->
rnum)) ; else __assert_fail ("_visit_->size <= (graph->exec_symbol_info->rnum)"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); _visit_; })
;
43 simplify->tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_info_t) * graph->tensor_symbol_info->rnum);
44 simplify->exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_info_t) * graph->exec_symbol_info->rnum);
45 ccv_nnc_symbolic_graph_symbol_infer(graph, simplify->visit, sources, source_size, destinations, destination_size, 0, 0, simplify->tensor_symbol_info, simplify->exec_symbol_info);
46 simplify->tensor_symbol_info_size = graph->tensor_symbol_info->rnum;
47 simplify->exec_symbol_info_size = graph->exec_symbol_info->rnum;
48 simplify->exec_dead = cccalloccalloc(((simplify->exec_symbol_info_size + 31) >> 5) + ((simplify->tensor_symbol_info_size + 31) >> 5), sizeof(uint32_t));
49 simplify->tensor_dead = simplify->exec_dead + ((simplify->exec_symbol_info_size + 31) >> 5);
50 simplify->output_execs = (int*)ccmallocmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
51 return simplify;
52}
53
54static void _ccv_nnc_symbolic_graph_simplify_apply(ccv_nnc_symbolic_graph_simplify_t* const simplify)
55{
56 int i, j;
57 for (i = 0; i < simplify->exec_symbol_info_size; i++)
58 if (simplify->exec_dead[i >> 5] & (1u << (i & 0x1f)))
59 ccv_nnc_graph_exec_symbol_free(simplify->graph, (ccv_nnc_graph_exec_symbol_t){
60 .d = i,
61 .graph = simplify->graph,
62 });
63 else // If it is not marked as dead, go through to unmark tensor
64 for (j = 0; j < simplify->exec_symbol_info[i].output_size; j++)
65 {
66 const int d = simplify->exec_symbol_info[i].outputs[j];
67 if (d >= 0)
68 simplify->tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
69 }
70 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
71 if (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f)))
72 ccv_nnc_tensor_symbol_free(simplify->graph, (ccv_nnc_tensor_symbol_t){
73 .d = i,
74 .graph = simplify->graph,
75 });
76 // autogen the sources / destinations.
77 ccv_nnc_graph_exec_symbol_autogen(simplify->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
78}
79
80static void _ccv_nnc_symbolic_graph_simplify_free(ccv_nnc_symbolic_graph_simplify_t* const simplify)
81{
82 ccv_nnc_graph_visit_free(simplify->visit);
83 ccfreefree(simplify->tensor_symbol_info);
84 ccfreefree(simplify->exec_symbol_info);
85 ccfreefree(simplify->exec_dead);
86 ccfreefree(simplify->output_execs);
87 ccfreefree(simplify);
88}
89
// One slot of the open-addressing hash map used by common sub-expression
// elimination. hash == 0 marks an empty slot (callers assert hash > 0).
typedef struct {
	int d; // The tensor index stored under this hash.
	int ifbit; // Probe distance of this entry from its home slot.
	uint64_t hash; // The computation hash; 0 means the slot is empty.
} ccv_nnc_cse_hash_t;

// Look up a hash in the map. Returns the stored d on a hit, -1 on a miss.
// The probe stops as soon as the current probe distance exceeds the resident
// entry's recorded distance — a miss cannot live past that point.
static int _ccv_nnc_cse_hash_find(ccv_nnc_cse_hash_t* const hash_map, const uint64_t hash, const int map_size)
{
	assert(hash > 0);
	int slot = (hash - 1) % map_size;
	int probes = 0;
	for (;;)
	{
		if (slot >= map_size) // Wrap around linearly.
			slot = 0;
		if (probes > hash_map[slot].ifbit)
			return -1; // We probed farther than any resident entry: not present.
		if (hash_map[slot].hash == hash)
			return hash_map[slot].d;
		++probes, ++slot;
	}
}

// Insert (hash, d) into the map; a duplicate hash is a no-op. Uses linear
// probing with a swap-on-richer rule (Robin Hood-style): when the incoming
// entry has probed farther than the resident one, they trade places and the
// displaced entry continues probing for an empty slot.
static void _ccv_nnc_cse_hash_add(ccv_nnc_cse_hash_t* const hash_map, uint64_t hash, int d, const int map_size)
{
	assert(hash > 0);
	int slot = (hash - 1) % map_size;
	int probes = 0;
	for (;;)
	{
		if (slot >= map_size) // Wrap around linearly.
			slot = 0;
		if (hash_map[slot].hash == hash) // Already exists, do nothing.
			return;
		if (hash_map[slot].hash == 0)
		{
			// Empty slot: insert here and record the probe distance.
			hash_map[slot].d = d;
			hash_map[slot].ifbit = probes;
			hash_map[slot].hash = hash;
			return;
		}
		if (probes > hash_map[slot].ifbit)
		{
			// Incoming entry is "poorer" (longer probe): evict the resident
			// entry, store ours, and keep probing with the evicted one.
			const ccv_nnc_cse_hash_t evicted = hash_map[slot];
			hash_map[slot].d = d;
			hash_map[slot].ifbit = probes;
			hash_map[slot].hash = hash;
			d = evicted.d;
			probes = evicted.ifbit;
			hash = evicted.hash;
		}
		++probes, ++slot;
	}
}
144
145static int _ccv_nnc_symbolic_graph_update_refs(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const int* const refs, const int output_exec_ref_dead)
146{
147 int i, j;
148 // Go over refs, if a tensor is an alias, mark it reference to the new one.
149 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
150 if (refs[i] >= 0)
151 // Mark this tensor as dead.
152 simplify->tensor_dead[i >> 5] |= (1u << (i & 0x1f));
153 else if (simplify->tensor_symbol_info[i].alias_ref && refs[simplify->tensor_symbol_info[i].alias_ref - 1] >= 0) {
154 const int alias_ref = simplify->tensor_symbol_info[i].alias_ref - 1;
155 simplify->tensor_symbol_info[i].alias_ref = refs[alias_ref] + 1;
156 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i)((void*)(((char*)((simplify->graph->tensor_symbol_info)
->data)) + (size_t)(simplify->graph->tensor_symbol_info
)->rsize * (size_t)(i)))
)->alias_ref = refs[alias_ref] + 1;
157 }
158 for (i = 0; i < output_size; i++)
159 // If the output is an alias, that's fine, because if the alias is re-binded, we are good.
160 simplify->tensor_dead[outputs[i].d >> 5] &= ~(1u << (outputs[i].d & 0x1f)); // Undead for output tensor symbols.
161 // Merge s_refs if the tensor is dead.
162 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
163 if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))))
164 {
165 const int ref = refs[i];
166 if (simplify->tensor_symbol_info[i].s_ref && simplify->tensor_symbol_info[i].s_ref->rnum)
167 {
168 if (!simplify->tensor_symbol_info[ref].s_ref) // If there is no s_ref, simple, just assign the pointer and set the old one to nil.
169 {
170 simplify->tensor_symbol_info[ref].s_ref = simplify->tensor_symbol_info[i].s_ref;
171 simplify->tensor_symbol_info[i].s_ref = 0;
172 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i)((void*)(((char*)((simplify->graph->tensor_symbol_info)
->data)) + (size_t)(simplify->graph->tensor_symbol_info
)->rsize * (size_t)(i)))
)->s_ref = 0;
173 } else {
174 ccv_array_t* const ref_s_ref = simplify->tensor_symbol_info[ref].s_ref;
175 ccv_array_t* const i_s_ref = simplify->tensor_symbol_info[i].s_ref;
176 const int ref_s_ref_rnum = ref_s_ref->rnum;
177 int flag = 0;
178 // Detect conflict, if there is, undead.
179 for (j = 0; !flag && j < ccv_min(ref_s_ref_rnum, i_s_ref->rnum)({ typeof (ref_s_ref_rnum) _a = (ref_s_ref_rnum); typeof (i_s_ref
->rnum) _b = (i_s_ref->rnum); (_a < _b) ? _a : _b; }
)
; j++)
180 {
181 const int ref_s_ref_k = *(int*)ccv_array_get(ref_s_ref, j)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref
)->rsize * (size_t)(j)))
;
182 const int i_s_ref_k = *(int*)ccv_array_get(i_s_ref, j)((void*)(((char*)((i_s_ref)->data)) + (size_t)(i_s_ref)->
rsize * (size_t)(j)))
;
183 // If for the same sub-graph, they have different tensors linked, we cannot merge these two.
184 flag = (ref_s_ref_k > 0 && i_s_ref_k > 0 && ref_s_ref_k != i_s_ref_k);
185 }
186 if (flag)
187 {
188 simplify->tensor_dead[i >> 5] &= ~(1u << (i & 0x1f)); // Undead
189 continue;
190 }
191 if (ref_s_ref_rnum < i_s_ref->rnum)
192 {
193 ccv_array_resize(ref_s_ref, i_s_ref->rnum);
194 memcpy(ccv_array_get(ref_s_ref, ref_s_ref_rnum)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref
)->rsize * (size_t)(ref_s_ref_rnum)))
, ccv_array_get(i_s_ref, ref_s_ref_rnum)((void*)(((char*)((i_s_ref)->data)) + (size_t)(i_s_ref)->
rsize * (size_t)(ref_s_ref_rnum)))
, sizeof(int) * (i_s_ref->rnum - ref_s_ref_rnum));
195 }
196 for (j = 0; j < ccv_min(ref_s_ref_rnum, i_s_ref->rnum)({ typeof (ref_s_ref_rnum) _a = (ref_s_ref_rnum); typeof (i_s_ref
->rnum) _b = (i_s_ref->rnum); (_a < _b) ? _a : _b; }
)
; j++)
197 {
198 const int ref_s_ref_k = *(int*)ccv_array_get(ref_s_ref, j)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref
)->rsize * (size_t)(j)))
;
199 const int i_s_ref_k = *(int*)ccv_array_get(i_s_ref, j)((void*)(((char*)((i_s_ref)->data)) + (size_t)(i_s_ref)->
rsize * (size_t)(j)))
;
200 assert(ref_s_ref_k == 0 || i_s_ref_k == 0)((void) sizeof ((ref_s_ref_k == 0 || i_s_ref_k == 0) ? 1 : 0)
, __extension__ ({ if (ref_s_ref_k == 0 || i_s_ref_k == 0) ; else
__assert_fail ("ref_s_ref_k == 0 || i_s_ref_k == 0", "ccv_nnc_symbolic_graph_simplify.c"
, 200, __extension__ __PRETTY_FUNCTION__); }))
;
201 if (i_s_ref_k)
202 *(int*)ccv_array_get(ref_s_ref, j)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref
)->rsize * (size_t)(j)))
= i_s_ref_k;
203 }
204 ccv_array_free(simplify->tensor_symbol_info[i].s_ref);
205 simplify->tensor_symbol_info[i].s_ref = 0;
206 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i)((void*)(((char*)((simplify->graph->tensor_symbol_info)
->data)) + (size_t)(simplify->graph->tensor_symbol_info
)->rsize * (size_t)(i)))
)->s_ref = 0;
207 for (j = 0; j < ref_s_ref->rnum; j++)
208 {
209 const int ref_k = *(int*)ccv_array_get(ref_s_ref, j)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref
)->rsize * (size_t)(j)))
- 1;
210 if (ref_k >= 0)
211 {
212 ccv_nnc_symbolic_graph_t* const sub_graph = *(ccv_nnc_symbolic_graph_t**)ccv_array_get(simplify->graph->sub_graphs, j)((void*)(((char*)((simplify->graph->sub_graphs)->data
)) + (size_t)(simplify->graph->sub_graphs)->rsize * (
size_t)(j)))
;
213 assert(sub_graph)((void) sizeof ((sub_graph) ? 1 : 0), __extension__ ({ if (sub_graph
) ; else __assert_fail ("sub_graph", "ccv_nnc_symbolic_graph_simplify.c"
, 213, __extension__ __PRETTY_FUNCTION__); }))
;
214 // Update its p_ref.
215 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(sub_graph->tensor_symbol_info, ref_k)((void*)(((char*)((sub_graph->tensor_symbol_info)->data
)) + (size_t)(sub_graph->tensor_symbol_info)->rsize * (
size_t)(ref_k)))
)->p_ref = ref + 1;
216 }
217 }
218 }
219 assert(simplify->tensor_symbol_info[i].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->s_ref)((void) sizeof ((simplify->tensor_symbol_info[i].s_ref == (
(ccv_nnc_tensor_symbol_info_t*)((void*)(((char*)((simplify->
graph->tensor_symbol_info)->data)) + (size_t)(simplify->
graph->tensor_symbol_info)->rsize * (size_t)(i))))->
s_ref) ? 1 : 0), __extension__ ({ if (simplify->tensor_symbol_info
[i].s_ref == ((ccv_nnc_tensor_symbol_info_t*)((void*)(((char*
)((simplify->graph->tensor_symbol_info)->data)) + (size_t
)(simplify->graph->tensor_symbol_info)->rsize * (size_t
)(i))))->s_ref) ; else __assert_fail ("simplify->tensor_symbol_info[i].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->s_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 219, __extension__ __PRETTY_FUNCTION__
); }))
;
220 assert(simplify->tensor_symbol_info[ref].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref))->s_ref)((void) sizeof ((simplify->tensor_symbol_info[ref].s_ref ==
((ccv_nnc_tensor_symbol_info_t*)((void*)(((char*)((simplify->
graph->tensor_symbol_info)->data)) + (size_t)(simplify->
graph->tensor_symbol_info)->rsize * (size_t)(ref))))->
s_ref) ? 1 : 0), __extension__ ({ if (simplify->tensor_symbol_info
[ref].s_ref == ((ccv_nnc_tensor_symbol_info_t*)((void*)(((char
*)((simplify->graph->tensor_symbol_info)->data)) + (
size_t)(simplify->graph->tensor_symbol_info)->rsize *
(size_t)(ref))))->s_ref) ; else __assert_fail ("simplify->tensor_symbol_info[ref].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref))->s_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 220, __extension__ __PRETTY_FUNCTION__
); }))
;
221 }
222 }
223 // Going through refs that we are updating, going through its p_ref to make sure both are updated.
224 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
225 if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))) && simplify->tensor_symbol_info[i].p_ref)
226 {
227 const int ref = refs[i];
228 const int p_ref = simplify->tensor_symbol_info[i].p_ref - 1;
229 assert(p_ref >= 0)((void) sizeof ((p_ref >= 0) ? 1 : 0), __extension__ ({ if
(p_ref >= 0) ; else __assert_fail ("p_ref >= 0", "ccv_nnc_symbolic_graph_simplify.c"
, 229, __extension__ __PRETTY_FUNCTION__); }))
;
230 assert(simplify->graph->p)((void) sizeof ((simplify->graph->p) ? 1 : 0), __extension__
({ if (simplify->graph->p) ; else __assert_fail ("simplify->graph->p"
, "ccv_nnc_symbolic_graph_simplify.c", 230, __extension__ __PRETTY_FUNCTION__
); }))
;
231 ccv_array_t* const s_ref = ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->p->tensor_symbol_info, p_ref)((void*)(((char*)((simplify->graph->p->tensor_symbol_info
)->data)) + (size_t)(simplify->graph->p->tensor_symbol_info
)->rsize * (size_t)(p_ref)))
)->s_ref;
232 const int s_idx = simplify->graph->p_idx - 1;
233 assert(s_idx >= 0)((void) sizeof ((s_idx >= 0) ? 1 : 0), __extension__ ({ if
(s_idx >= 0) ; else __assert_fail ("s_idx >= 0", "ccv_nnc_symbolic_graph_simplify.c"
, 233, __extension__ __PRETTY_FUNCTION__); }))
;
234 assert(s_ref && s_ref->rnum > s_idx)((void) sizeof ((s_ref && s_ref->rnum > s_idx) ?
1 : 0), __extension__ ({ if (s_ref && s_ref->rnum
> s_idx) ; else __assert_fail ("s_ref && s_ref->rnum > s_idx"
, "ccv_nnc_symbolic_graph_simplify.c", 234, __extension__ __PRETTY_FUNCTION__
); }))
;
235 *(int*)ccv_array_get(s_ref, s_idx)((void*)(((char*)((s_ref)->data)) + (size_t)(s_ref)->rsize
* (size_t)(s_idx)))
= ref + 1; // Update so it references to the new s_ref.
236 assert(!simplify->tensor_symbol_info[ref].p_ref)((void) sizeof ((!simplify->tensor_symbol_info[ref].p_ref)
? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[ref].p_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[ref].p_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 236, __extension__ __PRETTY_FUNCTION__
); }))
;
237 simplify->tensor_symbol_info[ref].p_ref = p_ref + 1;
238 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref)((void*)(((char*)((simplify->graph->tensor_symbol_info)
->data)) + (size_t)(simplify->graph->tensor_symbol_info
)->rsize * (size_t)(ref)))
)->p_ref = p_ref + 1;
239 }
240 // Now go over exec to mark them as dead because we don't need these to generate refs.
241 if (output_exec_ref_dead)
242 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
243 if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))))
244 {
245 const int output_exec = simplify->output_execs[i];
246 assert(output_exec >= 0)((void) sizeof ((output_exec >= 0) ? 1 : 0), __extension__
({ if (output_exec >= 0) ; else __assert_fail ("output_exec >= 0"
, "ccv_nnc_symbolic_graph_simplify.c", 246, __extension__ __PRETTY_FUNCTION__
); }))
;
247 const ccv_nnc_graph_exec_symbol_info_t* const symbol_info = simplify->exec_symbol_info + output_exec;
248 int flag = 0;
249 for (j = 0; !flag && j < symbol_info->output_size; j++)
250 {
251 const int d = symbol_info->outputs[j];
252 if (d >= 0)
253 flag = (!(simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f)))); // If some of the output is not dead, we cannot proceed.
254 }
255 if (!flag) // If all outputs are dead, mark the exec as dead.
256 simplify->exec_dead[output_exec >> 5] |= (1u << (output_exec & 0x1f));
257 }
258 int updated_refs = 0;
259 // Go over replace inputs / outputs.
260 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
261 for (i = 0; i < node->input_size; i++)
262 {
263 const int d = node->inputs[i];
264 if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
265 {
266 node->inputs[i] = refs[d]; // It can be replaced.
267 updated_refs = 1;
268 }
269 }
270 for (i = 0; i < node->output_size; i++)
271 {
272 const int d = node->outputs[i];
273 if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
274 {
275 node->outputs[i] = refs[d]; // It can be replaced.
276 updated_refs = 1;
277 }
278 }
279 assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->inputs == node->inputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*)
(((char*)((simplify->graph->exec_symbol_info)->data)
) + (size_t)(simplify->graph->exec_symbol_info)->rsize
* (size_t)(idx))))->inputs == node->inputs) ? 1 : 0), __extension__
({ if (((ccv_nnc_graph_exec_symbol_info_t*)((void*)(((char*)
((simplify->graph->exec_symbol_info)->data)) + (size_t
)(simplify->graph->exec_symbol_info)->rsize * (size_t
)(idx))))->inputs == node->inputs) ; else __assert_fail
("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->inputs == node->inputs"
, "ccv_nnc_symbolic_graph_simplify.c", 279, __extension__ __PRETTY_FUNCTION__
); }))
;
280 assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->outputs == node->outputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*)
(((char*)((simplify->graph->exec_symbol_info)->data)
) + (size_t)(simplify->graph->exec_symbol_info)->rsize
* (size_t)(idx))))->outputs == node->outputs) ? 1 : 0)
, __extension__ ({ if (((ccv_nnc_graph_exec_symbol_info_t*)((
void*)(((char*)((simplify->graph->exec_symbol_info)->
data)) + (size_t)(simplify->graph->exec_symbol_info)->
rsize * (size_t)(idx))))->outputs == node->outputs) ; else
__assert_fail ("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->outputs == node->outputs"
, "ccv_nnc_symbolic_graph_simplify.c", 280, __extension__ __PRETTY_FUNCTION__
); }))
;
281 } ccv_nnc_graph_visit_endfor} }
282 const ccv_nnc_graph_exec_symbol_info_t* const p_node_info = simplify->graph->p ? (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->p->exec_symbol_info, simplify->graph->exec_idx - 1)((void*)(((char*)((simplify->graph->p->exec_symbol_info
)->data)) + (size_t)(simplify->graph->p->exec_symbol_info
)->rsize * (size_t)(simplify->graph->exec_idx - 1)))
: 0;
283 if (p_node_info && (p_node_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE))
284 // Go over the while inputs as well.
285 for (i = 0; i < p_node_info->p_while.input_size; i++)
286 {
287 const int d = p_node_info->p_while.inputs[i];
288 if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
289 {
290 p_node_info->p_while.inputs[i] = refs[d];
291 updated_refs = 1;
292 }
293 }
294 return updated_refs;
295}
296
297// This is a simple common sub-expression elimination implementation, particularly, we only replace the later computed output
298// with the identical earlier computed output, and let the "elimination" part to the graph pruning.
299static void _ccv_nnc_symbolic_graph_common_subexpression_elimination(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
300{
301 _ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify);
302 // tensor_hash starts with 0s, and it is either marked with the tensor index + 1, or the hash of the computations.
303 uint64_t* const tensor_hash = (uint64_t*)cccalloccalloc(simplify->tensor_symbol_info_size, sizeof(uint64_t));
304 int i;
305 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
306 // We cannot support graph / custom command (we cannot model them properly).
307 if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD ||
308 node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ||
309 node->cmd.cmd == CCV_NNC_CUSTOM_FORWARD ||
310 node->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD)
311 continue;
312 // If already marked as dead, skip.
313 if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
314 continue;
315 uint64_t hashout, hashin[3];
316 siphash((uint8_t*)&hashin[0], (const uint8_t*)&node->cmd.info, sizeof(node->cmd.info), key_siphash);
317 siphash((uint8_t*)&hashin[1], (const uint8_t*)&node->hint, sizeof(node->hint), key_siphash);
318 hashin[2] = node->cmd.cmd; // Now actually hash the cmd name.
319 siphash((uint8_t*)&hashout, (const uint8_t*)hashin, sizeof(hashin), key_siphash);
320 // First, hash the cmd and the hints with the cmd.
321 // Note on alias, we cannot really generate proper hash for alias (yet). Thus, just treat alias as normal tensors.
322 for (i = 0; i < node->input_size; i++)
323 {
324 assert(node->inputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->inputs[i] < simplify->tensor_symbol_info_size
) ? 1 : 0), __extension__ ({ if (node->inputs[i] < simplify
->tensor_symbol_info_size) ; else __assert_fail ("node->inputs[i] < simplify->tensor_symbol_info_size"
, "ccv_nnc_symbolic_graph_simplify.c", 324, __extension__ __PRETTY_FUNCTION__
); }))
;
325 // If no hash for the input, use the index + 1 as the hash.
326 if (node->inputs[i] >= 0 && tensor_hash[node->inputs[i]] == 0)
327 tensor_hash[node->inputs[i]] = node->inputs[i] + 1;
328 if (node->inputs[i] >= 0)
329 {
330 // Hash using the tensor hash.
331 hashin[0] = hashout;
332 hashin[1] = i; // Encode the positional information.
333 hashin[2] = tensor_hash[node->inputs[i]];
334 } else {
335 // Hash using the input integer (could be special integer).
336 hashin[0] = hashout;
337 hashin[1] = i; // Encode the positional information.
338 hashin[2] = node->inputs[i];
339 }
340 siphash((uint8_t*)&hashout, (const uint8_t*)hashin, sizeof(hashin), key_siphash);
341 }
342 for (i = 0; i < node->output_size; i++)
343 if (node->outputs[i] >= 0)
344 {
345 assert(node->outputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->outputs[i] < simplify->tensor_symbol_info_size
) ? 1 : 0), __extension__ ({ if (node->outputs[i] < simplify
->tensor_symbol_info_size) ; else __assert_fail ("node->outputs[i] < simplify->tensor_symbol_info_size"
, "ccv_nnc_symbolic_graph_simplify.c", 345, __extension__ __PRETTY_FUNCTION__
); }))
;
346 // Assigning once, especially now we don't consider aliases.
347 assert(tensor_hash[node->outputs[i]] == 0)((void) sizeof ((tensor_hash[node->outputs[i]] == 0) ? 1 :
0), __extension__ ({ if (tensor_hash[node->outputs[i]] ==
0) ; else __assert_fail ("tensor_hash[node->outputs[i]] == 0"
, "ccv_nnc_symbolic_graph_simplify.c", 347, __extension__ __PRETTY_FUNCTION__
); }))
;
348 hashin[0] = hashout;
349 hashin[1] = i; // Positional information.
350 siphash((uint8_t*)&hashin[2], (const uint8_t*)&simplify->tensor_symbol_info[node->outputs[i]].info,
351 sizeof(simplify->tensor_symbol_info[node->outputs[i]].info), key_siphash);
352 // Generate hash for the output.
353 siphash((uint8_t*)&tensor_hash[node->outputs[i]], (const uint8_t*)hashin, sizeof(hashin), key_siphash);
354 }
355 } ccv_nnc_graph_visit_endfor} }
356 // Allocate 3 / 2 as much space, for the simple robin-hood open address hash map.
357 const int map_size = (simplify->tensor_symbol_info_size * 3 + 1) / 2;
358 int* const refs = (int*)ccmallocmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
359 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
360 refs[i] = -1;
361 ccv_nnc_cse_hash_t* const hash_map = (ccv_nnc_cse_hash_t*)cccalloccalloc(map_size, sizeof(ccv_nnc_cse_hash_t));
362 // Now, all tensors are hashed, identify tensors with the same hash code, replace the ones that accessed later.
363 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
364 // If already marked as dead, skip.
365 if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
366 continue;
367 for (i = 0; i < node->input_size; i++)
368 if (node->inputs[i] >= 0)
369 {
370 const int d = node->inputs[i];
371 assert(tensor_hash[d])((void) sizeof ((tensor_hash[d]) ? 1 : 0), __extension__ ({ if
(tensor_hash[d]) ; else __assert_fail ("tensor_hash[d]", "ccv_nnc_symbolic_graph_simplify.c"
, 371, __extension__ __PRETTY_FUNCTION__); }))
;
372 const int new_d = _ccv_nnc_cse_hash_find(hash_map, tensor_hash[d], map_size);
373 if (new_d >= 0 && new_d != d)
374 {
375 // Check whether this can be replaced.
376 assert(refs[d] == -1 || refs[d] == new_d)((void) sizeof ((refs[d] == -1 || refs[d] == new_d) ? 1 : 0),
__extension__ ({ if (refs[d] == -1 || refs[d] == new_d) ; else
__assert_fail ("refs[d] == -1 || refs[d] == new_d", "ccv_nnc_symbolic_graph_simplify.c"
, 376, __extension__ __PRETTY_FUNCTION__); }))
;
377 assert(!simplify->tensor_symbol_info[d].assign_ref)((void) sizeof ((!simplify->tensor_symbol_info[d].assign_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[d].assign_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[d].assign_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 377, __extension__ __PRETTY_FUNCTION__
); }))
;
378 assert(!simplify->tensor_symbol_info[d].r_assign_ref)((void) sizeof ((!simplify->tensor_symbol_info[d].r_assign_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[d].r_assign_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[d].r_assign_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 378, __extension__ __PRETTY_FUNCTION__
); }))
;
379 assert(!simplify->tensor_symbol_info[d].bypass_ref)((void) sizeof ((!simplify->tensor_symbol_info[d].bypass_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[d].bypass_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[d].bypass_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 379, __extension__ __PRETTY_FUNCTION__
); }))
;
380 assert(!simplify->tensor_symbol_info[new_d].assign_ref)((void) sizeof ((!simplify->tensor_symbol_info[new_d].assign_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[new_d].assign_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[new_d].assign_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 380, __extension__ __PRETTY_FUNCTION__
); }))
;
381 assert(!simplify->tensor_symbol_info[new_d].r_assign_ref)((void) sizeof ((!simplify->tensor_symbol_info[new_d].r_assign_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[new_d].r_assign_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[new_d].r_assign_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 381, __extension__ __PRETTY_FUNCTION__
); }))
;
382 assert(!simplify->tensor_symbol_info[new_d].bypass_ref)((void) sizeof ((!simplify->tensor_symbol_info[new_d].bypass_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[new_d].bypass_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[new_d].bypass_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 382, __extension__ __PRETTY_FUNCTION__
); }))
;
383 // Ignore if there is a pair_ref (again, pair_ref has side effect that is deeper (using tape))
384 if (simplify->tensor_symbol_info[d].pair_ref)
385 continue;
386 // If both have p_ref, we cannot merge.
387 if (simplify->tensor_symbol_info[d].p_ref && simplify->tensor_symbol_info[new_d].p_ref)
388 continue;
389 // Merge s_refs from ref[d] later.
390 if (refs[d] != new_d)
391 refs[d] = new_d;
392 assert(simplify->output_execs[new_d] >= 0)((void) sizeof ((simplify->output_execs[new_d] >= 0) ? 1
: 0), __extension__ ({ if (simplify->output_execs[new_d] >=
0) ; else __assert_fail ("simplify->output_execs[new_d] >= 0"
, "ccv_nnc_symbolic_graph_simplify.c", 392, __extension__ __PRETTY_FUNCTION__
); }))
;
393 // Establish new dependency.
394 ccv_nnc_graph_exec_symbol_concat(simplify->graph, (ccv_nnc_graph_exec_symbol_t){
395 .d = simplify->output_execs[new_d],
396 .graph = simplify->graph,
397 }, (ccv_nnc_graph_exec_symbol_t){
398 .d = idx,
399 .graph = simplify->graph,
400 });
401 }
402 }
403 // We can reuse the input, but we cannot do that for output of these commands.
404 if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD ||
405 node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ||
406 node->cmd.cmd == CCV_NNC_CUSTOM_FORWARD ||
407 node->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD)
408 continue;
409 for (i = 0; i < node->output_size; i++)
410 if (node->outputs[i] >= 0) // This tensor can be reused by others.
411 _ccv_nnc_cse_hash_add(hash_map, tensor_hash[node->outputs[i]], node->outputs[i], map_size);
412 } ccv_nnc_graph_visit_endfor} }
413 _ccv_nnc_symbolic_graph_update_refs(simplify, outputs, output_size, refs, 1 /* For these exec that generates refs, we don't need them any more. */);
414 ccfreefree(tensor_hash);
415 ccfreefree(hash_map);
416 ccfreefree(refs);
417}
418
419static void _ccv_nnc_symbolic_graph_data_transfer_opt(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const binds, const int bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
420{
421 _ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify);
422 uint32_t* const exec_dead = simplify->exec_dead;
423 const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = simplify->tensor_symbol_info;
424 int i, j;
425 uint32_t* const has_alias = ccmallocmalloc(sizeof(uint32_t) * ((simplify->tensor_symbol_info_size + 31) >> 5));
426 int* const refs = (int*)ccmallocmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
427 int updated_refs;
428 do {
429 memset(has_alias, 0, sizeof(uint32_t) * ((simplify->tensor_symbol_info_size + 31) >> 5));
430 // Go through until no updates is possible. This won't result an infinite loop because every time,
431 // a tensor is eliminated.
432 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
433 {
434 refs[i] = -1;
435 if (tensor_symbol_info[i].alias_ref)
436 {
437 const int alias_ref = tensor_symbol_info[i].alias_ref - 1;
438 has_alias[alias_ref >> 5] |= (1u << (alias_ref & 0x1f));
439 }
440 }
441 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
442 // If already marked as dead, skip.
443 if (exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
444 continue;
445 if (node->cmd.cmd != CCV_NNC_DATA_TRANSFER_FORWARD &&
446 node->cmd.cmd != CCV_NNC_DATA_TRANSFER_BACKWARD &&
447 // Conversion, if the datatype is the same, it is the data transfer op.
448 node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_FORWARD &&
449 node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_BACKWARD &&
450 // Format transform, if the format is the same, it is the data transfer op.
451 node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_FORWARD &&
452 node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_BACKWARD)
453 continue;
454 for (i = 0; i < node->output_size; i++) // For data transfer, we only respect output size.
455 if (node->inputs[i] >= 0 && node->outputs[i] >= 0)
456 {
457 assert(node->inputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->inputs[i] < simplify->tensor_symbol_info_size
) ? 1 : 0), __extension__ ({ if (node->inputs[i] < simplify
->tensor_symbol_info_size) ; else __assert_fail ("node->inputs[i] < simplify->tensor_symbol_info_size"
, "ccv_nnc_symbolic_graph_simplify.c", 457, __extension__ __PRETTY_FUNCTION__
); }))
;
458 assert(node->outputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->outputs[i] < simplify->tensor_symbol_info_size
) ? 1 : 0), __extension__ ({ if (node->outputs[i] < simplify
->tensor_symbol_info_size) ; else __assert_fail ("node->outputs[i] < simplify->tensor_symbol_info_size"
, "ccv_nnc_symbolic_graph_simplify.c", 458, __extension__ __PRETTY_FUNCTION__
); }))
;
459 const ccv_nnc_tensor_symbol_info_t* const input = tensor_symbol_info + node->inputs[i];
460 const ccv_nnc_tensor_symbol_info_t* const output = tensor_symbol_info + node->outputs[i];
461 // If they are not the same data type, skip. (Likely data conversion op).
462 if (input->info.datatype != output->info.datatype)
463 continue;
464 // If they are not the same format, skip. (Likely format transform op).
465 if (input->info.format != output->info.format)
466 continue;
467 // If they are not on the same device (even for NUMA), skip.
468 if (input->info.type != output->info.type)
469 continue;
470 // If both are alias, we cannot consolidate this.
471 if (input->alias_ref && output->alias_ref)
472 continue;
473 // If input is alias, and output has alias reference to it, output cannot be the same as input.
474 if (input->alias_ref && (has_alias[node->outputs[i] >> 5] & (1u << (node->outputs[i] & 0x1f))))
475 continue;
476 // If output is alias, and input has alias reference to it, input cannot be the same as output.
477 if (output->alias_ref && (has_alias[node->inputs[i] >> 5] & (1u << (node->inputs[i] & 0x1f))))
478 continue;
479 // If either are carry overs (for while), we cannot do anything.
480 if (input->assign_ref || output->assign_ref ||
481 input->r_assign_ref || output->r_assign_ref)
482 continue;
483 // If either are bypasses (for case..of), we cannot do anything.
484 if (input->bypass_ref || output->bypass_ref ||
485 input->r_bypass_ref || output->r_bypass_ref)
486 continue;
487 // If either are inputs / outputs connecting the parent graph, we cannot do anything.
488 if (input->p_ref || output->p_ref)
489 continue;
490 int flag = 0;
491 for (j = 0; !flag && j < bind_size; j++)
492 flag = (binds[j].d == node->inputs[i] || binds[j].d == node->outputs[i]);
493 for (j = 0; !flag && j < output_size; j++)
494 flag = (outputs[j].d == node->inputs[i] || outputs[j].d == node->outputs[i]);
495 // Either input or output cannot be in the outputs nor the binds.
496 if (flag)
497 continue;
498 // If the type is the same, check which one is the alias.
499 // We always prefer alias.
500 if (output->alias_ref)
501 refs[node->inputs[i]] = node->outputs[i];
502 else // if (input->alias_ref), else
503 refs[node->outputs[i]] = node->inputs[i];
504 }
505 } ccv_nnc_graph_visit_endfor} }
506 // Make sure refs reference to the end.
507 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
508 if (refs[i] >= 0)
509 {
510 int ref = refs[i];
511 while (refs[ref] >= 0)
512 ref = refs[i];
513 refs[i] = ref;
514 }
515 updated_refs = _ccv_nnc_symbolic_graph_update_refs(simplify, outputs, output_size, refs, 0 /* We still need these exec that generates the refs. */);
516 } while (updated_refs);
517 ccfreefree(refs);
518 ccfreefree(has_alias);
519 // Now, all references updated, remove data transfers that sources and destinations are the same.
520 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
521 // If already marked as dead, skip.
522 if (exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
523 continue;
524 if (node->cmd.cmd != CCV_NNC_DATA_TRANSFER_FORWARD &&
525 node->cmd.cmd != CCV_NNC_DATA_TRANSFER_BACKWARD &&
526 // Conversion, if the datatype is the same, it is the data transfer op.
527 node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_FORWARD &&
528 node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_BACKWARD &&
529 // Format transform, if the format is the same, it is the data transfer op.
530 node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_FORWARD &&
531 node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_BACKWARD)
532 continue;
533 for (i = 0; i < node->output_size; i++) // For data transfer, we only respect output size.
534 if (node->inputs[i] == node->outputs[i])
535 {
536 if (i + 1 < node->output_size)
537 {
538 node->inputs[i] = node->inputs[i + 1];
539 node->outputs[i] = node->outputs[i + 1];
540 }
541 --node->output_size;
542 --i;
543 }
544 node->input_size = node->output_size;
545 ((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx)((void*)(((char*)((simplify->graph->exec_symbol_info)->
data)) + (size_t)(simplify->graph->exec_symbol_info)->
rsize * (size_t)(idx)))
)->input_size = node->input_size;
546 ((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx)((void*)(((char*)((simplify->graph->exec_symbol_info)->
data)) + (size_t)(simplify->graph->exec_symbol_info)->
rsize * (size_t)(idx)))
)->output_size = node->output_size;
547 // Remove this data transfer node if it has no outputs.
548 if (node->output_size == 0)
549 exec_dead[idx >> 5] |= (1u << (idx & 0x1f));
550 } ccv_nnc_graph_visit_endfor} }
551}
552
// Describes one fusion pattern: a linear sequence of commands (ops_seq) that,
// when found chained in the graph, is rewritten into the single fused_op.
typedef struct {
	uint32_t fused_op; // The final fused op id.
	uint32_t ops_seq[2]; // The sequence of commands to identify.
	int ops_seq_size; // How many entries of ops_seq are in use.
	int ops_info_select; // Which op in the sequence donates its cmd info to the fused op.
	struct {
		int type; // Input or output slot. Mixing (e.g. input in ops_seq but output in fused_op) is not meaningful.
		int op_idx; // Index into the ops_seq.
		int from; // The i/o slot index in the matched op.
		int to; // The i/o slot index in the fused op.
	} pos[4]; // Maps i/o positions from the ops sequence onto the fused op.
	int pos_size; // How many entries of pos are in use.
} ccv_nnc_ops_fusion_t;
566
// Slot kinds for the pos table in ccv_nnc_ops_fusion_t.
enum {
	CCV_NNC_OPS_FUSION_INPUT_INDEX, // The pos entry maps an input slot.
	CCV_NNC_OPS_FUSION_OUTPUT_INDEX, // The pos entry maps an output slot.
};
571
572const static int ccv_nnc_ops_fusion_io_size = 2;
573const static ccv_nnc_ops_fusion_t ccv_nnc_ops_fusions[] = {
574 {
575 .fused_op = CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD,
576 .ops_seq = {
577 CCV_NNC_SOFTMAX_FORWARD, CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD,
578 },
579 .ops_seq_size = 2,
580 .ops_info_select = 1,
581 .pos = {
582 {
583 .type = CCV_NNC_OPS_FUSION_INPUT_INDEX,
584 .op_idx = 0,
585 .from = 0,
586 .to = 0,
587 },
588 {
589 .type = CCV_NNC_OPS_FUSION_INPUT_INDEX,
590 .op_idx = 1,
591 .from = 1,
592 .to = 1,
593 },
594 {
595 .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX,
596 .op_idx = 0,
597 .from = 0,
598 .to = 1,
599 },
600 {
601 .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX,
602 .op_idx = 1,
603 .from = 0,
604 .to = 0,
605 },
606 },
607 .pos_size = 4,
608 },
609 {
610 .fused_op = CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_FORWARD,
611 .ops_seq = {
612 CCV_NNC_SIGMOID_FORWARD, CCV_NNC_BINARY_CROSSENTROPY_FORWARD,
613 },
614 .ops_seq_size = 2,
615 .ops_info_select = 1,
616 .pos = {
617 {
618 .type = CCV_NNC_OPS_FUSION_INPUT_INDEX,
619 .op_idx = 0,
620 .from = 0,
621 .to = 0,
622 },
623 {
624 .type = CCV_NNC_OPS_FUSION_INPUT_INDEX,
625 .op_idx = 1,
626 .from = 1,
627 .to = 1,
628 },
629 {
630 .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX,
631 .op_idx = 0,
632 .from = 0,
633 .to = 1,
634 },
635 {
636 .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX,
637 .op_idx = 1,
638 .from = 0,
639 .to = 0,
640 },
641 },
642 .pos_size = 4,
643 }
644};
645
646static int _ccv_nnc_find_ops_for_fusion(const ccv_nnc_ops_fusion_t* const fusion, const int ops_idx, const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info, const uint32_t* const exec_dead, const int exec_idx, int* const fusing_exec_symbols)
647{
648 if (exec_dead[exec_idx >> 5] & (1u << (exec_idx & 0x1f)))
649 return 0;
650 const ccv_nnc_graph_exec_symbol_info_t* const node = exec_symbol_info + exec_idx;
651 // Doesn't match the ops_seq, return 0.
652 if (fusion->ops_seq[ops_idx] != node->cmd.cmd)
653 return 0;
654 fusing_exec_symbols[ops_idx] = exec_idx;
655 // If already reached the end, we are good.
656 if (ops_idx == fusion->ops_seq_size - 1)
657 return 1;
658 // Otherwise, we need to go on, but don't have any to follow-up.
659 if (!node->outgoings || !node->outgoings->rnum)
660 return 0;
661 int i;
662 for (i = 0; i < node->outgoings->rnum; i++)
663 if (_ccv_nnc_find_ops_for_fusion(fusion, ops_idx + 1, exec_symbol_info, exec_dead, *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
, fusing_exec_symbols))
664 return 1;
665 return 0;
666}
667
668static void _ccv_nnc_symbolic_graph_ops_fusion(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
669{
670 uint32_t* const exec_dead = simplify->exec_dead;
671 ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = simplify->exec_symbol_info;
672 int i, j;
673 int fusing_exec_symbols[sizeof(ccv_nnc_ops_fusions->ops_seq)];
674 int fused_inputs[ccv_nnc_ops_fusion_io_size]; // 2 is just a number based on the ops_fusion.
675 int fused_outputs[ccv_nnc_ops_fusion_io_size];
676 ccv_nnc_graph_visit_for(simplify->visit, exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(exec_symbol_info)) const node __attribute__((unused)) = (exec_symbol_info
) + idx;
{
677 // If already marked as dead, skip.
678 if (exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
679 continue;
680 // Run through rudimentary pattern matching for ops_seq. There are better ways to do this (immediately come to mind, Boyer-Moore). However, this is simpler to code.
681 // If I am running into performance issues with this, I would be very happy.
682 for (i = 0; i < sizeof(ccv_nnc_ops_fusions) / sizeof(ccv_nnc_ops_fusion_t); i++)
683 {
684 const ccv_nnc_ops_fusion_t* const ops_fusion = ccv_nnc_ops_fusions + i;
685 // Check to see if a list of symbols are possible.
686 if (ops_fusion->ops_seq[0] == node->cmd.cmd &&
687 _ccv_nnc_find_ops_for_fusion(ops_fusion, 0, exec_symbol_info, exec_dead, idx, fusing_exec_symbols))
688 {
689 // Go through all the inputs and outputs, check if they exists and are mapped.
690 // TODO: the mapping can be more sophisticated than what we have here.
691 // Also, I need to check if some inputs / outputs cannot be mapped, then we cannot continue.
692 for (j = 0; j < ccv_nnc_ops_fusion_io_size; j++)
693 fused_inputs[j] = fused_outputs[j] = CCV_NNC_NO_TENSOR_SYMBOL;
694 int input_size = 0, output_size = 0;
695 for (j = 0; j < ops_fusion->pos_size; j++)
696 {
697 ccv_nnc_graph_exec_symbol_info_t* const fusing_op = exec_symbol_info + fusing_exec_symbols[ops_fusion->pos[j].op_idx];
698 switch (ops_fusion->pos[j].type)
699 {
700 case CCV_NNC_OPS_FUSION_INPUT_INDEX:
701 fused_inputs[ops_fusion->pos[j].to] = ops_fusion->pos[j].from < fusing_op->input_size ? fusing_op->inputs[ops_fusion->pos[j].from] : CCV_NNC_NO_TENSOR_SYMBOL;
702 input_size = ccv_max(input_size, ops_fusion->pos[j].to + 1)({ typeof (input_size) _a = (input_size); typeof (ops_fusion->
pos[j].to + 1) _b = (ops_fusion->pos[j].to + 1); (_a > _b
) ? _a : _b; })
;
703 break;
704 case CCV_NNC_OPS_FUSION_OUTPUT_INDEX:
705 fused_outputs[ops_fusion->pos[j].to] = ops_fusion->pos[j].from < fusing_op->output_size ? fusing_op->outputs[ops_fusion->pos[j].from] : CCV_NNC_NO_TENSOR_SYMBOL;
706 output_size = ccv_max(output_size, ops_fusion->pos[j].to + 1)({ typeof (output_size) _a = (output_size); typeof (ops_fusion
->pos[j].to + 1) _b = (ops_fusion->pos[j].to + 1); (_a >
_b) ? _a : _b; })
;
707 break;
708 }
709 }
710 const ccv_nnc_cmd_param_t info = exec_symbol_info[fusing_exec_symbols[ops_fusion->ops_info_select]].cmd.info;
711 // Modify the first node so it is the correct type and value. I need to look back to the actual graph to get the info.
712 ccv_nnc_graph_exec_symbol_info_t* const actual_node = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx)((void*)(((char*)((simplify->graph->exec_symbol_info)->
data)) + (size_t)(simplify->graph->exec_symbol_info)->
rsize * (size_t)(idx)))
;
713 actual_node->cmd.cmd = node->cmd.cmd = ops_fusion->fused_op;
714 actual_node->cmd.info = node->cmd.info = info;
715 if (node->input_size + node->output_size < input_size + output_size)
716 actual_node->inputs = node->inputs = node->inputs ? ccreallocrealloc(node->inputs, sizeof(int) * (input_size + output_size)) : ccmallocmalloc(sizeof(int) * (input_size + output_size));
717 actual_node->outputs = node->outputs = node->inputs + input_size;
718 actual_node->input_size = node->input_size = input_size;
719 actual_node->output_size = node->output_size = output_size;
720 memcpy(node->inputs, fused_inputs, sizeof(int) * input_size);
721 memcpy(node->outputs, fused_outputs, sizeof(int) * output_size);
722 // Afterwards, mark the rest as dead.
723 for (j = 1; j < ops_fusion->ops_seq_size; j++)
724 exec_dead[fusing_exec_symbols[j] >> 5] |= (1u << (fusing_exec_symbols[j] & 0x1f));
725 break;
726 }
727 }
728 } ccv_nnc_graph_visit_endfor} }
729}
730
731static void _ccv_nnc_symbolic_graph_pruning_undead_exec(ccv_nnc_symbolic_graph_simplify_t* const simplify, const int exec_idx, uint32_t* const tensor_visited, ccv_array_t* const next)
732{
733 assert(exec_idx >= 0)((void) sizeof ((exec_idx >= 0) ? 1 : 0), __extension__ ({
if (exec_idx >= 0) ; else __assert_fail ("exec_idx >= 0"
, "ccv_nnc_symbolic_graph_simplify.c", 733, __extension__ __PRETTY_FUNCTION__
); }))
;
25
Taking true branch
734 uint32_t* const exec_dead = simplify->exec_dead;
735 uint32_t* const tensor_dead = simplify->tensor_dead;
736 exec_dead[exec_idx >> 5] &= ~(1u << (exec_idx & 0x1f)); // Undead the exec.
737 ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = simplify->exec_symbol_info + exec_idx;
738 int i;
739 if (exec_symbol_info->cmd.cmd == CCV_NNC_GRAPH_FORWARD ||
26
Assuming field 'cmd' is not equal to CCV_NNC_GRAPH_FORWARD
30
Taking false branch
740 exec_symbol_info->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ||
27
Assuming field 'cmd' is not equal to CCV_NNC_GRAPH_BACKWARD
741 exec_symbol_info->cmd.cmd == CCV_NNC_CUSTOM_FORWARD ||
28
Assuming field 'cmd' is not equal to CCV_NNC_CUSTOM_FORWARD
742 exec_symbol_info->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD)
29
Assuming field 'cmd' is not equal to CCV_NNC_CUSTOM_BACKWARD
743 {
744 // All of its inputs / outputs need to be undead for these commands.
745 for (i = 0; i < exec_symbol_info->input_size; i++)
746 {
747 const int d = exec_symbol_info->inputs[i];
748 if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f))))
749 {
750 ccv_array_push(next, &d); // Push to the next round to be undead.
751 tensor_visited[d >> 5] |= (1u << (d & 0x1f));
752 }
753 }
754 for (i = 0; i < exec_symbol_info->output_size; i++)
755 {
756 const int d = exec_symbol_info->outputs[i];
757 if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f))))
758 {
759 ccv_array_push(next, &d); // Push to the next round to be undead.
760 tensor_visited[d >> 5] |= (1u << (d & 0x1f));
761 }
762 }
763 return;
764 }
765 // Go through the input / output, to make sure that all of them can be available.
766 const int input_bitmask_size = (exec_symbol_info->input_size + 63) >> 6;
767 const int output_bitmask_size = (exec_symbol_info->output_size + 63) >> 6;
768 uint64_t input_bitmasks[ccv_max(1, input_bitmask_size)({ typeof (1) _a = (1); typeof (input_bitmask_size) _b = (input_bitmask_size
); (_a > _b) ? _a : _b; })
];
31
Assuming '_a' is > '_b'
32
'?' condition is true
769 for (i = 0; i
32.1
'i' is >= 'input_bitmask_size'
< input_bitmask_size; i++)
33
Loop condition is false. Execution continues on line 771
770 input_bitmasks[i] = 0;
771 uint64_t output_bitmasks[ccv_max(1, output_bitmask_size)({ typeof (1) _a = (1); typeof (output_bitmask_size) _b = (output_bitmask_size
); (_a > _b) ? _a : _b; })
];
34
Assuming '_a' is > '_b'
35
'?' condition is true
772 for (i = 0; i
35.1
'i' is >= 'output_bitmask_size'
< output_bitmask_size; i++)
36
Loop condition is false. Execution continues on line 774
773 output_bitmasks[i] = 0;
774 for (i = 0; i < exec_symbol_info->input_size; i++)
37
Assuming 'i' is < field 'input_size'
38
Loop condition is true. Entering loop body
775 if (exec_symbol_info->inputs[i] >= 0)
39
Assuming the condition is true
40
Taking true branch
776 input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
41
The left expression of the compound assignment is an uninitialized value. The computed value will also be garbage
777 for (i = 0; i < exec_symbol_info->output_size; i++)
778 if (exec_symbol_info->outputs[i] >= 0)
779 output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
780 // First, mark everything with bitmasks, and verify it works.
781 assert(ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size))((void) sizeof ((ccv_nnc_cmd_bitmask(exec_symbol_info->cmd
, exec_symbol_info->input_size, exec_symbol_info->output_size
, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size
)) ? 1 : 0), __extension__ ({ if (ccv_nnc_cmd_bitmask(exec_symbol_info
->cmd, exec_symbol_info->input_size, exec_symbol_info->
output_size, input_bitmasks, input_bitmask_size, output_bitmasks
, output_bitmask_size)) ; else __assert_fail ("ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size)"
, "ccv_nnc_symbolic_graph_simplify.c", 781, __extension__ __PRETTY_FUNCTION__
); }))
;
782 int flag;
783 do {
784 flag = 0;
785 // Try to eliminate one at a time. Go over output first.
786 for (i = 0; i < exec_symbol_info->output_size; i++)
787 {
788 const int d = exec_symbol_info->outputs[i];
789 // If this tensor currently is marked as dead, try to see whether it works when we don't have this tensor at all.
790 if (d >= 0 && (tensor_dead[d >> 5] & (1u << (d & 0x1f))) &&
791 (output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))))
792 {
793 output_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63));
794 if (ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size))
795 flag = 1;
796 else // Reset the bitmask.
797 output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
798 }
799 }
800 // For inputs, no matter if it s dead or not, we try to limit our input to the smallest number.
801 for (i = 0; i < exec_symbol_info->input_size; i++)
802 {
803 const int d = exec_symbol_info->inputs[i];
804 // If this tensor currently is marked as dead, try to see whether it works when we don't have this tensor at all.
805 if (d >= 0 && (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))))
806 {
807 input_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63));
808 if (ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size))
809 flag = 1;
810 else // Reset the bitmask.
811 input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
812 }
813 }
814 } while (flag);
815 // Now we know which one to keep, which one to undead.
816 for (i = 0; i < exec_symbol_info->input_size; i++)
817 if (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))
818 {
819 const int d = exec_symbol_info->inputs[i];
820 if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f))))
821 {
822 ccv_array_push(next, &d); // Push to the next round to be undead.
823 tensor_visited[d >> 5] |= (1u << (d & 0x1f));
824 }
825 } else {
826 // Clean up the inputs.
827 exec_symbol_info->inputs[i] = CCV_NNC_NO_TENSOR_SYMBOL;
828 assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->inputs == exec_symbol_info->inputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*)
(((char*)((simplify->graph->exec_symbol_info)->data)
) + (size_t)(simplify->graph->exec_symbol_info)->rsize
* (size_t)(exec_idx))))->inputs == exec_symbol_info->inputs
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((simplify->graph->exec_symbol_info)
->data)) + (size_t)(simplify->graph->exec_symbol_info
)->rsize * (size_t)(exec_idx))))->inputs == exec_symbol_info
->inputs) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->inputs == exec_symbol_info->inputs"
, "ccv_nnc_symbolic_graph_simplify.c", 828, __extension__ __PRETTY_FUNCTION__
); }))
;
829 }
830 for (i = 0; i < exec_symbol_info->output_size; i++)
831 if (output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))
832 {
833 const int d = exec_symbol_info->outputs[i];
834 if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f))))
835 {
836 ccv_array_push(next, &d); // Push to the next round to be undead.
837 tensor_visited[d >> 5] |= (1u << (d & 0x1f));
838 }
839 } else {
840 // Clean up the outputs.
841 exec_symbol_info->outputs[i] = CCV_NNC_NO_TENSOR_SYMBOL;
842 assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->outputs == exec_symbol_info->outputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*)
(((char*)((simplify->graph->exec_symbol_info)->data)
) + (size_t)(simplify->graph->exec_symbol_info)->rsize
* (size_t)(exec_idx))))->outputs == exec_symbol_info->
outputs) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((simplify->graph->exec_symbol_info)
->data)) + (size_t)(simplify->graph->exec_symbol_info
)->rsize * (size_t)(exec_idx))))->outputs == exec_symbol_info
->outputs) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->outputs == exec_symbol_info->outputs"
, "ccv_nnc_symbolic_graph_simplify.c", 842, __extension__ __PRETTY_FUNCTION__
); }))
;
843 }
844}
845
static void _ccv_nnc_symbolic_graph_pruning(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
	// Prune the symbolic graph: first mark every exec / tensor touched by the visit
	// order as dead, then BFS backwards from the requested outputs, undeading only
	// what actually contributes to them. Whatever remains dead is removed later by
	// the apply step (outside this function).
	// tensor_visited: one bit per tensor symbol, set once a tensor has been queued
	// into a BFS frontier so it is never pushed twice.
	uint32_t* const tensor_visited = (uint32_t*)cccalloc(sizeof(uint32_t), (simplify->tensor_symbol_info_size + 31) >> 5);
	// Two arrays ping-ponged as BFS frontiers: preserve[p] is the current frontier,
	// preserve[q] collects tensor symbols for the next round.
	ccv_array_t* const preserve[2] = {
		ccv_array_new(sizeof(int), output_size, 0),
		ccv_array_new(sizeof(int), 0, 0),
	};
	int i, j;
	// r_alias_refs[d] lists every tensor symbol whose alias_ref points at d
	// (i.e. a reverse alias map); entries are allocated lazily.
	ccv_array_t** const r_alias_refs = (ccv_array_t**)cccalloc(sizeof(ccv_array_t*), simplify->tensor_symbol_info_size);
	for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		if (simplify->tensor_symbol_info[i].alias_ref)
		{
			// alias_ref is stored 1-based; 0 means "not an alias".
			const int alias_ref = simplify->tensor_symbol_info[i].alias_ref - 1;
			assert(alias_ref < simplify->tensor_symbol_info_size);
			if (!r_alias_refs[alias_ref])
				r_alias_refs[alias_ref] = ccv_array_new(sizeof(int), 1, 0);
			ccv_array_push(r_alias_refs[alias_ref], &i);
		}
	uint32_t* const exec_dead = simplify->exec_dead;
	uint32_t* const tensor_dead = simplify->tensor_dead;
	int* const output_execs = simplify->output_execs;
	_ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify);
	// Mark everything visited as dead.
	ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
		exec_dead[idx >> 5] |= (1u << (idx & 0x1f));
		for (i = 0; i < node->input_size; i++)
		{
			const int d = node->inputs[i];
			if (d >= 0)
				tensor_dead[d >> 5] |= (1u << (d & 0x1f));
		}
		for (i = 0; i < node->output_size; i++)
		{
			const int d = node->outputs[i];
			if (d >= 0)
				tensor_dead[d >> 5] |= (1u << (d & 0x1f));
		}
	} ccv_nnc_graph_visit_endfor
	// If the tensor symbol is used by other exec that is not visited, unmark it.
	for (i = 0; i < simplify->exec_symbol_info_size; i++)
	{
		if (exec_dead[i >> 5] & (1u << (i & 0x1f)))
			continue;
		const ccv_nnc_graph_exec_symbol_info_t* const node = simplify->exec_symbol_info + i;
		for (j = 0; j < node->input_size; j++)
		{
			const int d = node->inputs[j];
			// Undead it.
			if (d >= 0)
				tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
		}
		for (j = 0; j < node->output_size; j++)
		{
			const int d = node->outputs[j];
			// Undead it.
			if (d >= 0)
				tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
		}
	}
	// Seed the first frontier with the tensors the caller wants preserved.
	for (i = 0; i < output_size; i++)
		ccv_array_push(preserve[0], &outputs[i].d);
	int p = 0, q = 1;
	// BFS to mark execs / tensors as not dead.
	while (preserve[p]->rnum > 0)
	{
		ccv_array_clear(preserve[q]);
		// First, undead all relevant tensors.
		for (i = 0; i < preserve[p]->rnum; i++)
		{
			const int d = *(int*)ccv_array_get(preserve[p], i);
			// Undead the outputs.
			tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
			int alias_ref = d;
			if (simplify->tensor_symbol_info[d].alias_ref)
			{
				// d is an alias: also undead the tensor it aliases into.
				alias_ref = simplify->tensor_symbol_info[d].alias_ref - 1;
				tensor_dead[alias_ref >> 5] &= ~(1u << (alias_ref & 0x1f));
				assert(r_alias_refs[alias_ref]);
			}
			if (r_alias_refs[alias_ref])
				for (j = 0; j < r_alias_refs[alias_ref]->rnum; j++)
				{
					const int b = *(int*)ccv_array_get(r_alias_refs[alias_ref], j);
					if (output_execs[b] >= 0) // Only revive if it is written alias.
						tensor_dead[b >> 5] &= ~(1u << (b & 0x1f));
				}
		}
		// Then undead the execs that write these tensors (and their aliases); the
		// undead_exec helper pushes each exec's surviving tensors onto preserve[q]
		// for the next round.
		for (i = 0; i < preserve[p]->rnum; i++)
		{
			const int d = *(int*)ccv_array_get(preserve[p], i);
			const int output_exec = output_execs[d];
			// Undead the exec.
			if (output_exec >= 0)
				_ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]);
			int alias_ref = d;
			if (simplify->tensor_symbol_info[d].alias_ref)
			{
				alias_ref = simplify->tensor_symbol_info[d].alias_ref - 1;
				const int output_exec = output_execs[alias_ref];
				if (output_exec >= 0)
					_ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]);
			}
			if (r_alias_refs[alias_ref])
				for (j = 0; j < r_alias_refs[alias_ref]->rnum; j++)
				{
					const int b = *(int*)ccv_array_get(r_alias_refs[alias_ref], j);
					const int output_exec = output_execs[b];
					if (output_exec >= 0)
						_ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]);
				}
		}
		// Swap current and next frontier (i is reused as the swap temporary).
		CCV_SWAP(p, q, i);
	}
	ccfree(tensor_visited);
	ccv_array_free(preserve[0]);
	ccv_array_free(preserve[1]);
	for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		if (r_alias_refs[i])
			ccv_array_free(r_alias_refs[i]);
	ccfree(r_alias_refs);
}
967
968void ccv_nnc_symbolic_graph_simplify(ccv_nnc_symbolic_graph_t* const graph, const int* const passes, const int pass_size, const ccv_nnc_tensor_symbol_t* const binds, const int bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
969{
970 ccv_nnc_symbolic_graph_simplify_t* const simplify = _ccv_nnc_symbolic_graph_simplify_new(graph, sources, source_size, destinations, destination_size);
971 int i;
972 for (i = 0; i < pass_size; i++)
1
Assuming 'i' is < 'pass_size'
2
Loop condition is true. Entering loop body
973 switch (passes[i])
3
Control jumps to 'case CCV_NNC_SIMPLIFY_GRAPH_PRUNING:' at line 981
974 {
975 case CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION:
976 _ccv_nnc_symbolic_graph_common_subexpression_elimination(simplify, outputs, output_size);
977 break;
978 case CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT:
979 _ccv_nnc_symbolic_graph_data_transfer_opt(simplify, binds, bind_size, outputs, output_size);
980 break;
981 case CCV_NNC_SIMPLIFY_GRAPH_PRUNING:
982 _ccv_nnc_symbolic_graph_pruning(simplify, outputs, output_size);
4
Calling '_ccv_nnc_symbolic_graph_pruning'
983 break;
984 case CCV_NNC_SIMPLIFY_OPS_FUSION:
985 _ccv_nnc_symbolic_graph_ops_fusion(simplify, outputs, output_size);
986 break;
987 }
988 _ccv_nnc_symbolic_graph_simplify_apply(simplify);
989 _ccv_nnc_symbolic_graph_simplify_free(simplify);
990}