Bug Summary

File: nnc/ccv_nnc_symbolic_graph_simplify.c
Warning: line 800, column 27
The left expression of the compound assignment is an uninitialized value. The computed value will also be garbage
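
For context, this class of warning fires when the left operand of a compound assignment (+=, |=, and so on) is read before anything was ever stored in it, so the read is indeterminate and the computed result is too. A minimal sketch of the pattern (hypothetical code; line 800 itself falls outside the excerpt reproduced below):

int garbage_sum(const int* const values, const int count)
{
	int sum; // Never initialized.
	int i;
	for (i = 0; i < count; i++)
		sum += values[i]; // The left expression of the compound assignment reads an uninitialized value.
	return sum; // The computed value is also garbage.
}

The usual fix is to initialize the accumulator (int sum = 0;) or to ensure every path stores to it before the first compound assignment.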

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_symbolic_graph_simplify.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/19 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2024-10-29-232120-439362-1 -x c ccv_nnc_symbolic_graph_simplify.c
1 #include "ccv_nnc.h"
2 #include "ccv_nnc_easy.h"
3 #include "ccv_nnc_internal.h"
4 #include "ccv_internal.h"
5 #include "_ccv_nnc_symbolic_graph.h"
6 #include "3rdparty/siphash/siphash24.h"
7
8 // MARK - Level-3.5 API
9
10 static uint8_t key_siphash[16] = "graphcsekvlibnnc";
11
12 typedef struct {
13 int tensor_symbol_info_size;
14 int exec_symbol_info_size;
15 ccv_nnc_symbolic_graph_t* graph;
16 ccv_nnc_graph_visit_t* visit;
17 ccv_nnc_tensor_symbol_info_t* tensor_symbol_info;
18 ccv_nnc_graph_exec_symbol_info_t* exec_symbol_info;
19 uint32_t* exec_dead; // Mark an exec as dead and needing to be cleared; each bit represents an exec.
20 uint32_t* tensor_dead; // Mark a tensor as dead and needing to be cleared; each bit represents a tensor.
21 int* output_execs; // Mapping from tensor to the exec that generates this tensor.
22 } ccv_nnc_symbolic_graph_simplify_t;
23
24 static void _ccv_nnc_symbolic_graph_simplify_update_output_execs(ccv_nnc_symbolic_graph_simplify_t* const simplify)
25 {
26 int i;
27 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
28 simplify->output_execs[i] = -1;
29 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
30 if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
31 continue;
32 for (i = 0; i < node->output_size; i++)
33 if (node->outputs[i] >= 0)
34 simplify->output_execs[node->outputs[i]] = idx; // A tensor can only be written once.
35 } ccv_nnc_graph_visit_endfor
36 }
37
38 static ccv_nnc_symbolic_graph_simplify_t* _ccv_nnc_symbolic_graph_simplify_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
39 {
40 ccv_nnc_symbolic_graph_simplify_t* const simplify = (ccv_nnc_symbolic_graph_simplify_t*)ccmalloc(sizeof(ccv_nnc_symbolic_graph_simplify_t));
41 simplify->graph = graph;
42 simplify->visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0), graph->exec_symbol_info->rnum, sources, source_size, destinations, destination_size, 0);
43 simplify->tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccmalloc(sizeof(ccv_nnc_tensor_symbol_info_t) * graph->tensor_symbol_info->rnum);
44 simplify->exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_info_t) * graph->exec_symbol_info->rnum);
45 ccv_nnc_symbolic_graph_symbol_infer(graph, simplify->visit, sources, source_size, destinations, destination_size, 0, 0, simplify->tensor_symbol_info, simplify->exec_symbol_info);
46 simplify->tensor_symbol_info_size = graph->tensor_symbol_info->rnum;
47 simplify->exec_symbol_info_size = graph->exec_symbol_info->rnum;
48 simplify->exec_dead = cccalloc(((simplify->exec_symbol_info_size + 31) >> 5) + ((simplify->tensor_symbol_info_size + 31) >> 5), sizeof(uint32_t));
49 simplify->tensor_dead = simplify->exec_dead + ((simplify->exec_symbol_info_size + 31) >> 5);
50 simplify->output_execs = (int*)ccmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
51 return simplify;
52 }
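
Note how lines 48-49 carve two bitsets out of a single cccalloc block: exec_dead gets the first (exec_symbol_info_size + 31) >> 5 words and tensor_dead starts right after. Throughout the file, flag d lives in word d >> 5 at bit d & 0x1f. A minimal standalone sketch of the idiom (names hypothetical, not part of the analyzed file):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t dead[2] = { 0, 0 }; // Room for 64 flags.
	const int d = 37;
	dead[d >> 5] |= (1u << (d & 0x1f)); // Mark d as dead.
	printf("%d\n", !!(dead[d >> 5] & (1u << (d & 0x1f)))); // Prints 1.
	dead[d >> 5] &= ~(1u << (d & 0x1f)); // Unmark d again.
	printf("%d\n", !!(dead[d >> 5] & (1u << (d & 0x1f)))); // Prints 0.
	return 0;
}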
53
54 static void _ccv_nnc_symbolic_graph_simplify_apply(ccv_nnc_symbolic_graph_simplify_t* const simplify)
55 {
56 int i, j;
57 for (i = 0; i < simplify->exec_symbol_info_size; i++)
58 if (simplify->exec_dead[i >> 5] & (1u << (i & 0x1f)))
59 ccv_nnc_graph_exec_symbol_free(simplify->graph, (ccv_nnc_graph_exec_symbol_t){
60 .d = i,
61 .graph = simplify->graph,
62 });
63 else // If it is not marked as dead, go through its outputs to unmark tensors
64 for (j = 0; j < simplify->exec_symbol_info[i].output_size; j++)
65 {
66 const int d = simplify->exec_symbol_info[i].outputs[j];
67 if (d >= 0)
68 simplify->tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
69 }
70 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
71 if (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f)))
72 ccv_nnc_tensor_symbol_free(simplify->graph, (ccv_nnc_tensor_symbol_t){
73 .d = i,
74 .graph = simplify->graph,
75 });
76 }
77
78 static void _ccv_nnc_symbolic_graph_simplify_free(ccv_nnc_symbolic_graph_simplify_t* const simplify)
79 {
80 ccv_nnc_graph_visit_free(simplify->visit);
81 ccfree(simplify->tensor_symbol_info);
82 ccfree(simplify->exec_symbol_info);
83 ccfree(simplify->exec_dead);
84 ccfree(simplify->output_execs);
85 ccfree(simplify);
86 }
87
88 typedef struct {
89 int d;
90 int ifbit;
91 uint64_t hash;
92 } ccv_nnc_cse_hash_t;
93
94 static int _ccv_nnc_cse_hash_find(ccv_nnc_cse_hash_t* const hash_map, const uint64_t hash, const int map_size)
95 {
96 assert(hash > 0);
97 int i, j;
98 i = (hash - 1) % map_size;
99 for (j = 0; ; j++, i++)
100 {
101 if (i >= map_size)
102 i = 0;
103 if (j > hash_map[i].ifbit)
104 return -1;
105 if (hash_map[i].hash == hash)
106 return hash_map[i].d;
107 }
108 }
109
110 static void _ccv_nnc_cse_hash_add(ccv_nnc_cse_hash_t* const hash_map, uint64_t hash, int d, const int map_size)
111 {
112 assert(hash > 0);
113 int i, j;
114 i = (hash - 1) % map_size;
115 for (j = 0; ; j++, i++)
116 {
117 if (i >= map_size)
118 i = 0;
119 if (hash_map[i].hash == hash) // Already exists, do nothing.
120 return;
121 if (hash_map[i].hash == 0)
122 {
123 // Insert.
124 hash_map[i].d = d;
125 hash_map[i].ifbit = j;
126 hash_map[i].hash = hash;
127 return;
128 }
129 if (j > hash_map[i].ifbit)
130 {
131 const ccv_nnc_cse_hash_t old_hash = hash_map[i];
132 // Swap and continue until we find an empty slot.
133 hash_map[i].d = d;
134 hash_map[i].ifbit = j;
135 hash_map[i].hash = hash;
136 d = old_hash.d;
137 j = old_hash.ifbit;
138 hash = old_hash.hash;
139 }
140 }
141 }
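
The two functions above implement a small robin-hood open-addressing hash map: ifbit records an entry's probe distance from its home slot (hash - 1) % map_size, a lookup gives up as soon as its own probe distance j exceeds the resident entry's ifbit, and an insert displaces any resident entry that sits closer to its home slot than the key being inserted, then carries the displaced entry forward. A hedged usage sketch under the same conventions (hash values must be non-zero; 7 is an arbitrary map_size):

ccv_nnc_cse_hash_t* const map = (ccv_nnc_cse_hash_t*)calloc(7, sizeof(ccv_nnc_cse_hash_t));
_ccv_nnc_cse_hash_add(map, 42 /* hash */, 3 /* tensor index */, 7 /* map_size */);
const int hit = _ccv_nnc_cse_hash_find(map, 42, 7); // Returns 3.
const int miss = _ccv_nnc_cse_hash_find(map, 99, 7); // Returns -1 once j exceeds the slot's ifbit.
free(map);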
142
143 static int _ccv_nnc_symbolic_graph_update_refs(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const int* const refs, const int output_exec_ref_dead)
144 {
145 int i, j;
146 // Go over refs, if a tensor is an alias, mark it reference to the new one.
147 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
148 if (refs[i] >= 0)
149 // Mark this tensor as dead.
150 simplify->tensor_dead[i >> 5] |= (1u << (i & 0x1f));
151 else if (simplify->tensor_symbol_info[i].alias_ref && refs[simplify->tensor_symbol_info[i].alias_ref - 1] >= 0) {
152 const int alias_ref = simplify->tensor_symbol_info[i].alias_ref - 1;
153 simplify->tensor_symbol_info[i].alias_ref = refs[alias_ref] + 1;
154 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->alias_ref = refs[alias_ref] + 1;
155 }
156 for (i = 0; i < output_size; i++)
157 // If the output is an alias, that's fine, because if the alias is re-bound, we are good.
158 simplify->tensor_dead[outputs[i].d >> 5] &= ~(1u << (outputs[i].d & 0x1f)); // Undead for output tensor symbols.
159 // Merge s_refs if the tensor is dead.
160 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
161 if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))))
162 {
163 const int ref = refs[i];
164 if (simplify->tensor_symbol_info[i].s_ref && simplify->tensor_symbol_info[i].s_ref->rnum)
165 {
166 if (!simplify->tensor_symbol_info[ref].s_ref) // If there is no s_ref, simple, just assign the pointer and set the old one to nil.
167 {
168 simplify->tensor_symbol_info[ref].s_ref = simplify->tensor_symbol_info[i].s_ref;
169 simplify->tensor_symbol_info[i].s_ref = 0;
170 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->s_ref = 0;
171 } else {
172 ccv_array_t* const ref_s_ref = simplify->tensor_symbol_info[ref].s_ref;
173 ccv_array_t* const i_s_ref = simplify->tensor_symbol_info[i].s_ref;
174 const int ref_s_ref_rnum = ref_s_ref->rnum;
175 int flag = 0;
176 // Detect conflict, if there is, undead.
177 for (j = 0; !flag && j < ccv_min(ref_s_ref_rnum, i_s_ref->rnum); j++)
178 {
179 const int ref_s_ref_k = *(int*)ccv_array_get(ref_s_ref, j);
180 const int i_s_ref_k = *(int*)ccv_array_get(i_s_ref, j);
181 // If for the same sub-graph, they have different tensors linked, we cannot merge these two.
182 flag = (ref_s_ref_k > 0 && i_s_ref_k > 0 && ref_s_ref_k != i_s_ref_k);
183 }
184 if (flag)
185 {
186 simplify->tensor_dead[i >> 5] &= ~(1u << (i & 0x1f)); // Undead
187 continue;
188 }
189 if (ref_s_ref_rnum < i_s_ref->rnum)
190 {
191 ccv_array_resize(ref_s_ref, i_s_ref->rnum);
192 memcpy(ccv_array_get(ref_s_ref, ref_s_ref_rnum), ccv_array_get(i_s_ref, ref_s_ref_rnum), sizeof(int) * (i_s_ref->rnum - ref_s_ref_rnum));
193 }
194 for (j = 0; j < ccv_min(ref_s_ref_rnum, i_s_ref->rnum); j++)
195 {
196 const int ref_s_ref_k = *(int*)ccv_array_get(ref_s_ref, j);
197 const int i_s_ref_k = *(int*)ccv_array_get(i_s_ref, j);
198 assert(ref_s_ref_k == 0 || i_s_ref_k == 0);
199 if (i_s_ref_k)
200 *(int*)ccv_array_get(ref_s_ref, j) = i_s_ref_k;
201 }
202 ccv_array_free(simplify->tensor_symbol_info[i].s_ref);
203 simplify->tensor_symbol_info[i].s_ref = 0;
204 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->s_ref = 0;
205 for (j = 0; j < ref_s_ref->rnum; j++)
206 {
207 const int ref_k = *(int*)ccv_array_get(ref_s_ref, j) - 1;
208 if (ref_k >= 0)
209 {
210 ccv_nnc_symbolic_graph_t* const sub_graph = *(ccv_nnc_symbolic_graph_t**)ccv_array_get(simplify->graph->sub_graphs, j);
211 assert(sub_graph);
212 // Update its p_ref.
213 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(sub_graph->tensor_symbol_info, ref_k))->p_ref = ref + 1;
214 }
215 }
216 }
217 assert(simplify->tensor_symbol_info[i].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->s_ref);
218 assert(simplify->tensor_symbol_info[ref].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref))->s_ref);
219 }
220 }
221 // Go through the refs that we are updating and their p_refs to make sure both sides are updated.
222 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
223 if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))) && simplify->tensor_symbol_info[i].p_ref)
224 {
225 const int ref = refs[i];
226 const int p_ref = simplify->tensor_symbol_info[i].p_ref - 1;
227 assert(p_ref >= 0);
228 assert(simplify->graph->p);
229 ccv_array_t* const s_ref = ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->p->tensor_symbol_info, p_ref))->s_ref;
230 const int s_idx = simplify->graph->p_idx - 1;
231 assert(s_idx >= 0);
232 assert(s_ref && s_ref->rnum > s_idx);
233 *(int*)ccv_array_get(s_ref, s_idx) = ref + 1; // Update so it references the new s_ref.
234 assert(!simplify->tensor_symbol_info[ref].p_ref);
235 simplify->tensor_symbol_info[ref].p_ref = p_ref + 1;
236 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref))->p_ref = p_ref + 1;
237 }
238 // Now go over the execs and mark them as dead because we don't need them to generate refs.
239 if (output_exec_ref_dead)
240 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
241 if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))))
242 {
243 const int output_exec = simplify->output_execs[i];
244 assert(output_exec >= 0);
245 const ccv_nnc_graph_exec_symbol_info_t* const symbol_info = simplify->exec_symbol_info + output_exec;
246 int flag = 0;
247 for (j = 0; !flag && j < symbol_info->output_size; j++)
248 {
249 const int d = symbol_info->outputs[j];
250 if (d >= 0)
251 flag = (!(simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f)))); // If some of the output is not dead, we cannot proceed.
252 }
253 if (!flag) // If all outputs are dead, mark the exec as dead.
254 simplify->exec_dead[output_exec >> 5] |= (1u << (output_exec & 0x1f));
255 }
256 int updated_refs = 0;
257 // Go over replace inputs / outputs.
258 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
259 for (i = 0; i < node->input_size; i++)
260 {
261 const int d = node->inputs[i];
262 if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
263 {
264 node->inputs[i] = refs[d]; // It can be replaced.
265 updated_refs = 1;
266 }
267 }
268 for (i = 0; i < node->output_size; i++)
269 {
270 const int d = node->outputs[i];
271 if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
272 {
273 node->outputs[i] = refs[d]; // It can be replaced.
274 updated_refs = 1;
275 }
276 }
277 assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->inputs == node->inputs);
278 assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->outputs == node->outputs);
279 } ccv_nnc_graph_visit_endfor
280 const ccv_nnc_graph_exec_symbol_info_t* const p_node_info = simplify->graph->p ? (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->p->exec_symbol_info, simplify->graph->exec_idx - 1) : 0;
281 if (p_node_info && (p_node_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE))
282 // Go over the while inputs as well.
283 for (i = 0; i < p_node_info->p_while.input_size; i++)
284 {
285 const int d = p_node_info->p_while.inputs[i];
286 if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
287 {
288 p_node_info->p_while.inputs[i] = refs[d];
289 updated_refs = 1;
290 }
291 }
292 return updated_refs;
293 }
294
295 // This is a simple common sub-expression elimination implementation. In particular, we only replace the later computed output
296 // with the identical earlier computed output, and leave the "elimination" part to the graph pruning.
297 static void _ccv_nnc_symbolic_graph_common_subexpression_elimination(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
298 {
299 _ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify);
300 // tensor_hash starts with 0s, and it is either marked with the tensor index + 1, or the hash of the computations.
301 uint64_t* const tensor_hash = (uint64_t*)cccalloc(simplify->tensor_symbol_info_size, sizeof(uint64_t));
302 int i;
303 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
304 // If already marked as dead, skip.
305 if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
306 continue;
307 for (i = 0; i < node->input_size; i++)
308 {
309 assert(node->inputs[i] < simplify->tensor_symbol_info_size);
310 // If no hash for the input, use the index + 1 as the hash.
311 if (node->inputs[i] >= 0 && tensor_hash[node->inputs[i]] == 0)
312 tensor_hash[node->inputs[i]] = node->inputs[i] + 1;
313 }
314 // We cannot support graph / custom command (we cannot model them properly).
315 if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD ||
316 node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ||
317 node->cmd.cmd == CCV_NNC_CUSTOM_FORWARD ||
318 node->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD ||
319 // No need to process an opt-disabled node.
320 (node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT))
321 continue;
322 uint64_t hashout, hashin[3];
323 siphash((uint8_t*)&hashin[0], (const uint8_t*)&node->cmd.info, sizeof(node->cmd.info), key_siphash);
324 siphash((uint8_t*)&hashin[1], (const uint8_t*)&node->hint, sizeof(node->hint), key_siphash);
325 hashin[2] = node->cmd.cmd; // Now actually hash the cmd name.
326 siphash((uint8_t*)&hashout, (const uint8_t*)hashin, sizeof(hashin), key_siphash);
327 // First, hash the cmd and the hints with the cmd.
328 // Note on alias, we cannot really generate proper hash for alias (yet). Thus, just treat alias as normal tensors.
329 for (i = 0; i < node->input_size; i++)
330 {
331 assert(node->inputs[i] < simplify->tensor_symbol_info_size);
332 if (node->inputs[i] >= 0)
333 {
334 // Hash using the tensor hash.
335 hashin[0] = hashout;
336 hashin[1] = i; // Encode the positional information.
337 hashin[2] = tensor_hash[node->inputs[i]];
338 } else {
339 // Hash using the input integer (could be special integer).
340 hashin[0] = hashout;
341 hashin[1] = i; // Encode the positional information.
342 hashin[2] = node->inputs[i];
343 }
344 siphash((uint8_t*)&hashout, (const uint8_t*)hashin, sizeof(hashin), key_siphash);
345 }
346 for (i = 0; i < node->output_size; i++)
347 if (node->outputs[i] >= 0)
348 {
349 assert(node->outputs[i] < simplify->tensor_symbol_info_size);
350 // Assigning once, especially now we don't consider aliases.
351 assert(tensor_hash[node->outputs[i]] == 0);
352 hashin[0] = hashout;
353 hashin[1] = i; // Positional information.
354 siphash((uint8_t*)&hashin[2], (const uint8_t*)&simplify->tensor_symbol_info[node->outputs[i]].info,
355 sizeof(simplify->tensor_symbol_info[node->outputs[i]].info), key_siphash);
356 // Generate hash for the output.
357 siphash((uint8_t*)&tensor_hash[node->outputs[i]], (const uint8_t*)hashin, sizeof(hashin), key_siphash);
358 }
359 } ccv_nnc_graph_visit_endfor
360 // Allocate 3 / 2 as much space, for the simple robin-hood open address hash map.
361 const int map_size = (simplify->tensor_symbol_info_size * 3 + 1) / 2;
362 int* const refs = (int*)ccmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
363 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
364 refs[i] = -1;
365 ccv_nnc_cse_hash_t* const hash_map = (ccv_nnc_cse_hash_t*)cccalloc(map_size, sizeof(ccv_nnc_cse_hash_t));
366 // Now that all tensors are hashed, identify tensors with the same hash code and replace the ones that are accessed later.
367 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
368 // If already marked as dead, skip.
369 if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
370 continue;
371 // No need to visit an opt-disabled node.
372 if (node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT)
373 continue;
374 for (i = 0; i < node->input_size; i++)
375 if (node->inputs[i] >= 0)
376 {
377 const int d = node->inputs[i];
378 assert(tensor_hash[d]);
379 const int new_d = _ccv_nnc_cse_hash_find(hash_map, tensor_hash[d], map_size);
380 if (new_d >= 0 && new_d != d)
381 {
382 // Check whether this can be replaced.
383 assert(refs[d] == -1 || refs[d] == new_d);
384 assert(!simplify->tensor_symbol_info[d].assign_ref);
385 assert(!simplify->tensor_symbol_info[d].r_assign_ref);
386 assert(!simplify->tensor_symbol_info[d].bypass_ref);
387 assert(!simplify->tensor_symbol_info[new_d].assign_ref);
388 assert(!simplify->tensor_symbol_info[new_d].r_assign_ref);
389 assert(!simplify->tensor_symbol_info[new_d].bypass_ref);
390 // Ignore if there is a pair_ref (again, pair_ref has a side effect that runs deeper, through the tape).
391 if (simplify->tensor_symbol_info[d].pair_ref)
392 continue;
393 // If both have p_ref, we cannot merge.
394 if (simplify->tensor_symbol_info[d].p_ref && simplify->tensor_symbol_info[new_d].p_ref)
395 continue;
396 // Merge s_refs from ref[d] later.
397 if (refs[d] != new_d)
398 refs[d] = new_d;
399 assert(simplify->output_execs[new_d] >= 0);
400 // Establish new dependency.
401 ccv_nnc_graph_exec_symbol_concat(simplify->graph, (ccv_nnc_graph_exec_symbol_t){
402 .d = simplify->output_execs[new_d],
403 .graph = simplify->graph,
404 }, (ccv_nnc_graph_exec_symbol_t){
405 .d = idx,
406 .graph = simplify->graph,
407 });
408 }
409 }
410 // We can reuse the input, but we cannot do that for the outputs of these commands.
411 if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD ||
412 node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ||
413 node->cmd.cmd == CCV_NNC_CUSTOM_FORWARD ||
414 node->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD)
415 continue;
416 for (i = 0; i < node->output_size; i++)
417 if (node->outputs[i] >= 0) // This tensor can be reused by others.
418 _ccv_nnc_cse_hash_add(hash_map, tensor_hash[node->outputs[i]], node->outputs[i], map_size);
419 } ccv_nnc_graph_visit_endfor
420 _ccv_nnc_symbolic_graph_update_refs(simplify, outputs, output_size, refs, 1 /* For the execs that generate the refs, we don't need them any more. */);
421 ccfree(tensor_hash);
422 ccfree(hash_map);
423 ccfree(refs);
424 }
425
426 static void _ccv_nnc_symbolic_graph_data_transfer_opt(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const binds, const int bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
427 {
428 _ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify);
429 uint32_t* const exec_dead = simplify->exec_dead;
430 const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = simplify->tensor_symbol_info;
431 int i;
432 uint32_t* const has_alias = cccalloc(2 * ((simplify->tensor_symbol_info_size + 31) >> 5), sizeof(uint32_t));
433 uint32_t* const has_binds = has_alias + ((simplify->tensor_symbol_info_size + 31) >> 5);
434 for (i = 0; i < bind_size; i++)
435 has_binds[binds[i].d >> 5] |= (1u << (binds[i].d & 0x1f));
436 for (i = 0; i < output_size; i++)
437 has_binds[outputs[i].d >> 5] |= (1u << (outputs[i].d & 0x1f));
438 int* const refs = (int*)ccmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
439 int updated_refs;
440 do {
441 memset(has_alias, 0, sizeof(uint32_t) * ((simplify->tensor_symbol_info_size + 31) >> 5));
442 // Go through until no update is possible. This won't result in an infinite loop because each time,
443 // a tensor is eliminated.
444 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
445 {
446 refs[i] = -1;
447 if (tensor_symbol_info[i].alias_ref)
448 {
449 const int alias_ref = tensor_symbol_info[i].alias_ref - 1;
450 has_alias[alias_ref >> 5] |= (1u << (alias_ref & 0x1f));
451 }
452 }
453 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
454 // If already marked as dead, skip.
455 if (exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
456 continue;
457 if (node->cmd.cmd != CCV_NNC_DATA_TRANSFER_FORWARD &&
458 node->cmd.cmd != CCV_NNC_DATA_TRANSFER_BACKWARD &&
459 // Conversion, if the datatype is the same, it is the data transfer op.
460 node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_FORWARD &&
461 node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_BACKWARD &&
462 // Format transform, if the format is the same, it is the data transfer op.
463 node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_FORWARD &&
464 node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_BACKWARD)
465 continue;
466 if (node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT) // If optimization pass disabled, skip.
467 continue;
468 for (i = 0; i < node->output_size; i++) // For data transfer, we only respect output size.
469 if (node->inputs[i] >= 0 && node->outputs[i] >= 0)
470 {
471 assert(node->inputs[i] < simplify->tensor_symbol_info_size);
472 assert(node->outputs[i] < simplify->tensor_symbol_info_size);
473 int input_ref = node->inputs[i];
474 while (refs[input_ref] >= 0)
475 input_ref = refs[input_ref];
476 int output_ref = node->outputs[i];
477 while (refs[output_ref] >= 0)
478 output_ref = refs[output_ref];
479 if (input_ref == output_ref)
480 continue;
481 const ccv_nnc_tensor_symbol_info_t* const input = tensor_symbol_info + input_ref;
482 const ccv_nnc_tensor_symbol_info_t* const output = tensor_symbol_info + output_ref;
483 // If they are not the same data type, skip. (Likely data conversion op).
484 if (input->info.datatype != output->info.datatype)
485 continue;
486 // If they are not the same format, skip. (Likely format transform op).
487 if (input->info.format != output->info.format)
488 continue;
489 // If they are not on the same device (even for NUMA), skip.
490 if (input->info.type != output->info.type)
491 continue;
492 // If both are alias, we cannot consolidate this.
493 if (input->alias_ref && output->alias_ref)
494 continue;
495 // If input is alias, and output has alias reference to it, output cannot be the same as input.
496 if (input->alias_ref && (has_alias[output_ref >> 5] & (1u << (output_ref & 0x1f))))
497 continue;
498 // If output is alias, and input has alias reference to it, input cannot be the same as output.
499 if (output->alias_ref && (has_alias[input_ref >> 5] & (1u << (input_ref & 0x1f))))
500 continue;
501 // If either are carry overs (for while), we cannot do anything.
502 if (input->assign_ref || output->assign_ref ||
503 input->r_assign_ref || output->r_assign_ref)
504 continue;
505 // If either are bypasses (for case..of), we cannot do anything.
506 if (input->bypass_ref || output->bypass_ref ||
507 input->r_bypass_ref || output->r_bypass_ref)
508 continue;
509 // If either are inputs / outputs connecting the parent graph, we cannot do anything.
510 if (input->p_ref || output->p_ref)
511 continue;
512 // If the type is the same, check which one is the alias.
513 // We always prefer alias.
514 if (output->alias_ref)
515 {
516 // Input cannot be binds.
517 if (has_binds[input_ref >> 5] & (1u << (input_ref & 0x1f)))
518 continue;
519 refs[input_ref] = output_ref;
520 } else { // if (input->alias_ref), else
521 // Output cannot be binds.
522 if (has_binds[output_ref >> 5] & (1u << (output_ref & 0x1f)))
523 continue;
524 refs[output_ref] = input_ref;
525 }
526 }
527 } ccv_nnc_graph_visit_endfor
528 // Make sure refs reference to the end.
529 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
530 if (refs[i] >= 0)
531 {
532 int ref = refs[i];
533 while (refs[ref] >= 0)
534 ref = refs[ref];
535 refs[i] = ref;
536 }
537 updated_refs = _ccv_nnc_symbolic_graph_update_refs(simplify, outputs, output_size, refs, 0 /* We still need these exec that generates the refs. */);
538 } while (updated_refs);
539 ccfree(refs);
540 ccfree(has_alias);
541 // Now, with all references updated, remove data transfers whose sources and destinations are the same.
542 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
543 // If already marked as dead, skip.
544 if (exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
545 continue;
546 if (node->cmd.cmd != CCV_NNC_DATA_TRANSFER_FORWARD &&
547 node->cmd.cmd != CCV_NNC_DATA_TRANSFER_BACKWARD &&
548 // Conversion, if the datatype is the same, it is the data transfer op.
549 node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_FORWARD &&
550 node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_BACKWARD &&
551 // Format transform, if the format is the same, it is the data transfer op.
552 node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_FORWARD &&
553 node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_BACKWARD)
554 continue;
555 for (i = 0; i < node->output_size; i++) // For data transfer, we only respect output size.
556 if (node->inputs[i] == node->outputs[i])
557 {
558 if (i + 1 < node->output_size)
559 {
560 node->inputs[i] = node->inputs[node->output_size - 1];
561 node->outputs[i] = node->outputs[node->output_size - 1];
562 }
563 --node->output_size;
564 --i;
565 }
566 node->input_size = node->output_size;
567 ((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->input_size = node->input_size;
568 ((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->output_size = node->output_size;
569 // Remove this data transfer node if it has no outputs.
570 if (node->output_size == 0)
571 exec_dead[idx >> 5] |= (1u << (idx & 0x1f));
572 } ccv_nnc_graph_visit_endfor
573 }
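
One note on the refs bookkeeping above: refs acts as a forest of redirections, where refs[i] == -1 means tensor i survives and any other value means i was folded into refs[i]. The loop at lines 529-535 then flattens every chain down to its terminal tensor, much like path compression in a union-find. A minimal standalone sketch (array contents hypothetical):

#include <stdio.h>

int main(void)
{
	// refs[i] == -1: tensor i survives; otherwise it was merged into refs[i].
	int refs[5] = { -1, 0, 1, -1, 2 }; // Chains: 4 -> 2 -> 1 -> 0; 3 stands alone.
	int i;
	for (i = 0; i < 5; i++)
		if (refs[i] >= 0)
		{
			int ref = refs[i];
			while (refs[ref] >= 0) // Chase the chain to its terminal tensor.
				ref = refs[ref];
			refs[i] = ref; // Flatten, exactly as the loop at lines 529-535 does.
		}
	for (i = 0; i < 5; i++)
		printf("refs[%d] = %d\n", i, refs[i]); // -1 0 0 -1 0
	return 0;
}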
574
575 typedef struct {
576 uint32_t fused_op; // The final fused op id.
577 uint32_t ops_seq[2]; // The sequence of commands to identify.
578 int ops_seq_size;
579 int ops_info_select; // Which ops' info will be selected.
580 struct {
581 int type; // Whether it is an input or an output. It doesn't make sense, for example, to map an input in ops_seq to an output in fused_op.
582 int op_idx; // Index into the ops_seq.
583 int from; // The input / output index in the op referenced by op_idx.
584 int to; // The index in fused_op.
585 } pos[4]; // Maps of positions from ops_seq to fused_op for inputs (outputs).
586 int pos_size;
587 } ccv_nnc_ops_fusion_t;
588
589 enum {
590 CCV_NNC_OPS_FUSION_INPUT_INDEX,
591 CCV_NNC_OPS_FUSION_OUTPUT_INDEX,
592 };
593
594 const static int ccv_nnc_ops_fusion_io_size = 2;
595 const static ccv_nnc_ops_fusion_t ccv_nnc_ops_fusions[] = {
596 {
597 .fused_op = CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD,
598 .ops_seq = {
599 CCV_NNC_SOFTMAX_FORWARD, CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD,
600 },
601 .ops_seq_size = 2,
602 .ops_info_select = 1,
603 .pos = {
604 {
605 .type = CCV_NNC_OPS_FUSION_INPUT_INDEX,
606 .op_idx = 0,
607 .from = 0,
608 .to = 0,
609 },
610 {
611 .type = CCV_NNC_OPS_FUSION_INPUT_INDEX,
612 .op_idx = 1,
613 .from = 1,
614 .to = 1,
615 },
616 {
617 .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX,
618 .op_idx = 0,
619 .from = 0,
620 .to = 1,
621 },
622 {
623 .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX,
624 .op_idx = 1,
625 .from = 0,
626 .to = 0,
627 },
628 },
629 .pos_size = 4,
630 },
631 {
632 .fused_op = CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_FORWARD,
633 .ops_seq = {
634 CCV_NNC_SIGMOID_FORWARD, CCV_NNC_BINARY_CROSSENTROPY_FORWARD,
635 },
636 .ops_seq_size = 2,
637 .ops_info_select = 1,
638 .pos = {
639 {
640 .type = CCV_NNC_OPS_FUSION_INPUT_INDEX,
641 .op_idx = 0,
642 .from = 0,
643 .to = 0,
644 },
645 {
646 .type = CCV_NNC_OPS_FUSION_INPUT_INDEX,
647 .op_idx = 1,
648 .from = 1,
649 .to = 1,
650 },
651 {
652 .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX,
653 .op_idx = 0,
654 .from = 0,
655 .to = 1,
656 },
657 {
658 .type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX,
659 .op_idx = 1,
660 .from = 0,
661 .to = 0,
662 },
663 },
664 .pos_size = 4,
665 }
666 };
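
To read the first table entry concretely (tensor names hypothetical): given b = SOFTMAX(a) followed by loss = CATEGORICAL_CROSSENTROPY(b, labels), the four pos entries route a (input 0 of op 0) to fused input 0, labels (input 1 of op 1) to fused input 1, b (output 0 of op 0) to fused output 1, and loss (output 0 of op 1) to fused output 0, yielding loss, b = SOFTMAX_CROSSENTROPY(a, labels); ops_info_select = 1 keeps the cross-entropy node's cmd info for the fused op.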
667
668 static int _ccv_nnc_find_ops_for_fusion(const ccv_nnc_ops_fusion_t* const fusion, const int ops_idx, const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info, const uint32_t* const exec_dead, const int exec_idx, int* const fusing_exec_symbols)
669 {
670 if (exec_dead[exec_idx >> 5] & (1u << (exec_idx & 0x1f)))
671 return 0;
672 const ccv_nnc_graph_exec_symbol_info_t* const node = exec_symbol_info + exec_idx;
673 // Doesn't match the ops_seq, return 0.
674 if (fusion->ops_seq[ops_idx] != node->cmd.cmd)
675 return 0;
676 fusing_exec_symbols[ops_idx] = exec_idx;
677 // If already reached the end, we are good.
678 if (ops_idx == fusion->ops_seq_size - 1)
679 return 1;
680 // Otherwise, we need to go on, but there are no outgoing nodes to follow.
681 if (!node->outgoings || !node->outgoings->rnum)
682 return 0;
683 int i;
684 for (i = 0; i < node->outgoings->rnum; i++)
685 if (_ccv_nnc_find_ops_for_fusion(fusion, ops_idx + 1, exec_symbol_info, exec_dead, *(int*)ccv_array_get(node->outgoings, i), fusing_exec_symbols))
686 return 1;
687 return 0;
688}
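Throughout this file, exec_dead and tensor_dead are uint32_t bit arrays: index >> 5 selects the 32-bit word and 1u << (index & 0x1f) selects the bit within it. A minimal sketch of the idiom (helper names are illustrative, not from the library):

#include <stdint.h>
#include <stdlib.h>

// One bit per item; (size + 31) >> 5 rounds up to whole 32-bit words,
// matching the cccalloc calls later in this file.
static uint32_t* bitset_new(const int size)
{
	return (uint32_t*)calloc((size + 31) >> 5, sizeof(uint32_t));
}

static void bitset_set(uint32_t* const bits, const int i)
{
	bits[i >> 5] |= (1u << (i & 0x1f)); // e.g., mark an exec dead.
}

static void bitset_clear(uint32_t* const bits, const int i)
{
	bits[i >> 5] &= ~(1u << (i & 0x1f)); // e.g., undead it again.
}

static int bitset_test(const uint32_t* const bits, const int i)
{
	return !!(bits[i >> 5] & (1u << (i & 0x1f)));
}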
689
690static void _ccv_nnc_symbolic_graph_ops_fusion(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
691{
692 uint32_t* const exec_dead = simplify->exec_dead;
693 ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = simplify->exec_symbol_info;
694 int i, j;
695 int fusing_exec_symbols[sizeof(ccv_nnc_ops_fusions->ops_seq)];
696 int fused_inputs[ccv_nnc_ops_fusion_io_size]; // The io size of 2 is based on the current ccv_nnc_ops_fusions entries.
697 int fused_outputs[ccv_nnc_ops_fusion_io_size];
698 ccv_nnc_graph_visit_for(simplify->visit, exec_symbol_info, node, idx) {
699 // If already marked as dead, skip.
700 if (exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
701 continue;
702 if (node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT) // If optimization pass disabled, skip.
703 continue;
704 // Run through rudimentary pattern matching for ops_seq. There are better ways to do this (Boyer-Moore immediately comes to mind). However, this is simpler to code.
705 // If I ever run into performance issues with this, I will be very happy.
706 for (i = 0; i < sizeof(ccv_nnc_ops_fusions) / sizeof(ccv_nnc_ops_fusion_t); i++)
707 {
708 const ccv_nnc_ops_fusion_t* const ops_fusion = ccv_nnc_ops_fusions + i;
709 // Check to see if a list of symbols are possible.
710 if (ops_fusion->ops_seq[0] == node->cmd.cmd &&
711 _ccv_nnc_find_ops_for_fusion(ops_fusion, 0, exec_symbol_info, exec_dead, idx, fusing_exec_symbols))
712 {
713 // Go through all the inputs and outputs, and check that they exist and are mapped.
714 // TODO: the mapping can be more sophisticated than what we have here.
715 // Also, I need to check whether some inputs / outputs cannot be mapped; in that case we cannot continue.
716 for (j = 0; j < ccv_nnc_ops_fusion_io_size; j++)
717 fused_inputs[j] = fused_outputs[j] = CCV_NNC_NO_TENSOR_SYMBOL;
718 int input_size = 0, output_size = 0;
719 for (j = 0; j < ops_fusion->pos_size; j++)
720 {
721 ccv_nnc_graph_exec_symbol_info_t* const fusing_op = exec_symbol_info + fusing_exec_symbols[ops_fusion->pos[j].op_idx];
722 switch (ops_fusion->pos[j].type)
723 {
724 case CCV_NNC_OPS_FUSION_INPUT_INDEX:
725 fused_inputs[ops_fusion->pos[j].to] = ops_fusion->pos[j].from < fusing_op->input_size ? fusing_op->inputs[ops_fusion->pos[j].from] : CCV_NNC_NO_TENSOR_SYMBOL;
726 input_size = ccv_max(input_size, ops_fusion->pos[j].to + 1);
727 break;
728 case CCV_NNC_OPS_FUSION_OUTPUT_INDEX:
729 fused_outputs[ops_fusion->pos[j].to] = ops_fusion->pos[j].from < fusing_op->output_size ? fusing_op->outputs[ops_fusion->pos[j].from] : CCV_NNC_NO_TENSOR_SYMBOL;
730 output_size = ccv_max(output_size, ops_fusion->pos[j].to + 1);
731 break;
732 }
733 }
734 const ccv_nnc_cmd_param_t info = exec_symbol_info[fusing_exec_symbols[ops_fusion->ops_info_select]].cmd.info;
735 // Modify the first node so it is the correct type and value. I need to look back to the actual graph to get the info.
736 ccv_nnc_graph_exec_symbol_info_t* const actual_node = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx);
737 actual_node->cmd.cmd = node->cmd.cmd = ops_fusion->fused_op;
738 actual_node->cmd.info = node->cmd.info = info;
739 if (node->input_size + node->output_size < input_size + output_size)
740 actual_node->inputs = node->inputs = node->inputs ? ccrealloc(node->inputs, sizeof(int) * (input_size + output_size)) : ccmalloc(sizeof(int) * (input_size + output_size));
741 actual_node->outputs = node->outputs = node->inputs + input_size;
742 actual_node->input_size = node->input_size = input_size;
743 actual_node->output_size = node->output_size = output_size;
744 memcpy(node->inputs, fused_inputs, sizeof(int) * input_size);
745 memcpy(node->outputs, fused_outputs, sizeof(int) * output_size);
746 // Afterwards, mark the rest as dead.
747 for (j = 1; j < ops_fusion->ops_seq_size; j++)
748 exec_dead[fusing_exec_symbols[j] >> 5] |= (1u << (fusing_exec_symbols[j] & 0x1f));
749 break;
750 }
751 }
752 } ccv_nnc_graph_visit_endfor
753}
754
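The ({ typeof (...) _a = ...; }) fragments the analyzer prints inline are the expansions of ccv_max, a GNU statement-expression macro that evaluates each argument exactly once. A small sketch of the same pattern (my_max is a stand-in name; this assumes the GCC/Clang extensions the build flags in the header already rely on):

#include <stdio.h>

// Statement-expression max, as ccv_max expands above: each argument is
// evaluated exactly once, unlike the naive ((a) > (b) ? (a) : (b)) form.
#define my_max(a, b) ({ typeof (a) _a = (a); typeof (b) _b = (b); (_a > _b) ? _a : _b; })

static int calls = 0;
static int next(void) { return ++calls; }

int main(void)
{
	const int m = my_max(next(), 3); // next() runs once, not twice.
	printf("max = %d, calls = %d\n", m, calls); // max = 3, calls = 1
	return 0;
}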
755static void _ccv_nnc_symbolic_graph_pruning_undead_exec(ccv_nnc_symbolic_graph_simplify_t* const simplify, const int exec_idx, uint32_t* const tensor_visited, ccv_array_t* const next)
756{
757 assert(exec_idx >= 0);
25
Taking true branch
758 uint32_t* const exec_dead = simplify->exec_dead;
759 uint32_t* const tensor_dead = simplify->tensor_dead;
760 exec_dead[exec_idx >> 5] &= ~(1u << (exec_idx & 0x1f)); // Undead the exec.
761 ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = simplify->exec_symbol_info + exec_idx;
762 int i;
763 if (exec_symbol_info->cmd.cmd == CCV_NNC_GRAPH_FORWARD ||
26
Assuming field 'cmd' is not equal to CCV_NNC_GRAPH_FORWARD
30
Taking false branch
764 exec_symbol_info->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ||
27
Assuming field 'cmd' is not equal to CCV_NNC_GRAPH_BACKWARD
765 exec_symbol_info->cmd.cmd == CCV_NNC_CUSTOM_FORWARD ||
28
Assuming field 'cmd' is not equal to CCV_NNC_CUSTOM_FORWARD
766 exec_symbol_info->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD)
29
Assuming field 'cmd' is not equal to CCV_NNC_CUSTOM_BACKWARD
767 {
768 // All of its inputs / outputs need to be undead for these commands.
769 for (i = 0; i < exec_symbol_info->input_size; i++)
770 {
771 const int d = exec_symbol_info->inputs[i];
772 if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f))))
773 {
774 ccv_array_push(next, &d); // Push to the next round to be undead.
775 tensor_visited[d >> 5] |= (1u << (d & 0x1f));
776 }
777 }
778 for (i = 0; i < exec_symbol_info->output_size; i++)
779 {
780 const int d = exec_symbol_info->outputs[i];
781 if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f))))
782 {
783 ccv_array_push(next, &d); // Push to the next round to be undead.
784 tensor_visited[d >> 5] |= (1u << (d & 0x1f));
785 }
786 }
787 return;
788 }
789 // Go through the inputs / outputs to make sure all of them can be available.
790 const int input_bitmask_size = (exec_symbol_info->input_size + 63) >> 6;
791 const int output_bitmask_size = (exec_symbol_info->output_size + 63) >> 6;
792 uint64_t input_bitmasks[ccv_max(1, input_bitmask_size)];
31
Assuming '_a' is > '_b'
32
'?' condition is true
793 for (i = 0; i < input_bitmask_size; i++)
32.1
'i' is >= 'input_bitmask_size'
33
Loop condition is false. Execution continues on line 795
794 input_bitmasks[i] = 0;
795 uint64_t output_bitmasks[ccv_max(1, output_bitmask_size)];
34
Assuming '_a' is > '_b'
35
'?' condition is true
796 for (i = 0; i < output_bitmask_size; i++)
35.1
'i' is >= 'output_bitmask_size'
36
Loop condition is false. Execution continues on line 798
797 output_bitmasks[i] = 0;
798 for (i = 0; i < exec_symbol_info->input_size; i++)
37
Assuming 'i' is < field 'input_size'
38
Loop condition is true. Entering loop body
799 if (exec_symbol_info->inputs[i] >= 0)
39
Assuming the condition is true
40
Taking true branch
800 input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
41
The left expression of the compound assignment is an uninitialized value. The computed value will also be garbage
801 for (i = 0; i < exec_symbol_info->output_size; i++)
802 if (exec_symbol_info->outputs[i] >= 0)
803 output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
804 // First, mark everything with bitmasks, and verify it works.
805 assert(ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size));
806 int flag;
807 do {
808 flag = 0;
809 // Try to eliminate one at a time. Go over output first.
810 for (i = 0; i < exec_symbol_info->output_size; i++)
811 {
812 const int d = exec_symbol_info->outputs[i];
813 // If this tensor currently is marked as dead, try to see whether it works when we don't have this tensor at all.
814 if (d >= 0 && (tensor_dead[d >> 5] & (1u << (d & 0x1f))) &&
815 (output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))))
816 {
817 output_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63));
818 if (ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size))
819 flag = 1;
820 else // Reset the bitmask.
821 output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
822 }
823 }
824 // Only check whether there are inputs we can remove if it is a backward pass.
825 if (!ccv_nnc_cmd_is_forward(exec_symbol_info->cmd))
826 // For inputs, whether dead or not, we try to limit them to the smallest number.
827 for (i = 0; i < exec_symbol_info->input_size; i++)
828 {
829 const int d = exec_symbol_info->inputs[i];
830 // If this tensor currently is marked as dead, try to see whether it works when we don't have this tensor at all.
831 if (d >= 0 && (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))))
832 {
833 input_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63));
834 if (ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size))
835 flag = 1;
836 else // Reset the bitmask.
837 input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
838 }
839 }
840 } while (flag);
841 // Now we know which ones to keep and which ones to undead.
842 for (i = 0; i < exec_symbol_info->input_size; i++)
843 if (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))
844 {
845 const int d = exec_symbol_info->inputs[i];
846 if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f))))
847 {
848 ccv_array_push(next, &d); // Push to the next round to be undead.
849 tensor_visited[d >> 5] |= (1u << (d & 0x1f));
850 }
851 } else {
852 // Clean up the inputs.
853 exec_symbol_info->inputs[i] = CCV_NNC_NO_TENSOR_SYMBOL;
854 assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->inputs == exec_symbol_info->inputs);
855 }
856 for (i = 0; i < exec_symbol_info->output_size; i++)
857 if (output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))
858 {
859 const int d = exec_symbol_info->outputs[i];
860 if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f))))
861 {
862 ccv_array_push(next, &d); // Push to the next round to be undead.
863 tensor_visited[d >> 5] |= (1u << (d & 0x1f));
864 }
865 } else {
866 // Clean up the outputs.
867 exec_symbol_info->outputs[i] = CCV_NNC_NO_TENSOR_SYMBOL;
868 assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->outputs == exec_symbol_info->outputs);
869 }
870}
871
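The function above contains the path behind the line-800 warning: at step 31 the analyzer assumes input_bitmask_size is 0, so the VLA at line 792 gets ccv_max(1, 0) = 1 element and the zeroing loop at line 793 never runs; at step 37 it then assumes input_size > 0, so line 800 reads the never-written input_bitmasks[0]. For non-negative input_size those two assumptions are mutually exclusive, since input_bitmask_size = (input_size + 63) >> 6, which suggests a false positive the analyzer reaches because it does not track the >> 6 relation between the two variables. A minimal, self-contained sketch of one possible way to satisfy it, sizing the zeroing loop by the same ccv_max(1, ...) expression used to declare the VLA (this is a hypothetical rewrite, not the code as it ships; my_max stands in for ccv_max):

#include <stdint.h>
#include <stdio.h>

#define my_max(a, b) ({ typeof (a) _a = (a); typeof (b) _b = (b); (_a > _b) ? _a : _b; })

// Zero every element the VLA actually has, so element 0 is written even
// when bitmask_size is 0 (i.e., input_size == 0).
static uint64_t first_word_after_init(const int input_size)
{
	int i;
	const int bitmask_size = (input_size + 63) >> 6;
	const int vla_size = my_max(1, bitmask_size);
	uint64_t bitmasks[vla_size];
	for (i = 0; i < vla_size; i++) // Was: i < bitmask_size.
		bitmasks[i] = 0;
	for (i = 0; i < input_size; i++)
		bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63)); // Line 800's pattern, now safe.
	return bitmasks[0];
}

int main(void)
{
	// Prints 0: no garbage read even in the input_size == 0 corner.
	printf("%llu\n", (unsigned long long)first_word_after_init(0));
	return 0;
}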
872static void _ccv_nnc_symbolic_graph_pruning(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
873{
874 uint32_t* const tensor_visited = (uint32_t*)cccalloc(sizeof(uint32_t), (simplify->tensor_symbol_info_size + 31) >> 5);
875 ccv_array_t* const preserve[2] = {
876 ccv_array_new(sizeof(int), output_size, 0),
877 ccv_array_new(sizeof(int), 0, 0),
878 };
879 int i, j;
880 ccv_array_t** const r_alias_refs = (ccv_array_t**)cccalloc(sizeof(ccv_array_t*), simplify->tensor_symbol_info_size);
881 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
5
Assuming 'i' is >= field 'tensor_symbol_info_size'
6
Loop condition is false. Execution continues on line 890
882 if (simplify->tensor_symbol_info[i].alias_ref)
883 {
884 const int alias_ref = simplify->tensor_symbol_info[i].alias_ref - 1;
885 assert(alias_ref < simplify->tensor_symbol_info_size);
886 if (!r_alias_refs[alias_ref])
887 r_alias_refs[alias_ref] = ccv_array_new(sizeof(int), 1, 0);
888 ccv_array_push(r_alias_refs[alias_ref], &i);
889 }
890 uint32_t* const exec_dead = simplify->exec_dead;
891 uint32_t* const tensor_dead = simplify->tensor_dead;
892 int* const output_execs = simplify->output_execs;
893 _ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify);
894 // Mark everything visited as dead.
895 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
7
Loop condition is false. Execution continues on line 913
896 if (node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT) // If optimization pass disabled, skip.
897 continue;
898 exec_dead[idx >> 5] |= (1u << (idx & 0x1f));
899 for (i = 0; i < node->input_size; i++)
900 {
901 const int d = node->inputs[i];
902 if (d >= 0)
903 tensor_dead[d >> 5] |= (1u << (d & 0x1f));
904 }
905 for (i = 0; i < node->output_size; i++)
906 {
907 const int d = node->outputs[i];
908 if (d >= 0)
909 tensor_dead[d >> 5] |= (1u << (d & 0x1f));
910 }
911 } ccv_nnc_graph_visit_endfor
912 // If the tensor symbol is used by other exec that is not visited, unmark it.
913 for (i = 0; i < simplify->exec_symbol_info_size; i++)
8
Assuming 'i' is >= field 'exec_symbol_info_size'
9
Loop condition is false. Execution continues on line 933
914 {
915 if (exec_dead[i >> 5] & (1u << (i & 0x1f)))
916 continue;
917 const ccv_nnc_graph_exec_symbol_info_t* const node = simplify->exec_symbol_info + i;
918 for (j = 0; j < node->input_size; j++)
919 {
920 const int d = node->inputs[j];
921 // Undead it.
922 if (d >= 0)
923 tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
924 }
925 for (j = 0; j < node->output_size; j++)
926 {
927 const int d = node->outputs[j];
928 // Undead it.
929 if (d >= 0)
930 tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
931 }
932 }
933 for (i = 0; i < output_size; i++)
10
Assuming 'i' is >= 'output_size'
11
Loop condition is false. Execution continues on line 935
934 ccv_array_push(preserve[0], &outputs[i].d);
935 int p = 0, q = 1;
936 // BFS to mark execs / tensors as not dead.
937 while (preserve[p]->rnum > 0)
12
Assuming field 'rnum' is > 0
13
Loop condition is true. Entering loop body
938 {
939 ccv_array_clear(preserve[q]);
940 // First, undead all relevant tensors.
941 for (i = 0; i < preserve[p]->rnum; i++)
13.1
'i' is < field 'rnum'
14
Loop condition is true. Entering loop body
19
Assuming 'i' is >= field 'rnum'
20
Loop condition is false. Execution continues on line 961
942 {
943 const int d = *(int*)ccv_array_get(preserve[p], i);
944 // Undead the outputs.
945 tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
946 int alias_ref = d;
947 if (simplify->tensor_symbol_info[d].alias_ref)
15
Assuming field 'alias_ref' is 0
16
Taking false branch
948 {
949 alias_ref = simplify->tensor_symbol_info[d].alias_ref - 1;
950 tensor_dead[alias_ref >> 5] &= ~(1u << (alias_ref & 0x1f));
951 assert(r_alias_refs[alias_ref]);
952 }
953 if (r_alias_refs[alias_ref])
17
Assuming the condition is false
18
Taking false branch
954 for (j = 0; j < r_alias_refs[alias_ref]->rnum; j++)
955 {
956 const int b = *(int*)ccv_array_get(r_alias_refs[alias_ref], j);
957 if (output_execs[b] >= 0) // Only revive it if it is a written alias.
958 tensor_dead[b >> 5] &= ~(1u << (b & 0x1f));
959 }
960 }
961 for (i = 0; i < preserve[p]->rnum; i++)
21
Loop condition is true. Entering loop body
962 {
963 const int d = *(int*)ccv_array_get(preserve[p], i);
964 const int output_exec = output_execs[d];
965 // Undead the exec.
966 if (output_exec >= 0)
22
Assuming 'output_exec' is >= 0
23
Taking true branch
967 _ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]);
24
Calling '_ccv_nnc_symbolic_graph_pruning_undead_exec'
968 int alias_ref = d;
969 if (simplify->tensor_symbol_info[d].alias_ref)
970 {
971 alias_ref = simplify->tensor_symbol_info[d].alias_ref - 1;
972 const int output_exec = output_execs[alias_ref];
973 if (output_exec >= 0)
974 _ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]);
975 }
976 if (r_alias_refs[alias_ref])
977 for (j = 0; j < r_alias_refs[alias_ref]->rnum; j++)
978 {
979 const int b = *(int*)ccv_array_get(r_alias_refs[alias_ref], j);
980 const int output_exec = output_execs[b];
981 if (output_exec >= 0)
982 _ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]);
983 }
984 }
985 CCV_SWAP(p, q, i);
986 }
987 ccfree(tensor_visited);
988 ccv_array_free(preserve[0]);
989 ccv_array_free(preserve[1]);
990 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
991 if (r_alias_refs[i])
992 ccv_array_free(r_alias_refs[i]);
993 ccfree(r_alias_refs);
994}
995
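The pruning pass above is a breadth-first search over two swapped worklists: preserve[p] holds the current frontier of tensors to undead, each round pushes newly reachable tensors into preserve[q], and CCV_SWAP exchanges the roles until the frontier is empty. A minimal sketch of the same two-array ping-pong, with plain C arrays standing in for ccv_array_t and a toy dependency graph (names and data are illustrative only):

#include <stdio.h>

// Same expansion as CCV_SWAP shown at line 985.
#define MY_SWAP(x, y, t) ((t) = (x), (x) = (y), (y) = (t))

// Two-worklist BFS: dep[d] is the single predecessor of node d (or -1),
// playing the role of output_execs / inputs in the pass above.
int main(void)
{
	const int dep[6] = { -1, 0, 1, 1, -1, 3 };
	int visited[6] = { 0 };
	int frontier[2][6], count[2] = { 0, 0 };
	int p = 0, q = 1, i;
	frontier[p][count[p]++] = 5; // Start from the requested output.
	visited[5] = 1;
	while (count[p] > 0)
	{
		count[q] = 0; // Mirrors ccv_array_clear(preserve[q]).
		for (i = 0; i < count[p]; i++)
		{
			const int d = frontier[p][i];
			printf("undead %d\n", d); // Visits 5, 3, 1, 0.
			const int pred = dep[d];
			if (pred >= 0 && !visited[pred])
			{
				visited[pred] = 1;
				frontier[q][count[q]++] = pred; // Push to the next round.
			}
		}
		MY_SWAP(p, q, i);
	}
	return 0;
}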
996void ccv_nnc_symbolic_graph_simplify(ccv_nnc_symbolic_graph_t* const graph, const int* const passes, const int pass_size, const ccv_nnc_tensor_symbol_t* const binds, const int bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
997{
998 ccv_nnc_symbolic_graph_simplify_t* const simplify = _ccv_nnc_symbolic_graph_simplify_new(graph, sources, source_size, destinations, destination_size);
999 int i;
1000 for (i = 0; i < pass_size; i++)
1
Assuming 'i' is < 'pass_size'
2
Loop condition is true. Entering loop body
1001 switch (passes[i])
3
Control jumps to 'case CCV_NNC_SIMPLIFY_GRAPH_PRUNING:' at line 1009
1002 {
1003 case CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION:
1004 _ccv_nnc_symbolic_graph_common_subexpression_elimination(simplify, outputs, output_size);
1005 break;
1006 case CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT:
1007 _ccv_nnc_symbolic_graph_data_transfer_opt(simplify, binds, bind_size, outputs, output_size);
1008 break;
1009 case CCV_NNC_SIMPLIFY_GRAPH_PRUNING:
1010 _ccv_nnc_symbolic_graph_pruning(simplify, outputs, output_size);
4
Calling '_ccv_nnc_symbolic_graph_pruning'
1011 break;
1012 case CCV_NNC_SIMPLIFY_OPS_FUSION:
1013 _ccv_nnc_symbolic_graph_ops_fusion(simplify, outputs, output_size);
1014 break;
1015 }
1016 _ccv_nnc_symbolic_graph_simplify_apply(simplify);
1017 _ccv_nnc_symbolic_graph_simplify_free(simplify);
1018}
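From the dispatcher's signature at line 996, a caller passes an ordered list of pass ids; each pass mutates the shared simplify state in turn, and the result is applied to the graph once at the end. A hedged sketch of a call site (graph, binds, outputs, sources and destinations are assumed to have been set up elsewhere; the pass order shown is an illustrative choice, not one prescribed by the source):

// Fragment only: `graph`, `binds`/`bind_size`, `outputs`/`output_size`,
// `sources`/`source_size` and `destinations`/`destination_size` come from
// prior graph construction, which is elided here.
const int passes[] = {
	CCV_NNC_SIMPLIFY_OPS_FUSION,
	CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
	CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,
	CCV_NNC_SIMPLIFY_GRAPH_PRUNING,
};
ccv_nnc_symbolic_graph_simplify(graph,
	passes, sizeof(passes) / sizeof(int),
	binds, bind_size,
	outputs, output_size,
	sources, source_size,
	destinations, destination_size);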