Bug Summary

File: nnc/ccv_nnc_symbolic_graph_simplify.c
Warning: line 801, column 28
The left expression of the compound assignment is an uninitialized value. The computed value will also be garbage

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_symbolic_graph_simplify.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/18 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -D HAVE_CUDA_SM80 -I /usr/local/include -internal-isystem /usr/local/lib/clang/18/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o 
/home/liu/actions-runner/_work/ccv/ccv/_analyze/2024-09-15-184933-339151-1 -x c ccv_nnc_symbolic_graph_simplify.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_nnc_symbolic_graph.h"
6#include "3rdparty/siphash/siphash24.h"
7
8// MARK - Level-3.5 API
9
// 16-byte SipHash key. The string literal is exactly 16 characters, so its
// terminating NUL is dropped (legal in C when the initializer exactly fills
// the array). Presumably used with siphash24 (included above) to fingerprint
// graph nodes for common sub-expression elimination — confirm at use site.
static uint8_t key_siphash[16] = "graphcsekvlibnnc";
11
// Book-keeping state shared by the graph simplification passes over one
// symbolic graph. Created by _ccv_nnc_symbolic_graph_simplify_new and
// released by _ccv_nnc_symbolic_graph_simplify_free.
typedef struct {
	int tensor_symbol_info_size; // Number of tensor symbols (length of tensor_symbol_info and output_execs).
	int exec_symbol_info_size; // Number of exec symbols (length of exec_symbol_info).
	ccv_nnc_symbolic_graph_t* graph; // The graph being simplified.
	ccv_nnc_graph_visit_t* visit; // Visit schedule over the graph's execs (from ccv_nnc_graph_visit_new).
	ccv_nnc_tensor_symbol_info_t* tensor_symbol_info; // Inferred per-tensor symbol info.
	ccv_nnc_graph_exec_symbol_info_t* exec_symbol_info; // Inferred per-exec symbol info.
	uint32_t* exec_dead; // Mark a exec is dead and need to be cleared, each bit represent a exec.
	uint32_t* tensor_dead; // Mark a tensor is dead and need to be cleared, each bit represent a tensor. Points into the exec_dead allocation.
	int* output_execs; // Mapping from tensor to the exec that generates this tensor.
} ccv_nnc_symbolic_graph_simplify_t;
23
24static void _ccv_nnc_symbolic_graph_simplify_update_output_execs(ccv_nnc_symbolic_graph_simplify_t* const simplify)
25{
26 int i;
27 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
28 simplify->output_execs[i] = -1;
29 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
30 if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
31 continue;
32 for (i = 0; i < node->output_size; i++)
33 if (node->outputs[i] >= 0)
34 simplify->output_execs[node->outputs[i]] = idx; // A tensor can only be written once.
35 } ccv_nnc_graph_visit_endfor} }
36}
37
38static ccv_nnc_symbolic_graph_simplify_t* _ccv_nnc_symbolic_graph_simplify_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
39{
40 ccv_nnc_symbolic_graph_simplify_t* const simplify = (ccv_nnc_symbolic_graph_simplify_t*)ccmallocmalloc(sizeof(ccv_nnc_symbolic_graph_simplify_t));
41 simplify->graph = graph;
42 simplify->visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0), graph->exec_symbol_info->rnum, sources, source_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((graph->exec_symbol_info->rnum) - 1)); _visit_->size
= 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_symbol_info->
rnum); _i_++) _incoming_edges_ += (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_i_].outgoings) ? ((ccv_nnc_graph_exec_symbol_info_t*)((void
*)(((char*)((graph->exec_symbol_info)->data)) + (size_t
)(graph->exec_symbol_info)->rsize * (size_t)(0))))[_i_]
.outgoings->rnum : 0; const int _heap_mem_ = ((graph->exec_symbol_info
->rnum) + _incoming_edges_ > 1024); ccv_nnc_incoming_t*
_incomings_; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t
*)malloc(sizeof(ccv_nnc_incoming_t) * (graph->exec_symbol_info
->rnum) + sizeof(int32_t) * ((graph->exec_symbol_info->
rnum) * 2 + _incoming_edges_)); else _incomings_ = (ccv_nnc_incoming_t
*)__builtin_alloca (sizeof(ccv_nnc_incoming_t) * (graph->exec_symbol_info
->rnum) + sizeof(int32_t) * ((graph->exec_symbol_info->
rnum) * 2 + _incoming_edges_)); memset(_incomings_, 0, sizeof
(ccv_nnc_incoming_t) * (graph->exec_symbol_info->rnum))
; int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (graph->
exec_symbol_info->rnum)), (int32_t*)(_incomings_ + (graph->
exec_symbol_info->rnum)) + (graph->exec_symbol_info->
rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_symbol_info
->rnum); for (_i_ = 0; _i_ < (source_size); _i_++) { ((
void) sizeof (((sources)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == graph) ; else __assert_fail (
"(sources)[_i_].graph == graph", "ccv_nnc_symbolic_graph_simplify.c"
, 42, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(sources
)[_i_].d].r = 1; _exists_[0][_i_] = (sources)[_i_].d; } int _exist_size_
[2] = { (source_size), 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 1) continue; _incomings_[
_idx_].r = 2; if (((ccv_nnc_graph_exec_symbol_info_t*)((void*
)(((char*)((graph->exec_symbol_info)->data)) + (size_t)
(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_
].outgoings) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)
((void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void
*)(((char*)((graph->exec_symbol_info)->data)) + (size_t
)(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_
].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings)->rsize * (size_t)(_j_))); ++_incomings_
[d].c; if (_incomings_[d].r != 0) continue; _incomings_[d].r =
1; ((void) sizeof ((_exist_size_[_q_] < (graph->exec_symbol_info
->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] <
(graph->exec_symbol_info->rnum)) ; else __assert_fail (
"_exist_size_[_q_] < (graph->exec_symbol_info->rnum)"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof ((
(sources)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if (
(sources)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(sources)[_i_].d].r = 3; _exists_[0][_i_]
= (sources)[_i_].d; } _exist_size_[0] = (source_size); _exist_size_
[1] = 0; _p_ = 0, _q_ = 1; int _bump_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 3) continue; _incomings_[
_idx_].r = 4; if (((ccv_nnc_graph_exec_symbol_info_t*)((void*
)(((char*)((graph->exec_symbol_info)->data)) + (size_t)
(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_
].outgoings) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)
((void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void
*)(((char*)((graph->exec_symbol_info)->data)) + (size_t
)(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_
].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings)->rsize * (size_t)(_j_))); if (_incomings_
[d].edges == 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_
[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges -
1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; if (_incomings_
[d].r != 2) continue; _incomings_[d].r = 3; ((void) sizeof ((
_exist_size_[_q_] < (graph->exec_symbol_info->rnum))
? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph
->exec_symbol_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_symbol_info->rnum)"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof
(((destinations)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((destinations)[_i_].graph == graph) ; else __assert_fail
("(destinations)[_i_].graph == graph", "ccv_nnc_symbolic_graph_simplify.c"
, 42, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(destinations
)[_i_].d].r = 5; _exists_[0][_i_] = (destinations)[_i_].d; } _exist_size_
[0] = (destination_size); _exist_size_[1] = 0; _p_ = 0, _q_ =
1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0;
for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 5) continue
; _incomings_[_idx_].r = 6; if (_incomings_[_idx_].edges >
0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_
[d].r != 4) continue; _incomings_[d].r = 5; ((void) sizeof ((
_exist_size_[_q_] < (graph->exec_symbol_info->rnum))
? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph
->exec_symbol_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_symbol_info->rnum)"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for
(_i_ = 0; _i_ < (destination_size); _i_++) { ((void) sizeof
(((destinations)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((destinations)[_i_].graph == graph) ; else __assert_fail
("(destinations)[_i_].graph == graph", "ccv_nnc_symbolic_graph_simplify.c"
, 42, __extension__ __PRETTY_FUNCTION__); })); _incomings_[(destinations
)[_i_].d].d = 1; } for (_i_ = 0; _i_ < (source_size); _i_++
) { ((void) sizeof (((sources)[_i_].graph == graph) ? 1 : 0),
__extension__ ({ if ((sources)[_i_].graph == graph) ; else __assert_fail
("(sources)[_i_].graph == graph", "ccv_nnc_symbolic_graph_simplify.c"
, 42, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_
] = (sources)[_i_].d; } _p_ = 0; _q_ = 1; _exist_size_[0] = (
source_size); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_
]; _visit_->node[_visit_->size].index = ((_idx_)); _visit_
->node[_visit_->size].term = ((_incomings_[_idx_].d)); ++
_visit_->size;; if (_incomings_[_idx_].d) { ++_d_; _incomings_
[_idx_].r = 7; } if (((ccv_nnc_graph_exec_symbol_info_t*)((void
*)(((char*)((graph->exec_symbol_info)->data)) + (size_t
)(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_
].outgoings) { if (((ccv_nnc_graph_exec_symbol_info_t*)((void
*)(((char*)((graph->exec_symbol_info)->data)) + (size_t
)(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_
].outgoings->rnum == 1) { const int d = *(int*)((void*)(((
char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void*)(((char*)
((graph->exec_symbol_info)->data)) + (size_t)(graph->
exec_symbol_info)->rsize * (size_t)(0))))[_idx_].outgoings
)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t*)(
(void*)(((char*)((graph->exec_symbol_info)->data)) + (size_t
)(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_
].outgoings)->rsize * (size_t)(0))); --_incomings_[d].c; if
(_incomings_[d].c == 0 && _incomings_[d].r == 6 &&
_d_ < (destination_size)) { _exists_[_p_][_i_] = d; continue
; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)
((void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void
*)(((char*)((graph->exec_symbol_info)->data)) + (size_t
)(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_
].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings)->rsize * (size_t)(_j_))); --_incomings_
[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r ==
6 && _d_ < (destination_size)) { ((void) sizeof (
(_exist_size_[_q_] < (graph->exec_symbol_info->rnum)
) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph
->exec_symbol_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_symbol_info->rnum)"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[
_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (
_i_)); } for (_i_ = 0; _i_ < (destination_size); _i_++) { (
(void) sizeof (((destinations)[_i_].graph == graph) ? 1 : 0),
__extension__ ({ if ((destinations)[_i_].graph == graph) ; else
__assert_fail ("(destinations)[_i_].graph == graph", "ccv_nnc_symbolic_graph_simplify.c"
, 42, __extension__ __PRETTY_FUNCTION__); })); if (_incomings_
[(destinations)[_i_].d].r == 7) continue; if (!(0)) { ((void)
sizeof ((_incomings_[(destinations)[_i_].d].c == 0) ? 1 : 0)
, __extension__ ({ if (_incomings_[(destinations)[_i_].d].c ==
0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(destinations)[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = (((destinations
)[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_
[(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_
) free(_incomings_); } while (0);; ((void) sizeof ((_visit_->
size <= (graph->exec_symbol_info->rnum)) ? 1 : 0), __extension__
({ if (_visit_->size <= (graph->exec_symbol_info->
rnum)) ; else __assert_fail ("_visit_->size <= (graph->exec_symbol_info->rnum)"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); _visit_; })
;
43 simplify->tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_info_t) * graph->tensor_symbol_info->rnum);
44 simplify->exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_info_t) * graph->exec_symbol_info->rnum);
45 ccv_nnc_symbolic_graph_symbol_infer(graph, simplify->visit, sources, source_size, destinations, destination_size, 0, 0, simplify->tensor_symbol_info, simplify->exec_symbol_info);
46 simplify->tensor_symbol_info_size = graph->tensor_symbol_info->rnum;
47 simplify->exec_symbol_info_size = graph->exec_symbol_info->rnum;
48 simplify->exec_dead = cccalloccalloc(((simplify->exec_symbol_info_size + 31) >> 5) + ((simplify->tensor_symbol_info_size + 31) >> 5), sizeof(uint32_t));
49 simplify->tensor_dead = simplify->exec_dead + ((simplify->exec_symbol_info_size + 31) >> 5);
50 simplify->output_execs = (int*)ccmallocmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
51 return simplify;
52}
53
54static void _ccv_nnc_symbolic_graph_simplify_apply(ccv_nnc_symbolic_graph_simplify_t* const simplify)
55{
56 int i, j;
57 for (i = 0; i < simplify->exec_symbol_info_size; i++)
58 if (simplify->exec_dead[i >> 5] & (1u << (i & 0x1f)))
59 ccv_nnc_graph_exec_symbol_free(simplify->graph, (ccv_nnc_graph_exec_symbol_t){
60 .d = i,
61 .graph = simplify->graph,
62 });
63 else // If it is not marked as dead, go through to unmark tensor
64 for (j = 0; j < simplify->exec_symbol_info[i].output_size; j++)
65 {
66 const int d = simplify->exec_symbol_info[i].outputs[j];
67 if (d >= 0)
68 simplify->tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
69 }
70 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
71 if (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f)))
72 ccv_nnc_tensor_symbol_free(simplify->graph, (ccv_nnc_tensor_symbol_t){
73 .d = i,
74 .graph = simplify->graph,
75 });
76}
77
78static void _ccv_nnc_symbolic_graph_simplify_free(ccv_nnc_symbolic_graph_simplify_t* const simplify)
79{
80 ccv_nnc_graph_visit_free(simplify->visit);
81 ccfreefree(simplify->tensor_symbol_info);
82 ccfreefree(simplify->exec_symbol_info);
83 ccfreefree(simplify->exec_dead);
84 ccfreefree(simplify->output_execs);
85 ccfreefree(simplify);
86}
87
// One slot of the open-addressing (robin-hood) hash table used by the CSE
// pass (see _ccv_nnc_cse_hash_find / _ccv_nnc_cse_hash_add).
typedef struct {
	int d; // Payload: the index stored under `hash`, returned by _ccv_nnc_cse_hash_find.
	int ifbit; // Probe distance of this entry from its home slot (robin-hood displacement count).
	uint64_t hash; // 64-bit hash; 0 marks an empty slot (live hashes are asserted > 0).
} ccv_nnc_cse_hash_t;
93
94static int _ccv_nnc_cse_hash_find(ccv_nnc_cse_hash_t* const hash_map, const uint64_t hash, const int map_size)
95{
96 assert(hash > 0)((void) sizeof ((hash > 0) ? 1 : 0), __extension__ ({ if (
hash > 0) ; else __assert_fail ("hash > 0", "ccv_nnc_symbolic_graph_simplify.c"
, 96, __extension__ __PRETTY_FUNCTION__); }))
;
97 int i, j;
98 i = (hash - 1) % map_size;
99 for (j = 0; ; j++, i++)
100 {
101 if (i >= map_size)
102 i = 0;
103 if (j > hash_map[i].ifbit)
104 return -1;
105 if (hash_map[i].hash == hash)
106 return hash_map[i].d;
107 }
108}
109
110static void _ccv_nnc_cse_hash_add(ccv_nnc_cse_hash_t* const hash_map, uint64_t hash, int d, const int map_size)
111{
112 assert(hash > 0)((void) sizeof ((hash > 0) ? 1 : 0), __extension__ ({ if (
hash > 0) ; else __assert_fail ("hash > 0", "ccv_nnc_symbolic_graph_simplify.c"
, 112, __extension__ __PRETTY_FUNCTION__); }))
;
113 int i, j;
114 i = (hash - 1) % map_size;
115 for (j = 0; ; j++, i++)
116 {
117 if (i >= map_size)
118 i = 0;
119 if (hash_map[i].hash == hash) // Already exists, do nothing.
120 return;
121 if (hash_map[i].hash == 0)
122 {
123 // Insert.
124 hash_map[i].d = d;
125 hash_map[i].ifbit = j;
126 hash_map[i].hash = hash;
127 return;
128 }
129 if (j > hash_map[i].ifbit)
130 {
131 const ccv_nnc_cse_hash_t old_hash = hash_map[i];
132 // Swap, and continue, until find an empty slot.
133 hash_map[i].d = d;
134 hash_map[i].ifbit = j;
135 hash_map[i].hash = hash;
136 d = old_hash.d;
137 j = old_hash.ifbit;
138 hash = old_hash.hash;
139 }
140 }
141}
142
143static int _ccv_nnc_symbolic_graph_update_refs(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const int* const refs, const int output_exec_ref_dead)
144{
145 int i, j;
146 // Go over refs, if a tensor is an alias, mark it reference to the new one.
147 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
148 if (refs[i] >= 0)
149 // Mark this tensor as dead.
150 simplify->tensor_dead[i >> 5] |= (1u << (i & 0x1f));
151 else if (simplify->tensor_symbol_info[i].alias_ref && refs[simplify->tensor_symbol_info[i].alias_ref - 1] >= 0) {
152 const int alias_ref = simplify->tensor_symbol_info[i].alias_ref - 1;
153 simplify->tensor_symbol_info[i].alias_ref = refs[alias_ref] + 1;
154 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i)((void*)(((char*)((simplify->graph->tensor_symbol_info)
->data)) + (size_t)(simplify->graph->tensor_symbol_info
)->rsize * (size_t)(i)))
)->alias_ref = refs[alias_ref] + 1;
155 }
156 for (i = 0; i < output_size; i++)
157 // If the output is an alias, that's fine, because if the alias is re-binded, we are good.
158 simplify->tensor_dead[outputs[i].d >> 5] &= ~(1u << (outputs[i].d & 0x1f)); // Undead for output tensor symbols.
159 // Merge s_refs if the tensor is dead.
160 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
161 if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))))
162 {
163 const int ref = refs[i];
164 if (simplify->tensor_symbol_info[i].s_ref && simplify->tensor_symbol_info[i].s_ref->rnum)
165 {
166 if (!simplify->tensor_symbol_info[ref].s_ref) // If there is no s_ref, simple, just assign the pointer and set the old one to nil.
167 {
168 simplify->tensor_symbol_info[ref].s_ref = simplify->tensor_symbol_info[i].s_ref;
169 simplify->tensor_symbol_info[i].s_ref = 0;
170 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i)((void*)(((char*)((simplify->graph->tensor_symbol_info)
->data)) + (size_t)(simplify->graph->tensor_symbol_info
)->rsize * (size_t)(i)))
)->s_ref = 0;
171 } else {
172 ccv_array_t* const ref_s_ref = simplify->tensor_symbol_info[ref].s_ref;
173 ccv_array_t* const i_s_ref = simplify->tensor_symbol_info[i].s_ref;
174 const int ref_s_ref_rnum = ref_s_ref->rnum;
175 int flag = 0;
176 // Detect conflict, if there is, undead.
177 for (j = 0; !flag && j < ccv_min(ref_s_ref_rnum, i_s_ref->rnum)({ typeof (ref_s_ref_rnum) _a = (ref_s_ref_rnum); typeof (i_s_ref
->rnum) _b = (i_s_ref->rnum); (_a < _b) ? _a : _b; }
)
; j++)
178 {
179 const int ref_s_ref_k = *(int*)ccv_array_get(ref_s_ref, j)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref
)->rsize * (size_t)(j)))
;
180 const int i_s_ref_k = *(int*)ccv_array_get(i_s_ref, j)((void*)(((char*)((i_s_ref)->data)) + (size_t)(i_s_ref)->
rsize * (size_t)(j)))
;
181 // If for the same sub-graph, they have different tensors linked, we cannot merge these two.
182 flag = (ref_s_ref_k > 0 && i_s_ref_k > 0 && ref_s_ref_k != i_s_ref_k);
183 }
184 if (flag)
185 {
186 simplify->tensor_dead[i >> 5] &= ~(1u << (i & 0x1f)); // Undead
187 continue;
188 }
189 if (ref_s_ref_rnum < i_s_ref->rnum)
190 {
191 ccv_array_resize(ref_s_ref, i_s_ref->rnum);
192 memcpy(ccv_array_get(ref_s_ref, ref_s_ref_rnum)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref
)->rsize * (size_t)(ref_s_ref_rnum)))
, ccv_array_get(i_s_ref, ref_s_ref_rnum)((void*)(((char*)((i_s_ref)->data)) + (size_t)(i_s_ref)->
rsize * (size_t)(ref_s_ref_rnum)))
, sizeof(int) * (i_s_ref->rnum - ref_s_ref_rnum));
193 }
194 for (j = 0; j < ccv_min(ref_s_ref_rnum, i_s_ref->rnum)({ typeof (ref_s_ref_rnum) _a = (ref_s_ref_rnum); typeof (i_s_ref
->rnum) _b = (i_s_ref->rnum); (_a < _b) ? _a : _b; }
)
; j++)
195 {
196 const int ref_s_ref_k = *(int*)ccv_array_get(ref_s_ref, j)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref
)->rsize * (size_t)(j)))
;
197 const int i_s_ref_k = *(int*)ccv_array_get(i_s_ref, j)((void*)(((char*)((i_s_ref)->data)) + (size_t)(i_s_ref)->
rsize * (size_t)(j)))
;
198 assert(ref_s_ref_k == 0 || i_s_ref_k == 0)((void) sizeof ((ref_s_ref_k == 0 || i_s_ref_k == 0) ? 1 : 0)
, __extension__ ({ if (ref_s_ref_k == 0 || i_s_ref_k == 0) ; else
__assert_fail ("ref_s_ref_k == 0 || i_s_ref_k == 0", "ccv_nnc_symbolic_graph_simplify.c"
, 198, __extension__ __PRETTY_FUNCTION__); }))
;
199 if (i_s_ref_k)
200 *(int*)ccv_array_get(ref_s_ref, j)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref
)->rsize * (size_t)(j)))
= i_s_ref_k;
201 }
202 ccv_array_free(simplify->tensor_symbol_info[i].s_ref);
203 simplify->tensor_symbol_info[i].s_ref = 0;
204 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i)((void*)(((char*)((simplify->graph->tensor_symbol_info)
->data)) + (size_t)(simplify->graph->tensor_symbol_info
)->rsize * (size_t)(i)))
)->s_ref = 0;
205 for (j = 0; j < ref_s_ref->rnum; j++)
206 {
207 const int ref_k = *(int*)ccv_array_get(ref_s_ref, j)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref
)->rsize * (size_t)(j)))
- 1;
208 if (ref_k >= 0)
209 {
210 ccv_nnc_symbolic_graph_t* const sub_graph = *(ccv_nnc_symbolic_graph_t**)ccv_array_get(simplify->graph->sub_graphs, j)((void*)(((char*)((simplify->graph->sub_graphs)->data
)) + (size_t)(simplify->graph->sub_graphs)->rsize * (
size_t)(j)))
;
211 assert(sub_graph)((void) sizeof ((sub_graph) ? 1 : 0), __extension__ ({ if (sub_graph
) ; else __assert_fail ("sub_graph", "ccv_nnc_symbolic_graph_simplify.c"
, 211, __extension__ __PRETTY_FUNCTION__); }))
;
212 // Update its p_ref.
213 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(sub_graph->tensor_symbol_info, ref_k)((void*)(((char*)((sub_graph->tensor_symbol_info)->data
)) + (size_t)(sub_graph->tensor_symbol_info)->rsize * (
size_t)(ref_k)))
)->p_ref = ref + 1;
214 }
215 }
216 }
217 assert(simplify->tensor_symbol_info[i].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->s_ref)((void) sizeof ((simplify->tensor_symbol_info[i].s_ref == (
(ccv_nnc_tensor_symbol_info_t*)((void*)(((char*)((simplify->
graph->tensor_symbol_info)->data)) + (size_t)(simplify->
graph->tensor_symbol_info)->rsize * (size_t)(i))))->
s_ref) ? 1 : 0), __extension__ ({ if (simplify->tensor_symbol_info
[i].s_ref == ((ccv_nnc_tensor_symbol_info_t*)((void*)(((char*
)((simplify->graph->tensor_symbol_info)->data)) + (size_t
)(simplify->graph->tensor_symbol_info)->rsize * (size_t
)(i))))->s_ref) ; else __assert_fail ("simplify->tensor_symbol_info[i].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->s_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 217, __extension__ __PRETTY_FUNCTION__
); }))
;
218 assert(simplify->tensor_symbol_info[ref].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref))->s_ref)((void) sizeof ((simplify->tensor_symbol_info[ref].s_ref ==
((ccv_nnc_tensor_symbol_info_t*)((void*)(((char*)((simplify->
graph->tensor_symbol_info)->data)) + (size_t)(simplify->
graph->tensor_symbol_info)->rsize * (size_t)(ref))))->
s_ref) ? 1 : 0), __extension__ ({ if (simplify->tensor_symbol_info
[ref].s_ref == ((ccv_nnc_tensor_symbol_info_t*)((void*)(((char
*)((simplify->graph->tensor_symbol_info)->data)) + (
size_t)(simplify->graph->tensor_symbol_info)->rsize *
(size_t)(ref))))->s_ref) ; else __assert_fail ("simplify->tensor_symbol_info[ref].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref))->s_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 218, __extension__ __PRETTY_FUNCTION__
); }))
;
219 }
220 }
221 // Going through refs that we are updating, going through its p_ref to make sure both are updated.
222 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
223 if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))) && simplify->tensor_symbol_info[i].p_ref)
224 {
225 const int ref = refs[i];
226 const int p_ref = simplify->tensor_symbol_info[i].p_ref - 1;
227 assert(p_ref >= 0)((void) sizeof ((p_ref >= 0) ? 1 : 0), __extension__ ({ if
(p_ref >= 0) ; else __assert_fail ("p_ref >= 0", "ccv_nnc_symbolic_graph_simplify.c"
, 227, __extension__ __PRETTY_FUNCTION__); }))
;
228 assert(simplify->graph->p)((void) sizeof ((simplify->graph->p) ? 1 : 0), __extension__
({ if (simplify->graph->p) ; else __assert_fail ("simplify->graph->p"
, "ccv_nnc_symbolic_graph_simplify.c", 228, __extension__ __PRETTY_FUNCTION__
); }))
;
229 ccv_array_t* const s_ref = ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->p->tensor_symbol_info, p_ref)((void*)(((char*)((simplify->graph->p->tensor_symbol_info
)->data)) + (size_t)(simplify->graph->p->tensor_symbol_info
)->rsize * (size_t)(p_ref)))
)->s_ref;
230 const int s_idx = simplify->graph->p_idx - 1;
231 assert(s_idx >= 0)((void) sizeof ((s_idx >= 0) ? 1 : 0), __extension__ ({ if
(s_idx >= 0) ; else __assert_fail ("s_idx >= 0", "ccv_nnc_symbolic_graph_simplify.c"
, 231, __extension__ __PRETTY_FUNCTION__); }))
;
232 assert(s_ref && s_ref->rnum > s_idx)((void) sizeof ((s_ref && s_ref->rnum > s_idx) ?
1 : 0), __extension__ ({ if (s_ref && s_ref->rnum
> s_idx) ; else __assert_fail ("s_ref && s_ref->rnum > s_idx"
, "ccv_nnc_symbolic_graph_simplify.c", 232, __extension__ __PRETTY_FUNCTION__
); }))
;
233 *(int*)ccv_array_get(s_ref, s_idx)((void*)(((char*)((s_ref)->data)) + (size_t)(s_ref)->rsize
* (size_t)(s_idx)))
= ref + 1; // Update so it references to the new s_ref.
234 assert(!simplify->tensor_symbol_info[ref].p_ref)((void) sizeof ((!simplify->tensor_symbol_info[ref].p_ref)
? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[ref].p_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[ref].p_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 234, __extension__ __PRETTY_FUNCTION__
); }))
;
235 simplify->tensor_symbol_info[ref].p_ref = p_ref + 1;
236 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref)((void*)(((char*)((simplify->graph->tensor_symbol_info)
->data)) + (size_t)(simplify->graph->tensor_symbol_info
)->rsize * (size_t)(ref)))
)->p_ref = p_ref + 1;
237 }
238 // Now go over exec to mark them as dead because we don't need these to generate refs.
239 if (output_exec_ref_dead)
240 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
241 if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))))
242 {
243 const int output_exec = simplify->output_execs[i];
244 assert(output_exec >= 0)((void) sizeof ((output_exec >= 0) ? 1 : 0), __extension__
({ if (output_exec >= 0) ; else __assert_fail ("output_exec >= 0"
, "ccv_nnc_symbolic_graph_simplify.c", 244, __extension__ __PRETTY_FUNCTION__
); }))
;
245 const ccv_nnc_graph_exec_symbol_info_t* const symbol_info = simplify->exec_symbol_info + output_exec;
246 int flag = 0;
247 for (j = 0; !flag && j < symbol_info->output_size; j++)
248 {
249 const int d = symbol_info->outputs[j];
250 if (d >= 0)
251 flag = (!(simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f)))); // If some of the output is not dead, we cannot proceed.
252 }
253 if (!flag) // If all outputs are dead, mark the exec as dead.
254 simplify->exec_dead[output_exec >> 5] |= (1u << (output_exec & 0x1f));
255 }
256 int updated_refs = 0;
257 // Go over replace inputs / outputs.
258 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
259 for (i = 0; i < node->input_size; i++)
260 {
261 const int d = node->inputs[i];
262 if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
263 {
264 node->inputs[i] = refs[d]; // It can be replaced.
265 updated_refs = 1;
266 }
267 }
268 for (i = 0; i < node->output_size; i++)
269 {
270 const int d = node->outputs[i];
271 if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
272 {
273 node->outputs[i] = refs[d]; // It can be replaced.
274 updated_refs = 1;
275 }
276 }
277 assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->inputs == node->inputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*)
(((char*)((simplify->graph->exec_symbol_info)->data)
) + (size_t)(simplify->graph->exec_symbol_info)->rsize
* (size_t)(idx))))->inputs == node->inputs) ? 1 : 0), __extension__
({ if (((ccv_nnc_graph_exec_symbol_info_t*)((void*)(((char*)
((simplify->graph->exec_symbol_info)->data)) + (size_t
)(simplify->graph->exec_symbol_info)->rsize * (size_t
)(idx))))->inputs == node->inputs) ; else __assert_fail
("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->inputs == node->inputs"
, "ccv_nnc_symbolic_graph_simplify.c", 277, __extension__ __PRETTY_FUNCTION__
); }))
;
278 assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->outputs == node->outputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*)
(((char*)((simplify->graph->exec_symbol_info)->data)
) + (size_t)(simplify->graph->exec_symbol_info)->rsize
* (size_t)(idx))))->outputs == node->outputs) ? 1 : 0)
, __extension__ ({ if (((ccv_nnc_graph_exec_symbol_info_t*)((
void*)(((char*)((simplify->graph->exec_symbol_info)->
data)) + (size_t)(simplify->graph->exec_symbol_info)->
rsize * (size_t)(idx))))->outputs == node->outputs) ; else
__assert_fail ("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->outputs == node->outputs"
, "ccv_nnc_symbolic_graph_simplify.c", 278, __extension__ __PRETTY_FUNCTION__
); }))
;
279 } ccv_nnc_graph_visit_endfor} }
280 const ccv_nnc_graph_exec_symbol_info_t* const p_node_info = simplify->graph->p ? (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->p->exec_symbol_info, simplify->graph->exec_idx - 1)((void*)(((char*)((simplify->graph->p->exec_symbol_info
)->data)) + (size_t)(simplify->graph->p->exec_symbol_info
)->rsize * (size_t)(simplify->graph->exec_idx - 1)))
: 0;
281 if (p_node_info && (p_node_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE))
282 // Go over the while inputs as well.
283 for (i = 0; i < p_node_info->p_while.input_size; i++)
284 {
285 const int d = p_node_info->p_while.inputs[i];
286 if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
287 {
288 p_node_info->p_while.inputs[i] = refs[d];
289 updated_refs = 1;
290 }
291 }
292 return updated_refs;
293}
294
295// This is a simple common sub-expression elimination implementation, particularly, we only replace the later computed output
296// with the identical earlier computed output, and let the "elimination" part to the graph pruning.
297static void _ccv_nnc_symbolic_graph_common_subexpression_elimination(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
298{
299 _ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify);
300 // tensor_hash starts with 0s, and it is either marked with the tensor index + 1, or the hash of the computations.
301 uint64_t* const tensor_hash = (uint64_t*)cccalloccalloc(simplify->tensor_symbol_info_size, sizeof(uint64_t));
302 int i;
303 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
304 // If already marked as dead, skip.
305 if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
306 continue;
307 for (i = 0; i < node->input_size; i++)
308 {
309 assert(node->inputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->inputs[i] < simplify->tensor_symbol_info_size
) ? 1 : 0), __extension__ ({ if (node->inputs[i] < simplify
->tensor_symbol_info_size) ; else __assert_fail ("node->inputs[i] < simplify->tensor_symbol_info_size"
, "ccv_nnc_symbolic_graph_simplify.c", 309, __extension__ __PRETTY_FUNCTION__
); }))
;
310 // If no hash for the input, use the index + 1 as the hash.
311 if (node->inputs[i] >= 0 && tensor_hash[node->inputs[i]] == 0)
312 tensor_hash[node->inputs[i]] = node->inputs[i] + 1;
313 }
314 // We cannot support graph / custom command (we cannot model them properly).
315 if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD ||
316 node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ||
317 node->cmd.cmd == CCV_NNC_CUSTOM_FORWARD ||
318 node->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD ||
319 // No need to process a opt disabled node.
320 (node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT))
321 continue;
322 uint64_t hashout, hashin[3];
323 siphash((uint8_t*)&hashin[0], (const uint8_t*)&node->cmd.info, sizeof(node->cmd.info), key_siphash);
324 siphash((uint8_t*)&hashin[1], (const uint8_t*)&node->hint, sizeof(node->hint), key_siphash);
325 hashin[2] = node->cmd.cmd; // Now actually hash the cmd name.
326 siphash((uint8_t*)&hashout, (const uint8_t*)hashin, sizeof(hashin), key_siphash);
327 // First, hash the cmd and the hints with the cmd.
328 // Note on alias, we cannot really generate proper hash for alias (yet). Thus, just treat alias as normal tensors.
329 for (i = 0; i < node->input_size; i++)
330 {
331 assert(node->inputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->inputs[i] < simplify->tensor_symbol_info_size
) ? 1 : 0), __extension__ ({ if (node->inputs[i] < simplify
->tensor_symbol_info_size) ; else __assert_fail ("node->inputs[i] < simplify->tensor_symbol_info_size"
, "ccv_nnc_symbolic_graph_simplify.c", 331, __extension__ __PRETTY_FUNCTION__
); }))
;
332 if (node->inputs[i] >= 0)
333 {
334 // Hash using the tensor hash.
335 hashin[0] = hashout;
336 hashin[1] = i; // Encode the positional information.
337 hashin[2] = tensor_hash[node->inputs[i]];
338 } else {
339 // Hash using the input integer (could be special integer).
340 hashin[0] = hashout;
341 hashin[1] = i; // Encode the positional information.
342 hashin[2] = node->inputs[i];
343 }
344 siphash((uint8_t*)&hashout, (const uint8_t*)hashin, sizeof(hashin), key_siphash);
345 }
346 for (i = 0; i < node->output_size; i++)
347 if (node->outputs[i] >= 0)
348 {
349 assert(node->outputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->outputs[i] < simplify->tensor_symbol_info_size
) ? 1 : 0), __extension__ ({ if (node->outputs[i] < simplify
->tensor_symbol_info_size) ; else __assert_fail ("node->outputs[i] < simplify->tensor_symbol_info_size"
, "ccv_nnc_symbolic_graph_simplify.c", 349, __extension__ __PRETTY_FUNCTION__
); }))
;
350 // Assigning once, especially now we don't consider aliases.
351 assert(tensor_hash[node->outputs[i]] == 0)((void) sizeof ((tensor_hash[node->outputs[i]] == 0) ? 1 :
0), __extension__ ({ if (tensor_hash[node->outputs[i]] ==
0) ; else __assert_fail ("tensor_hash[node->outputs[i]] == 0"
, "ccv_nnc_symbolic_graph_simplify.c", 351, __extension__ __PRETTY_FUNCTION__
); }))
;
352 hashin[0] = hashout;
353 hashin[1] = i; // Positional information.
354 siphash((uint8_t*)&hashin[2], (const uint8_t*)&simplify->tensor_symbol_info[node->outputs[i]].info,
355 sizeof(simplify->tensor_symbol_info[node->outputs[i]].info), key_siphash);
356 // Generate hash for the output.
357 siphash((uint8_t*)&tensor_hash[node->outputs[i]], (const uint8_t*)hashin, sizeof(hashin), key_siphash);
358 }
359 } ccv_nnc_graph_visit_endfor} }
360 // Allocate 3 / 2 as much space, for the simple robin-hood open address hash map.
361 const int map_size = (simplify->tensor_symbol_info_size * 3 + 1) / 2;
362 int* const refs = (int*)ccmallocmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
363 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
364 refs[i] = -1;
365 ccv_nnc_cse_hash_t* const hash_map = (ccv_nnc_cse_hash_t*)cccalloccalloc(map_size, sizeof(ccv_nnc_cse_hash_t));
366 // Now, all tensors are hashed, identify tensors with the same hash code, replace the ones that accessed later.
367 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
368 // If already marked as dead, skip.
369 if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
370 continue;
371 // No need to visit a opt disabled node.
372 if (node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT)
373 continue;
374 for (i = 0; i < node->input_size; i++)
375 if (node->inputs[i] >= 0)
376 {
377 const int d = node->inputs[i];
378 assert(tensor_hash[d])((void) sizeof ((tensor_hash[d]) ? 1 : 0), __extension__ ({ if
(tensor_hash[d]) ; else __assert_fail ("tensor_hash[d]", "ccv_nnc_symbolic_graph_simplify.c"
, 378, __extension__ __PRETTY_FUNCTION__); }))
;
379 const int new_d = _ccv_nnc_cse_hash_find(hash_map, tensor_hash[d], map_size);
380 if (new_d >= 0 && new_d != d)
381 {
382 // Check whether this can be replaced.
383 assert(refs[d] == -1 || refs[d] == new_d)((void) sizeof ((refs[d] == -1 || refs[d] == new_d) ? 1 : 0),
__extension__ ({ if (refs[d] == -1 || refs[d] == new_d) ; else
__assert_fail ("refs[d] == -1 || refs[d] == new_d", "ccv_nnc_symbolic_graph_simplify.c"
, 383, __extension__ __PRETTY_FUNCTION__); }))
;
384 assert(!simplify->tensor_symbol_info[d].assign_ref)((void) sizeof ((!simplify->tensor_symbol_info[d].assign_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[d].assign_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[d].assign_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 384, __extension__ __PRETTY_FUNCTION__
); }))
;
385 assert(!simplify->tensor_symbol_info[d].r_assign_ref)((void) sizeof ((!simplify->tensor_symbol_info[d].r_assign_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[d].r_assign_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[d].r_assign_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 385, __extension__ __PRETTY_FUNCTION__
); }))
;
386 assert(!simplify->tensor_symbol_info[d].bypass_ref)((void) sizeof ((!simplify->tensor_symbol_info[d].bypass_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[d].bypass_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[d].bypass_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 386, __extension__ __PRETTY_FUNCTION__
); }))
;
387 assert(!simplify->tensor_symbol_info[new_d].assign_ref)((void) sizeof ((!simplify->tensor_symbol_info[new_d].assign_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[new_d].assign_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[new_d].assign_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 387, __extension__ __PRETTY_FUNCTION__
); }))
;
388 assert(!simplify->tensor_symbol_info[new_d].r_assign_ref)((void) sizeof ((!simplify->tensor_symbol_info[new_d].r_assign_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[new_d].r_assign_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[new_d].r_assign_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 388, __extension__ __PRETTY_FUNCTION__
); }))
;
389 assert(!simplify->tensor_symbol_info[new_d].bypass_ref)((void) sizeof ((!simplify->tensor_symbol_info[new_d].bypass_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[new_d].bypass_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[new_d].bypass_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 389, __extension__ __PRETTY_FUNCTION__
); }))
;
390 // Ignore if there is a pair_ref (again, pair_ref has side effect that is deeper (using tape))
391 if (simplify->tensor_symbol_info[d].pair_ref)
392 continue;
393 // If both have p_ref, we cannot merge.
394 if (simplify->tensor_symbol_info[d].p_ref && simplify->tensor_symbol_info[new_d].p_ref)
395 continue;
396 // Merge s_refs from ref[d] later.
397 if (refs[d] != new_d)
398 refs[d] = new_d;
399 assert(simplify->output_execs[new_d] >= 0)((void) sizeof ((simplify->output_execs[new_d] >= 0) ? 1
: 0), __extension__ ({ if (simplify->output_execs[new_d] >=
0) ; else __assert_fail ("simplify->output_execs[new_d] >= 0"
, "ccv_nnc_symbolic_graph_simplify.c", 399, __extension__ __PRETTY_FUNCTION__
); }))
;
400 // Establish new dependency.
401 ccv_nnc_graph_exec_symbol_concat(simplify->graph, (ccv_nnc_graph_exec_symbol_t){
402 .d = simplify->output_execs[new_d],
403 .graph = simplify->graph,
404 }, (ccv_nnc_graph_exec_symbol_t){
405 .d = idx,
406 .graph = simplify->graph,
407 });
408 }
409 }
410 // We can reuse the input, but we cannot do that for output of these commands.
411 if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD ||
412 node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ||
413 node->cmd.cmd == CCV_NNC_CUSTOM_FORWARD ||
414 node->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD)
415 continue;
416 for (i = 0; i < node->output_size; i++)
417 if (node->outputs[i] >= 0) // This tensor can be reused by others.
418 _ccv_nnc_cse_hash_add(hash_map, tensor_hash[node->outputs[i]], node->outputs[i], map_size);
419 } ccv_nnc_graph_visit_endfor} }
420 _ccv_nnc_symbolic_graph_update_refs(simplify, outputs, output_size, refs, 1 /* For these exec that generates refs, we don't need them any more. */);
421 ccfreefree(tensor_hash);
422 ccfreefree(hash_map);
423 ccfreefree(refs);
424}
425
426static void _ccv_nnc_symbolic_graph_data_transfer_opt(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const binds, const int bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
427{
428 _ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify);
429 uint32_t* const exec_dead = simplify->exec_dead;
430 const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = simplify->tensor_symbol_info;
431 int i;
432 uint32_t* const has_alias = cccalloccalloc(2 * ((simplify->tensor_symbol_info_size + 31) >> 5), sizeof(uint32_t));
433 uint32_t* const has_binds = has_alias + ((simplify->tensor_symbol_info_size + 31) >> 5);
434 for (i = 0; i < bind_size; i++)
435 has_binds[binds[i].d >> 5] |= (1u << (binds[i].d & 0x1f));
436 for (i = 0; i < output_size; i++)
437 has_binds[outputs[i].d >> 5] |= (1u << (outputs[i].d & 0x1f));
438 int* const refs = (int*)ccmallocmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
439 int updated_refs;
440 do {
441 memset(has_alias, 0, sizeof(uint32_t) * ((simplify->tensor_symbol_info_size + 31) >> 5));
442 // Go through until no updates is possible. This won't result an infinite loop because every time,
443 // a tensor is eliminated.
444 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
445 {
446 refs[i] = -1;
447 if (tensor_symbol_info[i].alias_ref)
448 {
449 const int alias_ref = tensor_symbol_info[i].alias_ref - 1;
450 has_alias[alias_ref >> 5] |= (1u << (alias_ref & 0x1f));
451 }
452 }
453 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
454 // If already marked as dead, skip.
455 if (exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
456 continue;
457 if (node->cmd.cmd != CCV_NNC_DATA_TRANSFER_FORWARD &&
458 node->cmd.cmd != CCV_NNC_DATA_TRANSFER_BACKWARD &&
459 // Conversion, if the datatype is the same, it is the data transfer op.
460 node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_FORWARD &&
461 node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_BACKWARD &&
462 // Format transform, if the format is the same, it is the data transfer op.
463 node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_FORWARD &&
464 node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_BACKWARD)
465 continue;
466 if (node->flags & CCV_NNC_GRAPH_EXEC_DISABLE_OPT) // If optimization pass disabled, skip.
467 continue;
468 for (i = 0; i < node->output_size; i++) // For data transfer, we only respect output size.
469 if (node->inputs[i] >= 0 && node->outputs[i] >= 0)
470 {
471 assert(node->inputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->inputs[i] < simplify->tensor_symbol_info_size
) ? 1 : 0), __extension__ ({ if (node->inputs[i] < simplify
->tensor_symbol_info_size) ; else __assert_fail ("node->inputs[i] < simplify->tensor_symbol_info_size"
, "ccv_nnc_symbolic_graph_simplify.c", 471, __extension__ __PRETTY_FUNCTION__
); }))
;
472 assert(node->outputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->outputs[i] < simplify->tensor_symbol_info_size
) ? 1 : 0), __extension__ ({ if (node->outputs[i] < simplify
->tensor_symbol_info_size) ; else __assert_fail ("node->outputs[i] < simplify->tensor_symbol_info_size"
, "ccv_nnc_symbolic_graph_simplify.c", 472, __extension__ __PRETTY_FUNCTION__
); }))
;
473 int input_ref = node->inputs[i];
474 while (refs[input_ref] >= 0)
475 input_ref = refs[input_ref];
476 int output_ref = node->outputs[i];
477 while (refs[output_ref] >= 0)
478 output_ref = refs[output_ref];
479 if (input_ref == output_ref)
480 continue;
481 const ccv_nnc_tensor_symbol_info_t* const input = tensor_symbol_info + input_ref;
482 const ccv_nnc_tensor_symbol_info_t* const output = tensor_symbol_info + output_ref;
483 // If they are not the same data type, skip. (Likely data conversion op).
484 if (input->info.datatype != output->info.datatype)
485 continue;
486 // If they are not the same format, skip. (Likely format transform op).
487 if (input->info.format != output->info.format)
488 continue;
489 // If they are not on the same device (even for NUMA), skip.
490 if (input->info.type != output->info.type)
491 continue;
492 // If both are alias, we cannot consolidate this.
493 if (input->alias_ref && output->alias_ref)
494 continue;
495 // If input is alias, and output has alias reference to it, output cannot be the same as input.
496 if (input->alias_ref && (has_alias[output_ref >> 5] & (1u << (output_ref & 0x1f))))
497 continue;
498 // If output is alias, and input has alias reference to it, input cannot be the same as output.
499 if (output->alias_ref && (has_alias[input_ref >> 5] & (1u << (input_ref & 0x1f))))
500 continue;
501 // If either are carry overs (for while), we cannot do anything.
502 if (input->assign_ref || output->assign_ref ||
503 input->r_assign_ref || output->r_assign_ref)
504 continue;
505 // If either are bypasses (for case..of), we cannot do anything.
506 if (input->bypass_ref || output->bypass_ref ||
507 input->r_bypass_ref || output->r_bypass_ref)
508 continue;
509 // If either are inputs / outputs connecting the parent graph, we cannot do anything.
510 if (input->p_ref || output->p_ref)
511 continue;
512 // If the type is the same, check which one is the alias.
513 // We always prefer alias.
514 if (output->alias_ref)
515 {
516 // Input cannot be binds.
517 if (has_binds[input_ref >> 5] & (1u << (input_ref & 0x1f)))
518 continue;
519 refs[input_ref] = output_ref;
520 } else { // if (input->alias_ref), else
521 // Output cannot be binds.
522 if (has_binds[output_ref >> 5] & (1u << (output_ref & 0x1f)))
523 continue;
524 refs[output_ref] = input_ref;
525 }
526 }
527 } ccv_nnc_graph_visit_endfor} }
528 // Make sure refs reference to the end.
529 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
530 if (refs[i] >= 0)
531 {
532 int ref = refs[i];
533 while (refs[ref] >= 0)
534 ref = refs[ref];
535 refs[i] = ref;
536 }
537 updated_refs = _ccv_nnc_symbolic_graph_update_refs(simplify, outputs, output_size, refs, 0 /* We still need these exec that generates the refs. */);
538 } while (updated_refs);
539 ccfreefree(refs);
540 ccfreefree(has_alias);
541 // Now, all references updated, remove data transfers that sources and destinations are the same.
542 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
543 // If already marked as dead, skip.
544 if (exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
545 continue;
546 if (node->cmd.cmd != CCV_NNC_DATA_TRANSFER_FORWARD &&
547 node->cmd.cmd != CCV_NNC_DATA_TRANSFER_BACKWARD &&
548 // Conversion, if the datatype is the same, it is the data transfer op.
549 node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_FORWARD &&
550 node->cmd.cmd != CCV_NNC_DATATYPE_CONVERSION_BACKWARD &&
551 // Format transform, if the format is the same, it is the data transfer op.
552 node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_FORWARD &&
553 node->cmd.cmd != CCV_NNC_FORMAT_TRANSFORM_BACKWARD)
554 continue;
555 for (i = 0; i < node->output_size; i++) // For data transfer, we only respect output size.
556 if (node->inputs[i] == node->outputs[i])
557 {
558 if (i + 1 < node->output_size)
559 {
560 node->inputs[i] = node->inputs[node->output_size - 1];
561 node->outputs[i] = node->outputs[node->output_size - 1];
562 }
563 --node->output_size;
564 --i;
565 }
566 node->input_size = node->output_size;
567 ((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx)((void*)(((char*)((simplify->graph->exec_symbol_info)->
data)) + (size_t)(simplify->graph->exec_symbol_info)->
rsize * (size_t)(idx)))
)->input_size = node->input_size;
568 ((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx)((void*)(((char*)((simplify->graph->exec_symbol_info)->
data)) + (size_t)(simplify->graph->exec_symbol_info)->
rsize * (size_t)(idx)))
)->output_size = node->output_size;
569 // Remove this data transfer node if it has no outputs.
570 if (node->output_size == 0)
571 exec_dead[idx >> 5] |= (1u << (idx & 0x1f));
572 } ccv_nnc_graph_visit_endfor} }
573}
574
// Describes one fusion pattern: a short sequence of commands to recognize in
// the graph, the single fused command that replaces them, and how the matched
// commands' inputs / outputs map onto the fused command's slots.
typedef struct {
	uint32_t fused_op; // The final fused op id.
	uint32_t ops_seq[2]; // The sequence of commands to identify.
	int ops_seq_size; // How many entries of ops_seq are in use.
	int ops_info_select; // Which ops' info will be selected.
	struct {
		int type; // Whether it is input, or output. It doesn't make sense for example, input in ops_seq, but output in fused_op.
		int op_idx; // Index into the ops_seq.
		int from; // The index in ops_seq.
		int to; // The index in fused_op.
	} pos[4]; // maps of positions from ops seq to fused_op for inputs (outputs).
	int pos_size; // How many entries of pos are in use.
} ccv_nnc_ops_fusion_t;
588
enum {
	CCV_NNC_OPS_FUSION_INPUT_INDEX, // The pos entry maps an input slot.
	CCV_NNC_OPS_FUSION_OUTPUT_INDEX, // The pos entry maps an output slot.
};

// Upper bound on fused inputs (or outputs) across the patterns below; used to
// size the scratch arrays during fusion.
const static int ccv_nnc_ops_fusion_io_size = 2;
// The fusion pattern table consulted by _ccv_nnc_symbolic_graph_ops_fusion.
const static ccv_nnc_ops_fusion_t ccv_nnc_ops_fusions[] = {
	// softmax followed by categorical cross entropy fuses into softmax cross entropy.
	{
		.fused_op = CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD,
		.ops_seq = {
			CCV_NNC_SOFTMAX_FORWARD, CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD,
		},
		.ops_seq_size = 2,
		.ops_info_select = 1, // Take cmd.info from the cross entropy node.
		.pos = {
			{
				.type = CCV_NNC_OPS_FUSION_INPUT_INDEX,
				.op_idx = 0,
				.from = 0,
				.to = 0,
			},
			{
				.type = CCV_NNC_OPS_FUSION_INPUT_INDEX,
				.op_idx = 1,
				.from = 1,
				.to = 1,
			},
			{
				.type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX,
				.op_idx = 0,
				.from = 0,
				.to = 1,
			},
			{
				.type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX,
				.op_idx = 1,
				.from = 0,
				.to = 0,
			},
		},
		.pos_size = 4,
	},
	// sigmoid followed by binary cross entropy fuses into sigmoid binary cross entropy.
	{
		.fused_op = CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_FORWARD,
		.ops_seq = {
			CCV_NNC_SIGMOID_FORWARD, CCV_NNC_BINARY_CROSSENTROPY_FORWARD,
		},
		.ops_seq_size = 2,
		.ops_info_select = 1, // Take cmd.info from the cross entropy node.
		.pos = {
			{
				.type = CCV_NNC_OPS_FUSION_INPUT_INDEX,
				.op_idx = 0,
				.from = 0,
				.to = 0,
			},
			{
				.type = CCV_NNC_OPS_FUSION_INPUT_INDEX,
				.op_idx = 1,
				.from = 1,
				.to = 1,
			},
			{
				.type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX,
				.op_idx = 0,
				.from = 0,
				.to = 1,
			},
			{
				.type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX,
				.op_idx = 1,
				.from = 0,
				.to = 0,
			},
		},
		.pos_size = 4,
	}
};
667
668static int _ccv_nnc_find_ops_for_fusion(const ccv_nnc_ops_fusion_t* const fusion, const int ops_idx, const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info, const uint32_t* const exec_dead, const int exec_idx, int* const fusing_exec_symbols)
669{
670 if (exec_dead[exec_idx >> 5] & (1u << (exec_idx & 0x1f)))
671 return 0;
672 const ccv_nnc_graph_exec_symbol_info_t* const node = exec_symbol_info + exec_idx;
673 // Doesn't match the ops_seq, return 0.
674 if (fusion->ops_seq[ops_idx] != node->cmd.cmd)
675 return 0;
676 fusing_exec_symbols[ops_idx] = exec_idx;
677 // If already reached the end, we are good.
678 if (ops_idx == fusion->ops_seq_size - 1)
679 return 1;
680 // Otherwise, we need to go on, but don't have any to follow-up.
681 if (!node->outgoings || !node->outgoings->rnum)
682 return 0;
683 int i;
684 for (i = 0; i < node->outgoings->rnum; i++)
685 if (_ccv_nnc_find_ops_for_fusion(fusion, ops_idx + 1, exec_symbol_info, exec_dead, *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
, fusing_exec_symbols))
686 return 1;
687 return 0;
688}
689
690static void _ccv_nnc_symbolic_graph_ops_fusion(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
691{
692 uint32_t* const exec_dead = simplify->exec_dead;
693 ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = simplify->exec_symbol_info;
694 int i, j;
695 int fusing_exec_symbols[sizeof(ccv_nnc_ops_fusions->ops_seq)];
696 int fused_inputs[ccv_nnc_ops_fusion_io_size]; // 2 is just a number based on the ops_fusion.
697 int fused_outputs[ccv_nnc_ops_fusion_io_size];
698 ccv_nnc_graph_visit_for(simplify->visit, exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(exec_symbol_info)) const node __attribute__((unused)) = (exec_symbol_info
) + idx;
{
699 // If already marked as dead, skip.
700 if (exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
701 continue;
702 // Run through rudimentary pattern matching for ops_seq. There are better ways to do this (immediately come to mind, Boyer-Moore). However, this is simpler to code.
703 // If I am running into performance issues with this, I would be very happy.
704 for (i = 0; i < sizeof(ccv_nnc_ops_fusions) / sizeof(ccv_nnc_ops_fusion_t); i++)
705 {
706 const ccv_nnc_ops_fusion_t* const ops_fusion = ccv_nnc_ops_fusions + i;
707 // Check to see if a list of symbols are possible.
708 if (ops_fusion->ops_seq[0] == node->cmd.cmd &&
709 _ccv_nnc_find_ops_for_fusion(ops_fusion, 0, exec_symbol_info, exec_dead, idx, fusing_exec_symbols))
710 {
711 // Go through all the inputs and outputs, check if they exists and are mapped.
712 // TODO: the mapping can be more sophisticated than what we have here.
713 // Also, I need to check if some inputs / outputs cannot be mapped, then we cannot continue.
714 for (j = 0; j < ccv_nnc_ops_fusion_io_size; j++)
715 fused_inputs[j] = fused_outputs[j] = CCV_NNC_NO_TENSOR_SYMBOL;
716 int input_size = 0, output_size = 0;
717 for (j = 0; j < ops_fusion->pos_size; j++)
718 {
719 ccv_nnc_graph_exec_symbol_info_t* const fusing_op = exec_symbol_info + fusing_exec_symbols[ops_fusion->pos[j].op_idx];
720 switch (ops_fusion->pos[j].type)
721 {
722 case CCV_NNC_OPS_FUSION_INPUT_INDEX:
723 fused_inputs[ops_fusion->pos[j].to] = ops_fusion->pos[j].from < fusing_op->input_size ? fusing_op->inputs[ops_fusion->pos[j].from] : CCV_NNC_NO_TENSOR_SYMBOL;
724 input_size = ccv_max(input_size, ops_fusion->pos[j].to + 1)({ typeof (input_size) _a = (input_size); typeof (ops_fusion->
pos[j].to + 1) _b = (ops_fusion->pos[j].to + 1); (_a > _b
) ? _a : _b; })
;
725 break;
726 case CCV_NNC_OPS_FUSION_OUTPUT_INDEX:
727 fused_outputs[ops_fusion->pos[j].to] = ops_fusion->pos[j].from < fusing_op->output_size ? fusing_op->outputs[ops_fusion->pos[j].from] : CCV_NNC_NO_TENSOR_SYMBOL;
728 output_size = ccv_max(output_size, ops_fusion->pos[j].to + 1)({ typeof (output_size) _a = (output_size); typeof (ops_fusion
->pos[j].to + 1) _b = (ops_fusion->pos[j].to + 1); (_a >
_b) ? _a : _b; })
;
729 break;
730 }
731 }
732 const ccv_nnc_cmd_param_t info = exec_symbol_info[fusing_exec_symbols[ops_fusion->ops_info_select]].cmd.info;
733 // Modify the first node so it is the correct type and value. I need to look back to the actual graph to get the info.
734 ccv_nnc_graph_exec_symbol_info_t* const actual_node = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx)((void*)(((char*)((simplify->graph->exec_symbol_info)->
data)) + (size_t)(simplify->graph->exec_symbol_info)->
rsize * (size_t)(idx)))
;
735 actual_node->cmd.cmd = node->cmd.cmd = ops_fusion->fused_op;
736 actual_node->cmd.info = node->cmd.info = info;
737 if (node->input_size + node->output_size < input_size + output_size)
738 actual_node->inputs = node->inputs = node->inputs ? ccreallocrealloc(node->inputs, sizeof(int) * (input_size + output_size)) : ccmallocmalloc(sizeof(int) * (input_size + output_size));
739 actual_node->outputs = node->outputs = node->inputs + input_size;
740 actual_node->input_size = node->input_size = input_size;
741 actual_node->output_size = node->output_size = output_size;
742 memcpy(node->inputs, fused_inputs, sizeof(int) * input_size);
743 memcpy(node->outputs, fused_outputs, sizeof(int) * output_size);
744 // Afterwards, mark the rest as dead.
745 for (j = 1; j < ops_fusion->ops_seq_size; j++)
746 exec_dead[fusing_exec_symbols[j] >> 5] |= (1u << (fusing_exec_symbols[j] & 0x1f));
747 break;
748 }
749 }
750 } ccv_nnc_graph_visit_endfor} }
751}
752
753static void _ccv_nnc_symbolic_graph_pruning_undead_exec(ccv_nnc_symbolic_graph_simplify_t* const simplify, const int exec_idx, uint32_t* const tensor_visited, ccv_array_t* const next)
754{
755 assert(exec_idx >= 0)((void) sizeof ((exec_idx >= 0) ? 1 : 0), __extension__ ({
if (exec_idx >= 0) ; else __assert_fail ("exec_idx >= 0"
, "ccv_nnc_symbolic_graph_simplify.c", 755, __extension__ __PRETTY_FUNCTION__
); }))
;
25
Taking true branch
756 uint32_t* const exec_dead = simplify->exec_dead;
757 uint32_t* const tensor_dead = simplify->tensor_dead;
758 exec_dead[exec_idx >> 5] &= ~(1u << (exec_idx & 0x1f)); // Undead the exec.
759 ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = simplify->exec_symbol_info + exec_idx;
760 int i;
761 if (exec_symbol_info->cmd.cmd == CCV_NNC_GRAPH_FORWARD ||
26
Assuming field 'cmd' is not equal to CCV_NNC_GRAPH_FORWARD
30
Taking false branch
762 exec_symbol_info->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ||
27
Assuming field 'cmd' is not equal to CCV_NNC_GRAPH_BACKWARD
763 exec_symbol_info->cmd.cmd == CCV_NNC_CUSTOM_FORWARD ||
28
Assuming field 'cmd' is not equal to CCV_NNC_CUSTOM_FORWARD
764 exec_symbol_info->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD)
29
Assuming field 'cmd' is not equal to CCV_NNC_CUSTOM_BACKWARD
765 {
766 // All of its inputs / outputs need to be undead for these commands.
767 for (i = 0; i < exec_symbol_info->input_size; i++)
768 {
769 const int d = exec_symbol_info->inputs[i];
770 if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f))))
771 {
772 ccv_array_push(next, &d); // Push to the next round to be undead.
773 tensor_visited[d >> 5] |= (1u << (d & 0x1f));
774 }
775 }
776 for (i = 0; i < exec_symbol_info->output_size; i++)
777 {
778 const int d = exec_symbol_info->outputs[i];
779 if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f))))
780 {
781 ccv_array_push(next, &d); // Push to the next round to be undead.
782 tensor_visited[d >> 5] |= (1u << (d & 0x1f));
783 }
784 }
785 return;
786 }
787 // Go through the input / output, to make sure that all of them can be available.
788 const int input_bitmask_size = (exec_symbol_info->input_size + 63) >> 6;
789 const int output_bitmask_size = (exec_symbol_info->output_size + 63) >> 6;
790 uint64_t input_bitmasks[ccv_max(1, input_bitmask_size)({ typeof (1) _a = (1); typeof (input_bitmask_size) _b = (input_bitmask_size
); (_a > _b) ? _a : _b; })
];
31
Assuming '_a' is > '_b'
32
'?' condition is true
791 for (i = 0; i
32.1
'i' is >= 'input_bitmask_size'
< input_bitmask_size; i++)
33
Loop condition is false. Execution continues on line 793
792 input_bitmasks[i] = 0;
793 uint64_t output_bitmasks[ccv_max(1, output_bitmask_size)({ typeof (1) _a = (1); typeof (output_bitmask_size) _b = (output_bitmask_size
); (_a > _b) ? _a : _b; })
];
34
Assuming '_a' is > '_b'
35
'?' condition is true
794 for (i = 0; i
35.1
'i' is >= 'output_bitmask_size'
< output_bitmask_size; i++)
36
Loop condition is false. Execution continues on line 796
795 output_bitmasks[i] = 0;
796 for (i = 0; i < exec_symbol_info->input_size; i++)
37
Assuming 'i' is >= field 'input_size'
38
Loop condition is false. Execution continues on line 799
797 if (exec_symbol_info->inputs[i] >= 0)
798 input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
799 for (i = 0; i < exec_symbol_info->output_size; i++)
39
Assuming 'i' is < field 'output_size'
40
Loop condition is true. Entering loop body
800 if (exec_symbol_info->outputs[i] >= 0)
41
Assuming the condition is true
42
Taking true branch
801 output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
43
The left expression of the compound assignment is an uninitialized value. The computed value will also be garbage
802 // First, mark everything with bitmasks, and verify it works.
803 assert(ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size))((void) sizeof ((ccv_nnc_cmd_bitmask(exec_symbol_info->cmd
, exec_symbol_info->input_size, exec_symbol_info->output_size
, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size
)) ? 1 : 0), __extension__ ({ if (ccv_nnc_cmd_bitmask(exec_symbol_info
->cmd, exec_symbol_info->input_size, exec_symbol_info->
output_size, input_bitmasks, input_bitmask_size, output_bitmasks
, output_bitmask_size)) ; else __assert_fail ("ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size)"
, "ccv_nnc_symbolic_graph_simplify.c", 803, __extension__ __PRETTY_FUNCTION__
); }))
;
804 int flag;
805 do {
806 flag = 0;
807 // Try to eliminate one at a time. Go over output first.
808 for (i = 0; i < exec_symbol_info->output_size; i++)
809 {
810 const int d = exec_symbol_info->outputs[i];
811 // If this tensor currently is marked as dead, try to see whether it works when we don't have this tensor at all.
812 if (d >= 0 && (tensor_dead[d >> 5] & (1u << (d & 0x1f))) &&
813 (output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))))
814 {
815 output_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63));
816 if (ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size))
817 flag = 1;
818 else // Reset the bitmask.
819 output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
820 }
821 }
822 // Only check if there are inputs we can remove if it is backward.
823 if (!ccv_nnc_cmd_is_forward(exec_symbol_info->cmd))
824 // For inputs, no matter if it s dead or not, we try to limit our input to the smallest number.
825 for (i = 0; i < exec_symbol_info->input_size; i++)
826 {
827 const int d = exec_symbol_info->inputs[i];
828 // If this tensor currently is marked as dead, try to see whether it works when we don't have this tensor at all.
829 if (d >= 0 && (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))))
830 {
831 input_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63));
832 if (ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size))
833 flag = 1;
834 else // Reset the bitmask.
835 input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
836 }
837 }
838 } while (flag);
839 // Now we know which one to keep, which one to undead.
840 for (i = 0; i < exec_symbol_info->input_size; i++)
841 if (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))
842 {
843 const int d = exec_symbol_info->inputs[i];
844 if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f))))
845 {
846 ccv_array_push(next, &d); // Push to the next round to be undead.
847 tensor_visited[d >> 5] |= (1u << (d & 0x1f));
848 }
849 } else {
850 // Clean up the inputs.
851 exec_symbol_info->inputs[i] = CCV_NNC_NO_TENSOR_SYMBOL;
852 assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->inputs == exec_symbol_info->inputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*)
(((char*)((simplify->graph->exec_symbol_info)->data)
) + (size_t)(simplify->graph->exec_symbol_info)->rsize
* (size_t)(exec_idx))))->inputs == exec_symbol_info->inputs
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((simplify->graph->exec_symbol_info)
->data)) + (size_t)(simplify->graph->exec_symbol_info
)->rsize * (size_t)(exec_idx))))->inputs == exec_symbol_info
->inputs) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->inputs == exec_symbol_info->inputs"
, "ccv_nnc_symbolic_graph_simplify.c", 852, __extension__ __PRETTY_FUNCTION__
); }))
;
853 }
854 for (i = 0; i < exec_symbol_info->output_size; i++)
855 if (output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))
856 {
857 const int d = exec_symbol_info->outputs[i];
858 if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f))))
859 {
860 ccv_array_push(next, &d); // Push to the next round to be undead.
861 tensor_visited[d >> 5] |= (1u << (d & 0x1f));
862 }
863 } else {
864 // Clean up the outputs.
865 exec_symbol_info->outputs[i] = CCV_NNC_NO_TENSOR_SYMBOL;
866 assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->outputs == exec_symbol_info->outputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*)
(((char*)((simplify->graph->exec_symbol_info)->data)
) + (size_t)(simplify->graph->exec_symbol_info)->rsize
* (size_t)(exec_idx))))->outputs == exec_symbol_info->
outputs) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((simplify->graph->exec_symbol_info)
->data)) + (size_t)(simplify->graph->exec_symbol_info
)->rsize * (size_t)(exec_idx))))->outputs == exec_symbol_info
->outputs) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->outputs == exec_symbol_info->outputs"
, "ccv_nnc_symbolic_graph_simplify.c", 866, __extension__ __PRETTY_FUNCTION__
); }))
;
867 }
868}
869
// Prune the symbolic graph down to what is reachable (backwards) from `outputs`:
// 1. Build a reverse alias map (base tensor -> tensors aliasing it).
// 2. Mark every exec / tensor touched by the topological visit as dead, then
//    unmark tensors still used by execs outside the visit.
// 3. BFS from the requested outputs, reviving producing execs (via
//    _ccv_nnc_symbolic_graph_pruning_undead_exec) and the tensors they require,
//    ping-ponging between preserve[p] (current frontier) and preserve[q] (next).
// Whatever remains dead afterwards is removed by the later apply step.
static void _ccv_nnc_symbolic_graph_pruning(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
	// One bit per tensor symbol: set once the tensor has been queued for a BFS round.
	uint32_t* const tensor_visited = (uint32_t*)cccalloc(sizeof(uint32_t), (simplify->tensor_symbol_info_size + 31) >> 5);
	// Two worklists, swapped every BFS round (current frontier / next frontier).
	ccv_array_t* const preserve[2] = {
		ccv_array_new(sizeof(int), output_size, 0),
		ccv_array_new(sizeof(int), 0, 0),
	};
	int i, j;
	// Reverse alias map: r_alias_refs[base] lists tensor symbols whose alias_ref points at base.
	ccv_array_t** const r_alias_refs = (ccv_array_t**)cccalloc(sizeof(ccv_array_t*), simplify->tensor_symbol_info_size);
	for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		if (simplify->tensor_symbol_info[i].alias_ref)
		{
			// alias_ref is 1-based; 0 means "not an alias".
			const int alias_ref = simplify->tensor_symbol_info[i].alias_ref - 1;
			assert(alias_ref < simplify->tensor_symbol_info_size);
			if (!r_alias_refs[alias_ref])
				r_alias_refs[alias_ref] = ccv_array_new(sizeof(int), 1, 0);
			ccv_array_push(r_alias_refs[alias_ref], &i);
		}
	uint32_t* const exec_dead = simplify->exec_dead;
	uint32_t* const tensor_dead = simplify->tensor_dead;
	int* const output_execs = simplify->output_execs;
	// Refresh the tensor -> producing-exec map before consulting output_execs below.
	_ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify);
	// Mark everything visited as dead.
	ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx) {
		exec_dead[idx >> 5] |= (1u << (idx & 0x1f));
		for (i = 0; i < node->input_size; i++)
		{
			const int d = node->inputs[i];
			if (d >= 0)
				tensor_dead[d >> 5] |= (1u << (d & 0x1f));
		}
		for (i = 0; i < node->output_size; i++)
		{
			const int d = node->outputs[i];
			if (d >= 0)
				tensor_dead[d >> 5] |= (1u << (d & 0x1f));
		}
	} ccv_nnc_graph_visit_endfor
	// If the tensor symbol is used by other exec that is not visited, unmark it.
	for (i = 0; i < simplify->exec_symbol_info_size; i++)
	{
		if (exec_dead[i >> 5] & (1u << (i & 0x1f)))
			continue;
		const ccv_nnc_graph_exec_symbol_info_t* const node = simplify->exec_symbol_info + i;
		for (j = 0; j < node->input_size; j++)
		{
			const int d = node->inputs[j];
			// Undead it.
			if (d >= 0)
				tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
		}
		for (j = 0; j < node->output_size; j++)
		{
			const int d = node->outputs[j];
			// Undead it.
			if (d >= 0)
				tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
		}
	}
	// Seed the BFS with the tensors the caller asked to preserve.
	for (i = 0; i < output_size; i++)
		ccv_array_push(preserve[0], &outputs[i].d);
	int p = 0, q = 1;
	// BFS to mark execs / tensors as not dead.
	while (preserve[p]->rnum > 0)
	{
		ccv_array_clear(preserve[q]);
		// First, undead all relevant tensors.
		for (i = 0; i < preserve[p]->rnum; i++)
		{
			const int d = *(int*)ccv_array_get(preserve[p], i);
			// Undead the outputs.
			tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
			int alias_ref = d;
			if (simplify->tensor_symbol_info[d].alias_ref)
			{
				// d is an alias: also revive its base tensor.
				alias_ref = simplify->tensor_symbol_info[d].alias_ref - 1;
				tensor_dead[alias_ref >> 5] &= ~(1u << (alias_ref & 0x1f));
				assert(r_alias_refs[alias_ref]);
			}
			if (r_alias_refs[alias_ref])
				for (j = 0; j < r_alias_refs[alias_ref]->rnum; j++)
				{
					const int b = *(int*)ccv_array_get(r_alias_refs[alias_ref], j);
					if (output_execs[b] >= 0) // Only revive if it is written alias.
						tensor_dead[b >> 5] &= ~(1u << (b & 0x1f));
				}
		}
		// Second pass: revive the execs that produce each frontier tensor (and its
		// base / written aliases); undead_exec pushes their required tensors onto preserve[q].
		for (i = 0; i < preserve[p]->rnum; i++)
		{
			const int d = *(int*)ccv_array_get(preserve[p], i);
			const int output_exec = output_execs[d];
			// Undead the exec.
			if (output_exec >= 0)
				_ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]);
			int alias_ref = d;
			if (simplify->tensor_symbol_info[d].alias_ref)
			{
				alias_ref = simplify->tensor_symbol_info[d].alias_ref - 1;
				const int output_exec = output_execs[alias_ref];
				if (output_exec >= 0)
					_ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]);
			}
			if (r_alias_refs[alias_ref])
				for (j = 0; j < r_alias_refs[alias_ref]->rnum; j++)
				{
					const int b = *(int*)ccv_array_get(r_alias_refs[alias_ref], j);
					const int output_exec = output_execs[b];
					if (output_exec >= 0)
						_ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]);
				}
		}
		// Swap frontiers: the "next" list becomes the current one for the following round.
		CCV_SWAP(p, q, i);
	}
	ccfree(tensor_visited);
	ccv_array_free(preserve[0]);
	ccv_array_free(preserve[1]);
	for (i = 0; i < simplify->tensor_symbol_info_size; i++)
		if (r_alias_refs[i])
			ccv_array_free(r_alias_refs[i]);
	ccfree(r_alias_refs);
}
991
992void ccv_nnc_symbolic_graph_simplify(ccv_nnc_symbolic_graph_t* const graph, const int* const passes, const int pass_size, const ccv_nnc_tensor_symbol_t* const binds, const int bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
993{
994 ccv_nnc_symbolic_graph_simplify_t* const simplify = _ccv_nnc_symbolic_graph_simplify_new(graph, sources, source_size, destinations, destination_size);
995 int i;
996 for (i = 0; i < pass_size; i++)
1
Assuming 'i' is < 'pass_size'
2
Loop condition is true. Entering loop body
997 switch (passes[i])
3
Control jumps to 'case CCV_NNC_SIMPLIFY_GRAPH_PRUNING:' at line 1005
998 {
999 case CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION:
1000 _ccv_nnc_symbolic_graph_common_subexpression_elimination(simplify, outputs, output_size);
1001 break;
1002 case CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT:
1003 _ccv_nnc_symbolic_graph_data_transfer_opt(simplify, binds, bind_size, outputs, output_size);
1004 break;
1005 case CCV_NNC_SIMPLIFY_GRAPH_PRUNING:
1006 _ccv_nnc_symbolic_graph_pruning(simplify, outputs, output_size);
4
Calling '_ccv_nnc_symbolic_graph_pruning'
1007 break;
1008 case CCV_NNC_SIMPLIFY_OPS_FUSION:
1009 _ccv_nnc_symbolic_graph_ops_fusion(simplify, outputs, output_size);
1010 break;
1011 }
1012 _ccv_nnc_symbolic_graph_simplify_apply(simplify);
1013 _ccv_nnc_symbolic_graph_simplify_free(simplify);
1014}