Bug Summary

File: nnc/ccv_nnc_symbolic_graph_simplify.c
Warning: line 720, column 28
The left expression of the compound assignment is an uninitialized value. The computed value will also be garbage

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_symbolic_graph_simplify.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model static -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -target-feature +sse2 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -resource-dir /usr/local/lib/clang/8.0.0 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_UCONTEXT -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D USE_DISPATCH -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -I /usr/local/include -internal-isystem /usr/local/include -internal-isystem /usr/local/lib/clang/8.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -fdebug-compilation-dir /home/liu/buildslave/linux-x64-runtests/build/lib/nnc -ferror-limit 19 -fmessage-length 0 -fblocks -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -o /home/liu/buildslave/public_html/analyze/2019-07-03-215927-77989-1 -x c ccv_nnc_symbolic_graph_simplify.c -faddrsig
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_nnc_symbolic_graph.h"
6#include "3rdparty/siphash/siphash24.h"
7
8#pragma mark - Level-3.5 API
9
// 16-byte SipHash key. The string literal has exactly 16 characters, so it
// fills the array with no trailing NUL — intentional for a raw key, not a C string.
static uint8_t key_siphash[16] = "graphcsekvlibnnc";
11
12typedef struct {
13 int tensor_symbol_info_size;
14 int exec_symbol_info_size;
15 ccv_nnc_symbolic_graph_t* graph;
16 ccv_nnc_graph_visit_t* visit;
17 ccv_nnc_tensor_symbol_info_t* tensor_symbol_info;
18 ccv_nnc_graph_exec_symbol_info_t* exec_symbol_info;
19 uint32_t* exec_dead; // Mark a exec is dead and need to be cleared, each bit represent a exec.
20 uint32_t* tensor_dead; // Mark a tensor is dead and need to be cleared, each bit represent a tensor.
21 int* output_execs; // Mapping from tensor to the exec that generates this tensor.
22} ccv_nnc_symbolic_graph_simplify_t;
23
24static void _ccv_nnc_symbolic_graph_simplify_update_output_execs(ccv_nnc_symbolic_graph_simplify_t* const simplify)
25{
26 int i;
27 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
28 simplify->output_execs[i] = -1;
29 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
30 if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
31 continue;
32 for (i = 0; i < node->output_size; i++)
33 if (node->outputs[i] >= 0)
34 simplify->output_execs[node->outputs[i]] = idx; // A tensor can only be written once.
35 } ccv_nnc_graph_visit_endfor} }
36}
37
38static ccv_nnc_symbolic_graph_simplify_t* _ccv_nnc_symbolic_graph_simplify_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
39{
40 ccv_nnc_symbolic_graph_simplify_t* const simplify = (ccv_nnc_symbolic_graph_simplify_t*)ccmallocmalloc(sizeof(ccv_nnc_symbolic_graph_simplify_t));
41 simplify->graph = graph;
42 simplify->visit = ccv_nnc_graph_visit_new(graph, (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0), graph->exec_symbol_info->rnum, sources, source_size, destinations, destination_size, 0)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((graph->exec_symbol_info->rnum) - 1)); _visit_->size
= 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_symbol_info->
rnum); _i_++) _incoming_edges_ += (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_i_].outgoings) ? ((ccv_nnc_graph_exec_symbol_info_t*)((void
*)(((char*)((graph->exec_symbol_info)->data)) + (size_t
)(graph->exec_symbol_info)->rsize * (size_t)(0))))[_i_]
.outgoings->rnum : 0; const int _heap_mem_ = (graph->exec_symbol_info
->rnum + _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (graph->exec_symbol_info->rnum) +
sizeof(int32_t) * ((graph->exec_symbol_info->rnum) * 2
+ _incoming_edges_)); else _incomings_ = (ccv_nnc_incoming_t
*)__builtin_alloca (sizeof(ccv_nnc_incoming_t) * (graph->exec_symbol_info
->rnum) + sizeof(int32_t) * ((graph->exec_symbol_info->
rnum) * 2 + _incoming_edges_)); memset(_incomings_, 0, sizeof
(ccv_nnc_incoming_t) * (graph->exec_symbol_info->rnum))
; int32_t* _exists_[2] = { (int32_t*)(_incomings_ + (graph->
exec_symbol_info->rnum)), (int32_t*)(_incomings_ + (graph->
exec_symbol_info->rnum)) + (graph->exec_symbol_info->
rnum), }; int32_t* const _edges_ = _exists_[1] + (graph->exec_symbol_info
->rnum); for (_i_ = 0; _i_ < (source_size); _i_++) { ((
void) sizeof (((sources)[_i_].graph == graph) ? 1 : 0), __extension__
({ if ((sources)[_i_].graph == graph) ; else __assert_fail (
"(sources)[_i_].graph == graph", "ccv_nnc_symbolic_graph_simplify.c"
, 42, __extension__ __PRETTY_FUNCTION__); })); _exists_[0][_i_
] = (sources)[_i_].d; } int _exist_size_[2] = { (source_size)
, 0, }; int _p_ = 0, _q_ = 1; while (_exist_size_[_p_] > 0
) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_][_i_]; if (
_incomings_[_idx_].r == 1) continue; _incomings_[_idx_].r = 1
; if (((ccv_nnc_graph_exec_symbol_info_t*)((void*)(((char*)((
graph->exec_symbol_info)->data)) + (size_t)(graph->exec_symbol_info
)->rsize * (size_t)(0))))[_idx_].outgoings) for (_j_ = 0; _j_
< ((ccv_nnc_graph_exec_symbol_info_t*)((void*)(((char*)((
graph->exec_symbol_info)->data)) + (size_t)(graph->exec_symbol_info
)->rsize * (size_t)(0))))[_idx_].outgoings->rnum; _j_++
) { const int d = *(int*)((void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings)->rsize * (size_t)(_j_))); ++_incomings_
[d].c; _exists_[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_
]; } } ((_i_) = (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (
_i_ = 0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources
)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources
)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (sources)[_i_].d; } _exist_size_[0
] = (source_size); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int
_bump_ = 1; while (_exist_size_[_p_] > 0) { _exist_size_[
_q_] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const
int32_t _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r
== 2) continue; _incomings_[_idx_].r = 2; if (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings) for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)
((void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void
*)(((char*)((graph->exec_symbol_info)->data)) + (size_t
)(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_
].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings)->rsize * (size_t)(_j_))); if (_incomings_
[d].edges == 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_
[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges -
1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; _exists_[
_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) =
(_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(destination_size); _i_++) { ((void) sizeof (((destinations)
[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations
)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (destinations)[_i_].d; } _exist_size_
[0] = (destination_size); _exist_size_[1] = 0; _p_ = 0, _q_ =
1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0;
for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 2) continue
; _incomings_[_idx_].r = 3; if (_incomings_[_idx_].edges >
0) for (_j_ = 0; _j_ < _incomings_[_idx_].c; _j_++) { const
int d = _edges_[_incomings_[_idx_].edges - 1 + _j_]; _exists_
[_q_][_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_)
= (_p_), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ <
(destination_size); _i_++) { ((void) sizeof (((destinations)
[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((destinations
)[_i_].graph == graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); _incomings_[(destinations)[_i_].d].d = 1; } for (_i_ =
0; _i_ < (source_size); _i_++) { ((void) sizeof (((sources
)[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((sources
)[_i_].graph == graph) ; else __assert_fail ("(sources)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); _exists_[0][_i_] = (sources)[_i_].d; } _p_ = 0; _q_ =
1; _exist_size_[0] = (source_size); _exist_size_[1] = 0; int
_d_ = 0; while (_exist_size_[_p_] > 0) { _exist_size_[_q_
] = 0; for (_i_ = 0; _i_ < _exist_size_[_p_];) { const int32_t
_idx_ = _exists_[_p_][_i_]; _visit_->node[_visit_->size
].index = ((_idx_)); _visit_->node[_visit_->size].term =
((_incomings_[_idx_].d)); ++_visit_->size;; if (_incomings_
[_idx_].d) { ++_d_; _incomings_[_idx_].r = 4; } if (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings) { if (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings->rnum == 1) { const int d = *(int*)((
void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void*)
(((char*)((graph->exec_symbol_info)->data)) + (size_t)(
graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_]
.outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings)->rsize * (size_t)(0))); --_incomings_
[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r ==
3 && _d_ < (destination_size)) { _exists_[_p_][_i_
] = d; continue; } } else for (_j_ = 0; _j_ < ((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)
((void*)(((char*)((((ccv_nnc_graph_exec_symbol_info_t*)((void
*)(((char*)((graph->exec_symbol_info)->data)) + (size_t
)(graph->exec_symbol_info)->rsize * (size_t)(0))))[_idx_
].outgoings)->data)) + (size_t)(((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((graph->exec_symbol_info)->data)) +
(size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)
)))[_idx_].outgoings)->rsize * (size_t)(_j_))); --_incomings_
[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r ==
3 && _d_ < (destination_size)) { _exists_[_q_][_exist_size_
[_q_]] = d; ++_exist_size_[_q_]; } } } ++_i_; } ((_i_) = (_p_
), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (destination_size
); _i_++) { ((void) sizeof (((destinations)[_i_].graph == graph
) ? 1 : 0), __extension__ ({ if ((destinations)[_i_].graph ==
graph) ; else __assert_fail ("(destinations)[_i_].graph == graph"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); if (_incomings_[(destinations)[_i_].d].r == 4) continue
; if (!(0)) { ((void) sizeof ((_incomings_[(destinations)[_i_
].d].c == 0) ? 1 : 0), __extension__ ({ if (_incomings_[(destinations
)[_i_].d].c == 0) ; else __assert_fail ("_incomings_[(destinations)[_i_].d].c == 0"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); } else if (_incomings_[(destinations)[_i_].d].c > 0
) continue; _visit_->node[_visit_->size].index = (((destinations
)[_i_].d)); _visit_->node[_visit_->size].term = ((_incomings_
[(destinations)[_i_].d].d)); ++_visit_->size;; } if (_heap_mem_
) free(_incomings_); } while (0);; ((void) sizeof ((_visit_->
size <= (graph->exec_symbol_info->rnum)) ? 1 : 0), __extension__
({ if (_visit_->size <= (graph->exec_symbol_info->
rnum)) ; else __assert_fail ("_visit_->size <= (graph->exec_symbol_info->rnum)"
, "ccv_nnc_symbolic_graph_simplify.c", 42, __extension__ __PRETTY_FUNCTION__
); })); _visit_; })
;
43 simplify->tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_info_t) * graph->tensor_symbol_info->rnum);
44 simplify->exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccmallocmalloc(sizeof(ccv_nnc_graph_exec_symbol_info_t) * graph->exec_symbol_info->rnum);
45 ccv_nnc_symbolic_graph_symbol_infer(graph, simplify->visit, sources, source_size, destinations, destination_size, 0, 0, simplify->tensor_symbol_info, simplify->exec_symbol_info);
46 simplify->tensor_symbol_info_size = graph->tensor_symbol_info->rnum;
47 simplify->exec_symbol_info_size = graph->exec_symbol_info->rnum;
48 simplify->exec_dead = cccalloccalloc(((simplify->exec_symbol_info_size + 31) >> 5) + ((simplify->tensor_symbol_info_size + 31) >> 5), sizeof(uint32_t));
49 simplify->tensor_dead = simplify->exec_dead + ((simplify->exec_symbol_info_size + 31) >> 5);
50 simplify->output_execs = (int*)ccmallocmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
51 return simplify;
52}
53
54static void _ccv_nnc_symbolic_graph_simplify_apply(ccv_nnc_symbolic_graph_simplify_t* const simplify)
55{
56 int i, j;
57 for (i = 0; i < simplify->exec_symbol_info_size; i++)
58 if (simplify->exec_dead[i >> 5] & (1u << (i & 0x1f)))
59 ccv_nnc_graph_exec_symbol_free(simplify->graph, (ccv_nnc_graph_exec_symbol_t){
60 .d = i,
61 .graph = simplify->graph,
62 });
63 else // If it is not marked as dead, go through to unmark tensor
64 for (j = 0; j < simplify->exec_symbol_info[i].output_size; j++)
65 {
66 const int d = simplify->exec_symbol_info[i].outputs[j];
67 if (d >= 0)
68 simplify->tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
69 }
70 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
71 if (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f)))
72 ccv_nnc_tensor_symbol_free(simplify->graph, (ccv_nnc_tensor_symbol_t){
73 .d = i,
74 .graph = simplify->graph,
75 });
76 // autogen the sources / destinations.
77 ccv_nnc_graph_exec_symbol_autogen(simplify->graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
78}
79
80static void _ccv_nnc_symbolic_graph_simplify_free(ccv_nnc_symbolic_graph_simplify_t* const simplify)
81{
82 ccv_nnc_graph_visit_free(simplify->visit);
83 ccfreefree(simplify->tensor_symbol_info);
84 ccfreefree(simplify->exec_symbol_info);
85 ccfreefree(simplify->exec_dead);
86 ccfreefree(simplify->output_execs);
87 ccfreefree(simplify);
88}
89
// Open-addressed hash map entry used by common sub-expression elimination,
// following the Robin Hood scheme: on collision, the entry that is closer to
// its ideal slot is displaced in favor of the one probing from further away.
typedef struct {
	int d; // Payload: the tensor symbol index this hash maps to.
	int ifbit; // Probe distance of this entry from its ideal slot.
	uint64_t hash; // 0 marks an empty slot; valid hashes must be non-zero.
} ccv_nnc_cse_hash_t;

// Look up `hash`; return its payload `d`, or -1 when absent.
static int _ccv_nnc_cse_hash_find(ccv_nnc_cse_hash_t* const hash_map, const uint64_t hash, const int map_size)
{
	assert(hash > 0);
	int slot = (hash - 1) % map_size;
	int probe = 0;
	for (;;)
	{
		if (slot >= map_size)
			slot = 0; // Wrap around the table.
		// Robin Hood bound: if this slot holds an entry closer to home than we
		// have probed, our hash cannot be any further down the chain.
		if (probe > hash_map[slot].ifbit)
			return -1;
		if (hash_map[slot].hash == hash)
			return hash_map[slot].d;
		++probe, ++slot;
	}
}

// Insert (hash -> d); a duplicate hash is a no-op.
static void _ccv_nnc_cse_hash_add(ccv_nnc_cse_hash_t* const hash_map, uint64_t hash, int d, const int map_size)
{
	assert(hash > 0);
	int slot = (hash - 1) % map_size;
	int probe = 0;
	for (;;)
	{
		if (slot >= map_size)
			slot = 0; // Wrap around the table.
		if (hash_map[slot].hash == hash) // Already exists, do nothing.
			return;
		if (hash_map[slot].hash == 0)
		{
			// Empty slot: take it and record how far we probed.
			hash_map[slot].d = d;
			hash_map[slot].ifbit = probe;
			hash_map[slot].hash = hash;
			return;
		}
		if (probe > hash_map[slot].ifbit)
		{
			// Displace the "richer" (closer-to-home) entry and continue
			// probing with it until an empty slot is found.
			const ccv_nnc_cse_hash_t evicted = hash_map[slot];
			hash_map[slot].d = d;
			hash_map[slot].ifbit = probe;
			hash_map[slot].hash = hash;
			d = evicted.d;
			probe = evicted.ifbit;
			hash = evicted.hash;
		}
		++probe, ++slot;
	}
}
144
145static int _ccv_nnc_symbolic_graph_update_refs(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const int* const refs, const int output_exec_ref_dead)
146{
147 int i, j;
148 // Go over refs, if a tensor is an alias, mark it reference to the new one.
149 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
150 if (refs[i] >= 0)
151 // Mark this tensor as dead.
152 simplify->tensor_dead[i >> 5] |= (1u << (i & 0x1f));
153 else if (simplify->tensor_symbol_info[i].alias_ref && refs[simplify->tensor_symbol_info[i].alias_ref - 1] >= 0) {
154 const int alias_ref = simplify->tensor_symbol_info[i].alias_ref - 1;
155 simplify->tensor_symbol_info[i].alias_ref = refs[alias_ref] + 1;
156 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i)((void*)(((char*)((simplify->graph->tensor_symbol_info)
->data)) + (size_t)(simplify->graph->tensor_symbol_info
)->rsize * (size_t)(i)))
)->alias_ref = refs[alias_ref] + 1;
157 }
158 for (i = 0; i < output_size; i++)
159 // If the output is an alias, that's fine, because if the alias is re-binded, we are good.
160 simplify->tensor_dead[outputs[i].d >> 5] &= ~(1u << (outputs[i].d & 0x1f)); // Undead for output tensor symbols.
161 // Merge s_refs if the tensor is dead.
162 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
163 if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))))
164 {
165 const int ref = refs[i];
166 if (simplify->tensor_symbol_info[i].s_ref && simplify->tensor_symbol_info[i].s_ref->rnum)
167 {
168 if (!simplify->tensor_symbol_info[ref].s_ref) // If there is no s_ref, simple, just assign the pointer and set the old one to nil.
169 {
170 simplify->tensor_symbol_info[ref].s_ref = simplify->tensor_symbol_info[i].s_ref;
171 simplify->tensor_symbol_info[i].s_ref = 0;
172 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i)((void*)(((char*)((simplify->graph->tensor_symbol_info)
->data)) + (size_t)(simplify->graph->tensor_symbol_info
)->rsize * (size_t)(i)))
)->s_ref = 0;
173 } else {
174 ccv_array_t* const ref_s_ref = simplify->tensor_symbol_info[ref].s_ref;
175 ccv_array_t* const i_s_ref = simplify->tensor_symbol_info[i].s_ref;
176 const int ref_s_ref_rnum = ref_s_ref->rnum;
177 int flag = 0;
178 // Detect conflict, if there is, undead.
179 for (j = 0; !flag && j < ccv_min(ref_s_ref_rnum, i_s_ref->rnum)({ typeof (ref_s_ref_rnum) _a = (ref_s_ref_rnum); typeof (i_s_ref
->rnum) _b = (i_s_ref->rnum); (_a < _b) ? _a : _b; }
)
; j++)
180 {
181 const int ref_s_ref_k = *(int*)ccv_array_get(ref_s_ref, j)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref
)->rsize * (size_t)(j)))
;
182 const int i_s_ref_k = *(int*)ccv_array_get(i_s_ref, j)((void*)(((char*)((i_s_ref)->data)) + (size_t)(i_s_ref)->
rsize * (size_t)(j)))
;
183 // If for the same sub-graph, they have different tensors linked, we cannot merge these two.
184 flag = (ref_s_ref_k > 0 && i_s_ref_k > 0 && ref_s_ref_k != i_s_ref_k);
185 }
186 if (flag)
187 {
188 simplify->tensor_dead[i >> 5] &= ~(1u << (i & 0x1f)); // Undead
189 continue;
190 }
191 if (ref_s_ref_rnum < i_s_ref->rnum)
192 {
193 ccv_array_resize(ref_s_ref, i_s_ref->rnum);
194 memcpy(ccv_array_get(ref_s_ref, ref_s_ref_rnum)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref
)->rsize * (size_t)(ref_s_ref_rnum)))
, ccv_array_get(i_s_ref, ref_s_ref_rnum)((void*)(((char*)((i_s_ref)->data)) + (size_t)(i_s_ref)->
rsize * (size_t)(ref_s_ref_rnum)))
, sizeof(int) * (i_s_ref->rnum - ref_s_ref_rnum));
195 }
196 for (j = 0; j < ccv_min(ref_s_ref_rnum, i_s_ref->rnum)({ typeof (ref_s_ref_rnum) _a = (ref_s_ref_rnum); typeof (i_s_ref
->rnum) _b = (i_s_ref->rnum); (_a < _b) ? _a : _b; }
)
; j++)
197 {
198 const int ref_s_ref_k = *(int*)ccv_array_get(ref_s_ref, j)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref
)->rsize * (size_t)(j)))
;
199 const int i_s_ref_k = *(int*)ccv_array_get(i_s_ref, j)((void*)(((char*)((i_s_ref)->data)) + (size_t)(i_s_ref)->
rsize * (size_t)(j)))
;
200 assert(ref_s_ref_k == 0 || i_s_ref_k == 0)((void) sizeof ((ref_s_ref_k == 0 || i_s_ref_k == 0) ? 1 : 0)
, __extension__ ({ if (ref_s_ref_k == 0 || i_s_ref_k == 0) ; else
__assert_fail ("ref_s_ref_k == 0 || i_s_ref_k == 0", "ccv_nnc_symbolic_graph_simplify.c"
, 200, __extension__ __PRETTY_FUNCTION__); }))
;
201 if (i_s_ref_k)
202 *(int*)ccv_array_get(ref_s_ref, j)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref
)->rsize * (size_t)(j)))
= i_s_ref_k;
203 }
204 ccv_array_free(simplify->tensor_symbol_info[i].s_ref);
205 simplify->tensor_symbol_info[i].s_ref = 0;
206 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i)((void*)(((char*)((simplify->graph->tensor_symbol_info)
->data)) + (size_t)(simplify->graph->tensor_symbol_info
)->rsize * (size_t)(i)))
)->s_ref = 0;
207 for (j = 0; j < ref_s_ref->rnum; j++)
208 {
209 const int ref_k = *(int*)ccv_array_get(ref_s_ref, j)((void*)(((char*)((ref_s_ref)->data)) + (size_t)(ref_s_ref
)->rsize * (size_t)(j)))
- 1;
210 if (ref_k >= 0)
211 {
212 ccv_nnc_symbolic_graph_t* const sub_graph = *(ccv_nnc_symbolic_graph_t**)ccv_array_get(simplify->graph->sub_graphs, j)((void*)(((char*)((simplify->graph->sub_graphs)->data
)) + (size_t)(simplify->graph->sub_graphs)->rsize * (
size_t)(j)))
;
213 assert(sub_graph)((void) sizeof ((sub_graph) ? 1 : 0), __extension__ ({ if (sub_graph
) ; else __assert_fail ("sub_graph", "ccv_nnc_symbolic_graph_simplify.c"
, 213, __extension__ __PRETTY_FUNCTION__); }))
;
214 // Update its p_ref.
215 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(sub_graph->tensor_symbol_info, ref_k)((void*)(((char*)((sub_graph->tensor_symbol_info)->data
)) + (size_t)(sub_graph->tensor_symbol_info)->rsize * (
size_t)(ref_k)))
)->p_ref = ref + 1;
216 }
217 }
218 }
219 assert(simplify->tensor_symbol_info[i].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->s_ref)((void) sizeof ((simplify->tensor_symbol_info[i].s_ref == (
(ccv_nnc_tensor_symbol_info_t*)((void*)(((char*)((simplify->
graph->tensor_symbol_info)->data)) + (size_t)(simplify->
graph->tensor_symbol_info)->rsize * (size_t)(i))))->
s_ref) ? 1 : 0), __extension__ ({ if (simplify->tensor_symbol_info
[i].s_ref == ((ccv_nnc_tensor_symbol_info_t*)((void*)(((char*
)((simplify->graph->tensor_symbol_info)->data)) + (size_t
)(simplify->graph->tensor_symbol_info)->rsize * (size_t
)(i))))->s_ref) ; else __assert_fail ("simplify->tensor_symbol_info[i].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, i))->s_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 219, __extension__ __PRETTY_FUNCTION__
); }))
;
220 assert(simplify->tensor_symbol_info[ref].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref))->s_ref)((void) sizeof ((simplify->tensor_symbol_info[ref].s_ref ==
((ccv_nnc_tensor_symbol_info_t*)((void*)(((char*)((simplify->
graph->tensor_symbol_info)->data)) + (size_t)(simplify->
graph->tensor_symbol_info)->rsize * (size_t)(ref))))->
s_ref) ? 1 : 0), __extension__ ({ if (simplify->tensor_symbol_info
[ref].s_ref == ((ccv_nnc_tensor_symbol_info_t*)((void*)(((char
*)((simplify->graph->tensor_symbol_info)->data)) + (
size_t)(simplify->graph->tensor_symbol_info)->rsize *
(size_t)(ref))))->s_ref) ; else __assert_fail ("simplify->tensor_symbol_info[ref].s_ref == ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref))->s_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 220, __extension__ __PRETTY_FUNCTION__
); }))
;
221 }
222 }
223 // Going through refs that we are updating, going through its p_ref to make sure both are updated.
224 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
225 if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))) && simplify->tensor_symbol_info[i].p_ref)
226 {
227 const int ref = refs[i];
228 const int p_ref = simplify->tensor_symbol_info[i].p_ref - 1;
229 assert(p_ref >= 0)((void) sizeof ((p_ref >= 0) ? 1 : 0), __extension__ ({ if
(p_ref >= 0) ; else __assert_fail ("p_ref >= 0", "ccv_nnc_symbolic_graph_simplify.c"
, 229, __extension__ __PRETTY_FUNCTION__); }))
;
230 assert(simplify->graph->p)((void) sizeof ((simplify->graph->p) ? 1 : 0), __extension__
({ if (simplify->graph->p) ; else __assert_fail ("simplify->graph->p"
, "ccv_nnc_symbolic_graph_simplify.c", 230, __extension__ __PRETTY_FUNCTION__
); }))
;
231 ccv_array_t* const s_ref = ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->p->tensor_symbol_info, p_ref)((void*)(((char*)((simplify->graph->p->tensor_symbol_info
)->data)) + (size_t)(simplify->graph->p->tensor_symbol_info
)->rsize * (size_t)(p_ref)))
)->s_ref;
232 const int s_idx = simplify->graph->p_idx - 1;
233 assert(s_idx >= 0)((void) sizeof ((s_idx >= 0) ? 1 : 0), __extension__ ({ if
(s_idx >= 0) ; else __assert_fail ("s_idx >= 0", "ccv_nnc_symbolic_graph_simplify.c"
, 233, __extension__ __PRETTY_FUNCTION__); }))
;
234 assert(s_ref && s_ref->rnum > s_idx)((void) sizeof ((s_ref && s_ref->rnum > s_idx) ?
1 : 0), __extension__ ({ if (s_ref && s_ref->rnum
> s_idx) ; else __assert_fail ("s_ref && s_ref->rnum > s_idx"
, "ccv_nnc_symbolic_graph_simplify.c", 234, __extension__ __PRETTY_FUNCTION__
); }))
;
235 *(int*)ccv_array_get(s_ref, s_idx)((void*)(((char*)((s_ref)->data)) + (size_t)(s_ref)->rsize
* (size_t)(s_idx)))
= ref + 1; // Update so it references to the new s_ref.
236 assert(!simplify->tensor_symbol_info[ref].p_ref)((void) sizeof ((!simplify->tensor_symbol_info[ref].p_ref)
? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[ref].p_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[ref].p_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 236, __extension__ __PRETTY_FUNCTION__
); }))
;
237 simplify->tensor_symbol_info[ref].p_ref = p_ref + 1;
238 ((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(simplify->graph->tensor_symbol_info, ref)((void*)(((char*)((simplify->graph->tensor_symbol_info)
->data)) + (size_t)(simplify->graph->tensor_symbol_info
)->rsize * (size_t)(ref)))
)->p_ref = p_ref + 1;
239 }
240 // Now go over exec to mark them as dead because we don't need these to generate refs.
241 if (output_exec_ref_dead)
242 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
243 if (refs[i] >= 0 && (simplify->tensor_dead[i >> 5] & (1u << (i & 0x1f))))
244 {
245 const int output_exec = simplify->output_execs[i];
246 assert(output_exec >= 0)((void) sizeof ((output_exec >= 0) ? 1 : 0), __extension__
({ if (output_exec >= 0) ; else __assert_fail ("output_exec >= 0"
, "ccv_nnc_symbolic_graph_simplify.c", 246, __extension__ __PRETTY_FUNCTION__
); }))
;
247 const ccv_nnc_graph_exec_symbol_info_t* const symbol_info = simplify->exec_symbol_info + output_exec;
248 int flag = 0;
249 for (j = 0; !flag && j < symbol_info->output_size; j++)
250 {
251 const int d = symbol_info->outputs[j];
252 if (d >= 0)
253 flag = (!(simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f)))); // If some of the output is not dead, we cannot proceed.
254 }
255 if (!flag) // If all outputs are dead, mark the exec as dead.
256 simplify->exec_dead[output_exec >> 5] |= (1u << (output_exec & 0x1f));
257 }
258 int updated_refs = 0;
259 // Go over replace inputs / outputs.
260 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
261 for (i = 0; i < node->input_size; i++)
262 {
263 const int d = node->inputs[i];
264 if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
265 {
266 node->inputs[i] = refs[d]; // It can be replaced.
267 updated_refs = 1;
268 }
269 }
270 for (i = 0; i < node->output_size; i++)
271 {
272 const int d = node->outputs[i];
273 if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
274 {
275 node->outputs[i] = refs[d]; // It can be replaced.
276 updated_refs = 1;
277 }
278 }
279 assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->inputs == node->inputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*)
(((char*)((simplify->graph->exec_symbol_info)->data)
) + (size_t)(simplify->graph->exec_symbol_info)->rsize
* (size_t)(idx))))->inputs == node->inputs) ? 1 : 0), __extension__
({ if (((ccv_nnc_graph_exec_symbol_info_t*)((void*)(((char*)
((simplify->graph->exec_symbol_info)->data)) + (size_t
)(simplify->graph->exec_symbol_info)->rsize * (size_t
)(idx))))->inputs == node->inputs) ; else __assert_fail
("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->inputs == node->inputs"
, "ccv_nnc_symbolic_graph_simplify.c", 279, __extension__ __PRETTY_FUNCTION__
); }))
;
280 assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->outputs == node->outputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*)
(((char*)((simplify->graph->exec_symbol_info)->data)
) + (size_t)(simplify->graph->exec_symbol_info)->rsize
* (size_t)(idx))))->outputs == node->outputs) ? 1 : 0)
, __extension__ ({ if (((ccv_nnc_graph_exec_symbol_info_t*)((
void*)(((char*)((simplify->graph->exec_symbol_info)->
data)) + (size_t)(simplify->graph->exec_symbol_info)->
rsize * (size_t)(idx))))->outputs == node->outputs) ; else
__assert_fail ("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx))->outputs == node->outputs"
, "ccv_nnc_symbolic_graph_simplify.c", 280, __extension__ __PRETTY_FUNCTION__
); }))
;
281 } ccv_nnc_graph_visit_endfor} }
282 const ccv_nnc_graph_exec_symbol_info_t* const p_node_info = simplify->graph->p ? (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->p->exec_symbol_info, simplify->graph->exec_idx - 1)((void*)(((char*)((simplify->graph->p->exec_symbol_info
)->data)) + (size_t)(simplify->graph->p->exec_symbol_info
)->rsize * (size_t)(simplify->graph->exec_idx - 1)))
: 0;
283 if (p_node_info && (p_node_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE))
284 // Go over the while inputs as well.
285 for (i = 0; i < p_node_info->p_while.input_size; i++)
286 {
287 const int d = p_node_info->p_while.inputs[i];
288 if (d >= 0 && refs[d] >= 0 && (simplify->tensor_dead[d >> 5] & (1u << (d & 0x1f))))
289 {
290 p_node_info->p_while.inputs[i] = refs[d];
291 updated_refs = 1;
292 }
293 }
294 return updated_refs;
295}
296
297// This is a simple common sub-expression elimination implementation, particularly, we only replace the later computed output
298// with the identical earlier computed output, and let the "elimination" part to the graph pruning.
299static void _ccv_nnc_symbolic_graph_common_subexpression_elimination(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
300{
301 _ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify);
302 // tensor_hash starts with 0s, and it is either marked with the tensor index + 1, or the hash of the computations.
303 uint64_t* const tensor_hash = (uint64_t*)cccalloccalloc(simplify->tensor_symbol_info_size, sizeof(uint64_t));
304 int i;
305 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
306 // We cannot support graph / custom command (we cannot model them properly).
307 if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD ||
308 node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ||
309 node->cmd.cmd == CCV_NNC_CUSTOM_FORWARD ||
310 node->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD)
311 continue;
312 // If already marked as dead, skip.
313 if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
314 continue;
315 uint64_t hashout, hashin[3] = {};
316 siphash((uint8_t*)&hashin[0], (const uint8_t*)&node->cmd.info, sizeof(node->cmd.info), key_siphash);
317 siphash((uint8_t*)&hashin[1], (const uint8_t*)&node->hint, sizeof(node->hint), key_siphash);
318 hashin[2] = node->cmd.cmd; // Now actually hash the cmd name.
319 siphash((uint8_t*)&hashout, (const uint8_t*)hashin, sizeof(hashin), key_siphash);
320 // First, hash the cmd and the hints with the cmd.
321 // Note on alias, we cannot really generate proper hash for alias (yet). Thus, just treat alias as normal tensors.
322 for (i = 0; i < node->input_size; i++)
323 {
324 // If no hash for the input, use the index + 1 as the hash.
325 if (node->inputs[i] >= 0 && tensor_hash[node->inputs[i]] == 0)
326 tensor_hash[node->inputs[i]] = node->inputs[i] + 1;
327 if (node->inputs[i] >= 0)
328 {
329 // Hash using the tensor hash.
330 hashin[0] = hashout;
331 hashin[1] = i; // Encode the positional information.
332 hashin[2] = tensor_hash[node->inputs[i]];
333 } else {
334 // Hash using the input integer (could be special integer).
335 hashin[0] = hashout;
336 hashin[1] = i; // Encode the positional information.
337 hashin[2] = node->inputs[i];
338 }
339 siphash((uint8_t*)&hashout, (const uint8_t*)hashin, sizeof(hashin), key_siphash);
340 }
341 for (i = 0; i < node->output_size; i++)
342 if (node->outputs[i] >= 0)
343 {
344 // Assigning once, especially now we don't consider aliases.
345 assert(tensor_hash[node->outputs[i]] == 0)((void) sizeof ((tensor_hash[node->outputs[i]] == 0) ? 1 :
0), __extension__ ({ if (tensor_hash[node->outputs[i]] ==
0) ; else __assert_fail ("tensor_hash[node->outputs[i]] == 0"
, "ccv_nnc_symbolic_graph_simplify.c", 345, __extension__ __PRETTY_FUNCTION__
); }))
;
346 hashin[0] = hashout;
347 hashin[1] = i; // Positional information.
348 siphash((uint8_t*)&hashin[2], (const uint8_t*)&simplify->tensor_symbol_info[node->outputs[i]].info,
349 sizeof(simplify->tensor_symbol_info[node->outputs[i]].info), key_siphash);
350 // Generate hash for the output.
351 siphash((uint8_t*)&tensor_hash[node->outputs[i]], (const uint8_t*)hashin, sizeof(hashin), key_siphash);
352 }
353 } ccv_nnc_graph_visit_endfor} }
354 // Allocate 3 / 2 as much space, for the simple robin-hood open address hash map.
355 const int map_size = (simplify->tensor_symbol_info_size * 3 + 1) / 2;
356 int* const refs = (int*)ccmallocmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
357 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
358 refs[i] = -1;
359 ccv_nnc_cse_hash_t* const hash_map = (ccv_nnc_cse_hash_t*)cccalloccalloc(map_size, sizeof(ccv_nnc_cse_hash_t));
360 // Now, all tensors are hashed, identify tensors with the same hash code, replace the ones that accessed later.
361 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
362 // If already marked as dead, skip.
363 if (simplify->exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
364 continue;
365 for (i = 0; i < node->input_size; i++)
366 if (node->inputs[i] >= 0)
367 {
368 const int d = node->inputs[i];
369 assert(tensor_hash[d])((void) sizeof ((tensor_hash[d]) ? 1 : 0), __extension__ ({ if
(tensor_hash[d]) ; else __assert_fail ("tensor_hash[d]", "ccv_nnc_symbolic_graph_simplify.c"
, 369, __extension__ __PRETTY_FUNCTION__); }))
;
370 const int new_d = _ccv_nnc_cse_hash_find(hash_map, tensor_hash[d], map_size);
371 if (new_d >= 0 && new_d != d)
372 {
373 // Check whether this can be replaced.
374 assert(refs[d] == -1 || refs[d] == new_d)((void) sizeof ((refs[d] == -1 || refs[d] == new_d) ? 1 : 0),
__extension__ ({ if (refs[d] == -1 || refs[d] == new_d) ; else
__assert_fail ("refs[d] == -1 || refs[d] == new_d", "ccv_nnc_symbolic_graph_simplify.c"
, 374, __extension__ __PRETTY_FUNCTION__); }))
;
375 assert(!simplify->tensor_symbol_info[d].assign_ref)((void) sizeof ((!simplify->tensor_symbol_info[d].assign_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[d].assign_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[d].assign_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 375, __extension__ __PRETTY_FUNCTION__
); }))
;
376 assert(!simplify->tensor_symbol_info[d].r_assign_ref)((void) sizeof ((!simplify->tensor_symbol_info[d].r_assign_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[d].r_assign_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[d].r_assign_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 376, __extension__ __PRETTY_FUNCTION__
); }))
;
377 assert(!simplify->tensor_symbol_info[d].bypass_ref)((void) sizeof ((!simplify->tensor_symbol_info[d].bypass_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[d].bypass_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[d].bypass_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 377, __extension__ __PRETTY_FUNCTION__
); }))
;
378 assert(!simplify->tensor_symbol_info[new_d].assign_ref)((void) sizeof ((!simplify->tensor_symbol_info[new_d].assign_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[new_d].assign_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[new_d].assign_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 378, __extension__ __PRETTY_FUNCTION__
); }))
;
379 assert(!simplify->tensor_symbol_info[new_d].r_assign_ref)((void) sizeof ((!simplify->tensor_symbol_info[new_d].r_assign_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[new_d].r_assign_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[new_d].r_assign_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 379, __extension__ __PRETTY_FUNCTION__
); }))
;
380 assert(!simplify->tensor_symbol_info[new_d].bypass_ref)((void) sizeof ((!simplify->tensor_symbol_info[new_d].bypass_ref
) ? 1 : 0), __extension__ ({ if (!simplify->tensor_symbol_info
[new_d].bypass_ref) ; else __assert_fail ("!simplify->tensor_symbol_info[new_d].bypass_ref"
, "ccv_nnc_symbolic_graph_simplify.c", 380, __extension__ __PRETTY_FUNCTION__
); }))
;
381 // Ignore if there is a peer_ref (again, peer_ref has side effect that is deeper (using tape))
382 if (simplify->tensor_symbol_info[d].peer_ref)
383 continue;
384 // If both have p_ref, we cannot merge.
385 if (simplify->tensor_symbol_info[d].p_ref && simplify->tensor_symbol_info[new_d].p_ref)
386 continue;
387 // Merge s_refs from ref[d] later.
388 if (refs[d] != new_d)
389 refs[d] = new_d;
390 assert(simplify->output_execs[new_d] >= 0)((void) sizeof ((simplify->output_execs[new_d] >= 0) ? 1
: 0), __extension__ ({ if (simplify->output_execs[new_d] >=
0) ; else __assert_fail ("simplify->output_execs[new_d] >= 0"
, "ccv_nnc_symbolic_graph_simplify.c", 390, __extension__ __PRETTY_FUNCTION__
); }))
;
391 // Establish new dependency.
392 ccv_nnc_graph_exec_symbol_concat(simplify->graph, (ccv_nnc_graph_exec_symbol_t){
393 .d = simplify->output_execs[new_d],
394 .graph = simplify->graph,
395 }, (ccv_nnc_graph_exec_symbol_t){
396 .d = idx,
397 .graph = simplify->graph,
398 });
399 }
400 }
401 // We can reuse the input, but we cannot do that for output of these commands.
402 if (node->cmd.cmd == CCV_NNC_GRAPH_FORWARD ||
403 node->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ||
404 node->cmd.cmd == CCV_NNC_CUSTOM_FORWARD ||
405 node->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD)
406 continue;
407 for (i = 0; i < node->output_size; i++)
408 if (node->outputs[i] >= 0) // This tensor can be reused by others.
409 _ccv_nnc_cse_hash_add(hash_map, tensor_hash[node->outputs[i]], node->outputs[i], map_size);
410 } ccv_nnc_graph_visit_endfor} }
411 _ccv_nnc_symbolic_graph_update_refs(simplify, outputs, output_size, refs, 1 /* For these exec that generates refs, we don't need them any more. */);
412 ccfreefree(tensor_hash);
413 ccfreefree(hash_map);
414 ccfreefree(refs);
415}
416
417static void _ccv_nnc_symbolic_graph_data_transfer_opt(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
418{
419 _ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify);
420 uint32_t* const exec_dead = simplify->exec_dead;
421 const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = simplify->tensor_symbol_info;
422 int i, j;
423 uint32_t* const has_alias = ccmallocmalloc(sizeof(uint32_t) * ((simplify->tensor_symbol_info_size + 31) >> 5));
424 int* const refs = (int*)ccmallocmalloc(sizeof(int) * simplify->tensor_symbol_info_size);
425 int updated_refs;
426 do {
427 memset(has_alias, 0, sizeof(uint32_t) * ((simplify->tensor_symbol_info_size + 31) >> 5));
428 // Go through until no updates is possible. This won't result an infinite loop because every time,
429 // a tensor is eliminated.
430 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
431 {
432 refs[i] = -1;
433 if (tensor_symbol_info[i].alias_ref)
434 {
435 const int alias_ref = tensor_symbol_info[i].alias_ref - 1;
436 has_alias[alias_ref >> 5] |= (1u << (alias_ref & 0x1f));
437 }
438 }
439 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
440 // If already marked as dead, skip.
441 if (exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
442 continue;
443 if (node->cmd.cmd != CCV_NNC_DATA_TRANSFER_FORWARD &&
444 node->cmd.cmd != CCV_NNC_DATA_TRANSFER_BACKWARD)
445 continue;
446 for (i = 0; i < node->output_size; i++) // For data transfer, we only respect output size.
447 if (node->inputs[i] >= 0 && node->outputs[i] >= 0)
448 {
449 assert(node->inputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->inputs[i] < simplify->tensor_symbol_info_size
) ? 1 : 0), __extension__ ({ if (node->inputs[i] < simplify
->tensor_symbol_info_size) ; else __assert_fail ("node->inputs[i] < simplify->tensor_symbol_info_size"
, "ccv_nnc_symbolic_graph_simplify.c", 449, __extension__ __PRETTY_FUNCTION__
); }))
;
450 assert(node->outputs[i] < simplify->tensor_symbol_info_size)((void) sizeof ((node->outputs[i] < simplify->tensor_symbol_info_size
) ? 1 : 0), __extension__ ({ if (node->outputs[i] < simplify
->tensor_symbol_info_size) ; else __assert_fail ("node->outputs[i] < simplify->tensor_symbol_info_size"
, "ccv_nnc_symbolic_graph_simplify.c", 450, __extension__ __PRETTY_FUNCTION__
); }))
;
451 const ccv_nnc_tensor_symbol_info_t* const input = tensor_symbol_info + node->inputs[i];
452 const ccv_nnc_tensor_symbol_info_t* const output = tensor_symbol_info + node->outputs[i];
453 assert(input->info.datatype == output->info.datatype)((void) sizeof ((input->info.datatype == output->info.datatype
) ? 1 : 0), __extension__ ({ if (input->info.datatype == output
->info.datatype) ; else __assert_fail ("input->info.datatype == output->info.datatype"
, "ccv_nnc_symbolic_graph_simplify.c", 453, __extension__ __PRETTY_FUNCTION__
); }))
;
454 assert(input->info.format == output->info.format)((void) sizeof ((input->info.format == output->info.format
) ? 1 : 0), __extension__ ({ if (input->info.format == output
->info.format) ; else __assert_fail ("input->info.format == output->info.format"
, "ccv_nnc_symbolic_graph_simplify.c", 454, __extension__ __PRETTY_FUNCTION__
); }))
;
455 // If they are not on the same device (even for NUMA), skip.
456 if (input->info.type != output->info.type)
457 continue;
458 // If both are alias, we cannot consolidate this.
459 if (input->alias_ref && output->alias_ref)
460 continue;
461 // If input is alias, and output has alias reference to it, output cannot be the same as input.
462 if (input->alias_ref && (has_alias[node->outputs[i] >> 5] & (1u << (node->outputs[i] & 0x1f))))
463 continue;
464 // If output is alias, and input has alias reference to it, input cannot be the same as output.
465 if (output->alias_ref && (has_alias[node->inputs[i] >> 5] & (1u << (node->inputs[i] & 0x1f))))
466 continue;
467 // If either are carry overs (for while), we cannot do anything.
468 if (input->assign_ref || output->assign_ref ||
469 input->r_assign_ref || output->r_assign_ref)
470 continue;
471 // If either are bypasses (for case..of), we cannot do anything.
472 if (input->bypass_ref || output->bypass_ref ||
473 input->r_bypass_ref || output->r_bypass_ref)
474 continue;
475 // If either are inputs / outputs connecting the parent graph, we cannot do anything.
476 if (input->p_ref || output->p_ref)
477 continue;
478 int flag = 0;
479 for (j = 0; !flag && j < output_size; j++)
480 flag = (outputs[j].d == node->inputs[i] || outputs[j].d == node->outputs[i]);
481 // Either input or output cannot be in the outputs.
482 if (flag)
483 continue;
484 // If the type is the same, check which one is the alias.
485 // We always prefer alias.
486 if (output->alias_ref)
487 refs[node->inputs[i]] = node->outputs[i];
488 else // if (input->alias_ref), else
489 refs[node->outputs[i]] = node->inputs[i];
490 }
491 } ccv_nnc_graph_visit_endfor} }
492 // Make sure refs reference to the end.
493 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
494 if (refs[i] >= 0)
495 {
496 int ref = refs[i];
497 while (refs[ref] >= 0)
498 ref = refs[i];
499 refs[i] = ref;
500 }
501 updated_refs = _ccv_nnc_symbolic_graph_update_refs(simplify, outputs, output_size, refs, 0 /* We still need these exec that generates the refs. */);
502 } while (updated_refs);
503 ccfreefree(refs);
504 ccfreefree(has_alias);
505 // Now, all references updated, remove data transfers that sources and destinations are the same.
506 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
507 // If already marked as dead, skip.
508 if (exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
509 continue;
510 if (node->cmd.cmd != CCV_NNC_DATA_TRANSFER_FORWARD &&
511 node->cmd.cmd != CCV_NNC_DATA_TRANSFER_BACKWARD)
512 continue;
513 for (i = 0; i < node->output_size; i++) // For data transfer, we only respect output size.
514 if (node->inputs[i] == node->outputs[i])
515 {
516 if (i + 1 < node->output_size)
517 {
518 node->inputs[i] = node->inputs[i + 1];
519 node->outputs[i] = node->outputs[i + 1];
520 }
521 --node->output_size;
522 --i;
523 }
524 node->input_size = node->output_size;
525 ((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx)((void*)(((char*)((simplify->graph->exec_symbol_info)->
data)) + (size_t)(simplify->graph->exec_symbol_info)->
rsize * (size_t)(idx)))
)->input_size = node->input_size;
526 ((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx)((void*)(((char*)((simplify->graph->exec_symbol_info)->
data)) + (size_t)(simplify->graph->exec_symbol_info)->
rsize * (size_t)(idx)))
)->output_size = node->output_size;
527 // Remove this data transfer node if it has no outputs.
528 if (node->output_size == 0)
529 exec_dead[idx >> 5] |= (1u << (idx & 0x1f));
530 } ccv_nnc_graph_visit_endfor} }
531}
532
// Describes one fusion pattern: a linear sequence of commands that can be
// collapsed into a single fused command, plus how the inputs / outputs of the
// sequence map onto the inputs / outputs of the fused command.
typedef struct {
	uint32_t fused_op; // The final fused op id.
	uint32_t ops_seq[2]; // The sequence of commands to identify.
	int ops_seq_size; // Number of valid entries in ops_seq.
	struct {
		int type; // Whether it is input, or output. It doesn't make sense for example, input in ops_seq, but output in fused_op.
		int op_idx; // Index into the ops_seq.
		int from; // The index in ops_seq.
		int to; // The index in fused_op.
	} pos[4]; // maps of positions from ops seq to fused_op for inputs (outputs).
	int pos_size; // Number of valid entries in pos.
} ccv_nnc_ops_fusion_t;
545
// Slot kind for ccv_nnc_ops_fusion_t::pos[].type: whether the mapped
// position refers to an input or an output of the command.
enum {
	CCV_NNC_OPS_FUSION_INPUT_INDEX,
	CCV_NNC_OPS_FUSION_OUTPUT_INDEX,
};
550
// Maximum number of input (or output) slots any fusion pattern remaps; sizes
// the scratch arrays in _ccv_nnc_symbolic_graph_ops_fusion.
const static int ccv_nnc_ops_fusion_io_size = 2;
// Table of recognized fusion patterns. Currently only one entry:
// softmax followed by categorical cross-entropy fuses into
// CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD.
const static ccv_nnc_ops_fusion_t ccv_nnc_ops_fusions[] = {
	{
		.fused_op = CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD,
		.ops_seq = {
			CCV_NNC_SOFTMAX_FORWARD, CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD,
		},
		.ops_seq_size = 2,
		.pos = {
			{
				// softmax input 0 (activations) -> fused input 0.
				.type = CCV_NNC_OPS_FUSION_INPUT_INDEX,
				.op_idx = 0,
				.from = 0,
				.to = 0,
			},
			{
				// cross-entropy input 1 (labels) -> fused input 1.
				.type = CCV_NNC_OPS_FUSION_INPUT_INDEX,
				.op_idx = 1,
				.from = 1,
				.to = 1,
			},
			{
				// softmax output 0 -> fused output 1.
				.type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX,
				.op_idx = 0,
				.from = 0,
				.to = 1,
			},
			{
				// cross-entropy output 0 (loss) -> fused output 0.
				.type = CCV_NNC_OPS_FUSION_OUTPUT_INDEX,
				.op_idx = 1,
				.from = 0,
				.to = 0,
			},
		},
		.pos_size = 4,
	}
};
588
589static int _ccv_nnc_find_ops_for_fusion(const ccv_nnc_ops_fusion_t* const fusion, const int ops_idx, const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info, const uint32_t* const exec_dead, const int exec_idx, int* const fusing_exec_symbols)
590{
591 if (exec_dead[exec_idx >> 5] & (1u << (exec_idx & 0x1f)))
592 return 0;
593 const ccv_nnc_graph_exec_symbol_info_t* const node = exec_symbol_info + exec_idx;
594 // Doesn't match the ops_seq, return 0.
595 if (fusion->ops_seq[ops_idx] != node->cmd.cmd)
596 return 0;
597 fusing_exec_symbols[ops_idx] = exec_idx;
598 // If already reached the end, we are good.
599 if (ops_idx == fusion->ops_seq_size - 1)
600 return 1;
601 // Otherwise, we need to go on, but don't have any to follow-up.
602 if (!node->outgoings || !node->outgoings->rnum)
603 return 0;
604 int i;
605 for (i = 0; i < node->outgoings->rnum; i++)
606 if (_ccv_nnc_find_ops_for_fusion(fusion, ops_idx + 1, exec_symbol_info, exec_dead, *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
, fusing_exec_symbols))
607 return 1;
608 return 0;
609}
610
611static void _ccv_nnc_symbolic_graph_ops_fusion(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
612{
613 uint32_t* const exec_dead = simplify->exec_dead;
614 ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = simplify->exec_symbol_info;
615 int i, j;
616 int fusing_exec_symbols[sizeof(ccv_nnc_ops_fusions->ops_seq)];
617 int fused_inputs[ccv_nnc_ops_fusion_io_size]; // 2 is just a number based on the ops_fusion.
618 int fused_outputs[ccv_nnc_ops_fusion_io_size];
619 ccv_nnc_graph_visit_for(simplify->visit, exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(exec_symbol_info)) const node __attribute__((unused)) = (exec_symbol_info
) + idx;
{
620 // If already marked as dead, skip.
621 if (exec_dead[idx >> 5] & (1u << (idx & 0x1f)))
622 continue;
623 // Run through rudimentary pattern matching for ops_seq. There are better ways to do this (immediately come to mind, Boyer-Moore). However, this is simpler to code.
624 // If I am running into performance issues with this, I would be very happy.
625 for (i = 0; i < sizeof(ccv_nnc_ops_fusions) / sizeof(ccv_nnc_ops_fusion_t); i++)
626 {
627 const ccv_nnc_ops_fusion_t* const ops_fusion = ccv_nnc_ops_fusions + i;
628 // Check to see if a list of symbols are possible.
629 if (ops_fusion->ops_seq[0] == node->cmd.cmd &&
630 _ccv_nnc_find_ops_for_fusion(ops_fusion, 0, exec_symbol_info, exec_dead, idx, fusing_exec_symbols))
631 {
632 // Go through all the inputs and outputs, check if they exists and are mapped.
633 // TODO: the mapping can be more sophisticated than what we have here.
634 // Also, I need to check if some inputs / outputs cannot be mapped, then we cannot continue.
635 for (j = 0; j < ccv_nnc_ops_fusion_io_size; j++)
636 fused_inputs[j] = fused_outputs[j] = CCV_NNC_NO_TENSOR_SYMBOL;
637 int input_size = 0, output_size = 0;
638 for (j = 0; j < ops_fusion->pos_size; j++)
639 {
640 ccv_nnc_graph_exec_symbol_info_t* const fusing_op = exec_symbol_info + fusing_exec_symbols[ops_fusion->pos[j].op_idx];
641 switch (ops_fusion->pos[j].type)
642 {
643 case CCV_NNC_OPS_FUSION_INPUT_INDEX:
644 fused_inputs[ops_fusion->pos[j].to] = ops_fusion->pos[j].from < fusing_op->input_size ? fusing_op->inputs[ops_fusion->pos[j].from] : CCV_NNC_NO_TENSOR_SYMBOL;
645 input_size = ccv_max(input_size, ops_fusion->pos[j].to + 1)({ typeof (input_size) _a = (input_size); typeof (ops_fusion->
pos[j].to + 1) _b = (ops_fusion->pos[j].to + 1); (_a > _b
) ? _a : _b; })
;
646 break;
647 case CCV_NNC_OPS_FUSION_OUTPUT_INDEX:
648 fused_outputs[ops_fusion->pos[j].to] = ops_fusion->pos[j].from < fusing_op->output_size ? fusing_op->outputs[ops_fusion->pos[j].from] : CCV_NNC_NO_TENSOR_SYMBOL;
649 output_size = ccv_max(output_size, ops_fusion->pos[j].to + 1)({ typeof (output_size) _a = (output_size); typeof (ops_fusion
->pos[j].to + 1) _b = (ops_fusion->pos[j].to + 1); (_a >
_b) ? _a : _b; })
;
650 break;
651 }
652 }
653 // Modify the first node so it is the correct type and value.
654 ccv_nnc_graph_exec_symbol_info_t* const actual_node = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, idx)((void*)(((char*)((simplify->graph->exec_symbol_info)->
data)) + (size_t)(simplify->graph->exec_symbol_info)->
rsize * (size_t)(idx)))
;
655 actual_node->cmd.cmd = node->cmd.cmd = ops_fusion->fused_op;
656 if (node->input_size + node->output_size < input_size + output_size)
657 actual_node->inputs = node->inputs = node->inputs ? ccreallocrealloc(node->inputs, sizeof(int) * (input_size + output_size)) : ccmallocmalloc(sizeof(int) * (input_size + output_size));
658 actual_node->outputs = node->outputs = node->inputs + input_size;
659 actual_node->input_size = node->input_size = input_size;
660 actual_node->output_size = node->output_size = output_size;
661 memcpy(node->inputs, fused_inputs, sizeof(int) * input_size);
662 memcpy(node->outputs, fused_outputs, sizeof(int) * output_size);
663 // Afterwards, mark the rest as dead.
664 for (j = 1; j < ops_fusion->ops_seq_size; j++)
665 exec_dead[fusing_exec_symbols[j] >> 5] |= (1u << (fusing_exec_symbols[j] & 0x1f));
666 break;
667 }
668 }
669 } ccv_nnc_graph_visit_endfor} }
670}
671
672static void _ccv_nnc_symbolic_graph_pruning_undead_exec(ccv_nnc_symbolic_graph_simplify_t* const simplify, const int exec_idx, uint32_t* const tensor_visited, ccv_array_t* const next)
673{
674 assert(exec_idx >= 0)((void) sizeof ((exec_idx >= 0) ? 1 : 0), __extension__ ({
if (exec_idx >= 0) ; else __assert_fail ("exec_idx >= 0"
, "ccv_nnc_symbolic_graph_simplify.c", 674, __extension__ __PRETTY_FUNCTION__
); }))
;
24
Taking true branch
675 uint32_t* const exec_dead = simplify->exec_dead;
676 uint32_t* const tensor_dead = simplify->tensor_dead;
677 exec_dead[exec_idx >> 5] &= ~(1u << (exec_idx & 0x1f)); // Undead the exec.
678 ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = simplify->exec_symbol_info + exec_idx;
679 int i;
680 if (exec_symbol_info->cmd.cmd == CCV_NNC_GRAPH_FORWARD ||
25
Assuming the condition is false
29
Taking false branch
681 exec_symbol_info->cmd.cmd == CCV_NNC_GRAPH_BACKWARD ||
26
Assuming the condition is false
682 exec_symbol_info->cmd.cmd == CCV_NNC_CUSTOM_FORWARD ||
27
Assuming the condition is false
683 exec_symbol_info->cmd.cmd == CCV_NNC_CUSTOM_BACKWARD)
28
Assuming the condition is false
684 {
685 // All of its inputs / outputs need to be undead for these commands.
686 for (i = 0; i < exec_symbol_info->input_size; i++)
687 {
688 const int d = exec_symbol_info->inputs[i];
689 if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f))))
690 {
691 ccv_array_push(next, &d); // Push to the next round to be undead.
692 tensor_visited[d >> 5] |= (1u << (d & 0x1f));
693 }
694 }
695 for (i = 0; i < exec_symbol_info->output_size; i++)
696 {
697 const int d = exec_symbol_info->outputs[i];
698 if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f))))
699 {
700 ccv_array_push(next, &d); // Push to the next round to be undead.
701 tensor_visited[d >> 5] |= (1u << (d & 0x1f));
702 }
703 }
704 return;
705 }
706 // Go through the input / output, to make sure that all of them can be available.
707 const int input_bitmask_size = (exec_symbol_info->input_size + 63) >> 6;
708 const int output_bitmask_size = (exec_symbol_info->output_size + 63) >> 6;
709 uint64_t input_bitmasks[ccv_max(1, input_bitmask_size)({ typeof (1) _a = (1); typeof (input_bitmask_size) _b = (input_bitmask_size
); (_a > _b) ? _a : _b; })
];
30
Assuming '_a' is > '_b'
31
'?' condition is true
710 for (i = 0; i < input_bitmask_size; i++)
32
Loop condition is false. Execution continues on line 712
711 input_bitmasks[i] = 0;
712 uint64_t output_bitmasks[ccv_max(1, output_bitmask_size)({ typeof (1) _a = (1); typeof (output_bitmask_size) _b = (output_bitmask_size
); (_a > _b) ? _a : _b; })
];
33
Assuming '_a' is > '_b'
34
'?' condition is true
713 for (i = 0; i < output_bitmask_size; i++)
35
Loop condition is false. Execution continues on line 715
714 output_bitmasks[i] = 0;
715 for (i = 0; i < exec_symbol_info->input_size; i++)
36
Assuming the condition is false
37
Loop condition is false. Execution continues on line 718
716 if (exec_symbol_info->inputs[i] >= 0)
717 input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
718 for (i = 0; i < exec_symbol_info->output_size; i++)
38
Assuming the condition is true
39
Loop condition is true. Entering loop body
719 if (exec_symbol_info->outputs[i] >= 0)
40
Assuming the condition is true
41
Taking true branch
720 output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
42
The left expression of the compound assignment is an uninitialized value. The computed value will also be garbage
721 // First, mark everything with bitmasks, and verify it works.
722 assert(ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size))((void) sizeof ((ccv_nnc_cmd_bitmask(exec_symbol_info->cmd
, exec_symbol_info->input_size, exec_symbol_info->output_size
, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size
)) ? 1 : 0), __extension__ ({ if (ccv_nnc_cmd_bitmask(exec_symbol_info
->cmd, exec_symbol_info->input_size, exec_symbol_info->
output_size, input_bitmasks, input_bitmask_size, output_bitmasks
, output_bitmask_size)) ; else __assert_fail ("ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size)"
, "ccv_nnc_symbolic_graph_simplify.c", 722, __extension__ __PRETTY_FUNCTION__
); }))
;
723 int flag;
724 do {
725 flag = 0;
726 // Try to eliminate one at a time. Go over output first.
727 for (i = 0; i < exec_symbol_info->output_size; i++)
728 {
729 const int d = exec_symbol_info->outputs[i];
730 // If this tensor currently is marked as dead, try to see whether it works when we don't have this tensor at all.
731 if (d >= 0 && (tensor_dead[d >> 5] & (1u << (d & 0x1f))) &&
732 (output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))))
733 {
734 output_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63));
735 if (ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size))
736 flag = 1;
737 else // Reset the bitmask.
738 output_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
739 }
740 }
741 // For inputs, no matter if it s dead or not, we try to limit our input to the smallest number.
742 for (i = 0; i < exec_symbol_info->input_size; i++)
743 {
744 const int d = exec_symbol_info->inputs[i];
745 // If this tensor currently is marked as dead, try to see whether it works when we don't have this tensor at all.
746 if (d >= 0 && (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63))))
747 {
748 input_bitmasks[i >> 6] &= ~((uint64_t)1 << (i & 63));
749 if (ccv_nnc_cmd_bitmask(exec_symbol_info->cmd, exec_symbol_info->input_size, exec_symbol_info->output_size, input_bitmasks, input_bitmask_size, output_bitmasks, output_bitmask_size))
750 flag = 1;
751 else // Reset the bitmask.
752 input_bitmasks[i >> 6] |= ((uint64_t)1 << (i & 63));
753 }
754 }
755 } while (flag);
756 // Now we know which one to keep, which one to undead.
757 for (i = 0; i < exec_symbol_info->input_size; i++)
758 if (input_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))
759 {
760 const int d = exec_symbol_info->inputs[i];
761 if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f))))
762 {
763 ccv_array_push(next, &d); // Push to the next round to be undead.
764 tensor_visited[d >> 5] |= (1u << (d & 0x1f));
765 }
766 } else {
767 // Clean up the inputs.
768 exec_symbol_info->inputs[i] = CCV_NNC_NO_TENSOR_SYMBOL;
769 assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->inputs == exec_symbol_info->inputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*)
(((char*)((simplify->graph->exec_symbol_info)->data)
) + (size_t)(simplify->graph->exec_symbol_info)->rsize
* (size_t)(exec_idx))))->inputs == exec_symbol_info->inputs
) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((simplify->graph->exec_symbol_info)
->data)) + (size_t)(simplify->graph->exec_symbol_info
)->rsize * (size_t)(exec_idx))))->inputs == exec_symbol_info
->inputs) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->inputs == exec_symbol_info->inputs"
, "ccv_nnc_symbolic_graph_simplify.c", 769, __extension__ __PRETTY_FUNCTION__
); }))
;
770 }
771 for (i = 0; i < exec_symbol_info->output_size; i++)
772 if (output_bitmasks[i >> 6] & ((uint64_t)1 << (i & 63)))
773 {
774 const int d = exec_symbol_info->outputs[i];
775 if (d >= 0 && !(tensor_visited[d >> 5] & (1u << (d & 0x1f))))
776 {
777 ccv_array_push(next, &d); // Push to the next round to be undead.
778 tensor_visited[d >> 5] |= (1u << (d & 0x1f));
779 }
780 } else {
781 // Clean up the outputs.
782 exec_symbol_info->outputs[i] = CCV_NNC_NO_TENSOR_SYMBOL;
783 assert(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->outputs == exec_symbol_info->outputs)((void) sizeof ((((ccv_nnc_graph_exec_symbol_info_t*)((void*)
(((char*)((simplify->graph->exec_symbol_info)->data)
) + (size_t)(simplify->graph->exec_symbol_info)->rsize
* (size_t)(exec_idx))))->outputs == exec_symbol_info->
outputs) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_info_t
*)((void*)(((char*)((simplify->graph->exec_symbol_info)
->data)) + (size_t)(simplify->graph->exec_symbol_info
)->rsize * (size_t)(exec_idx))))->outputs == exec_symbol_info
->outputs) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(simplify->graph->exec_symbol_info, exec_idx))->outputs == exec_symbol_info->outputs"
, "ccv_nnc_symbolic_graph_simplify.c", 783, __extension__ __PRETTY_FUNCTION__
); }))
;
784 }
785}
786
787static void _ccv_nnc_symbolic_graph_pruning(ccv_nnc_symbolic_graph_simplify_t* const simplify, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
788{
789 uint32_t* const tensor_visited = (uint32_t*)cccalloccalloc(sizeof(uint32_t), (simplify->tensor_symbol_info_size + 31) >> 5);
790 ccv_array_t* const preserve[2] = {
791 ccv_array_new(sizeof(int), output_size, 0),
792 ccv_array_new(sizeof(int), 0, 0),
793 };
794 int i, j;
795 ccv_array_t** const r_alias_refs = (ccv_array_t**)cccalloccalloc(sizeof(ccv_array_t*), simplify->tensor_symbol_info_size);
796 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
5
Assuming the condition is false
6
Loop condition is false. Execution continues on line 805
797 if (simplify->tensor_symbol_info[i].alias_ref)
798 {
799 const int alias_ref = simplify->tensor_symbol_info[i].alias_ref - 1;
800 assert(alias_ref < simplify->tensor_symbol_info_size)((void) sizeof ((alias_ref < simplify->tensor_symbol_info_size
) ? 1 : 0), __extension__ ({ if (alias_ref < simplify->
tensor_symbol_info_size) ; else __assert_fail ("alias_ref < simplify->tensor_symbol_info_size"
, "ccv_nnc_symbolic_graph_simplify.c", 800, __extension__ __PRETTY_FUNCTION__
); }))
;
801 if (!r_alias_refs[alias_ref])
802 r_alias_refs[alias_ref] = ccv_array_new(sizeof(int), 1, 0);
803 ccv_array_push(r_alias_refs[alias_ref], &i);
804 }
805 uint32_t* const exec_dead = simplify->exec_dead;
806 uint32_t* const tensor_dead = simplify->tensor_dead;
807 int* const output_execs = simplify->output_execs;
808 _ccv_nnc_symbolic_graph_simplify_update_output_execs(simplify);
809 // Mark everything visited as dead.
810 ccv_nnc_graph_visit_for(simplify->visit, simplify->exec_symbol_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (simplify->visit)->size
; _i_++) { const int idx __attribute__((unused)) = (simplify->
visit)->node[_i_].index; const int _node_unused_ __attribute__
((unused)) = (simplify->visit)->node[_i_].term; typeof (
(simplify->exec_symbol_info)) const node __attribute__((unused
)) = (simplify->exec_symbol_info) + idx;
{
7
Loop condition is false. Execution continues on line 826
811 exec_dead[idx >> 5] |= (1u << (idx & 0x1f));
812 for (i = 0; i < node->input_size; i++)
813 {
814 const int d = node->inputs[i];
815 if (d >= 0)
816 tensor_dead[d >> 5] |= (1u << (d & 0x1f));
817 }
818 for (i = 0; i < node->output_size; i++)
819 {
820 const int d = node->outputs[i];
821 if (d >= 0)
822 tensor_dead[d >> 5] |= (1u << (d & 0x1f));
823 }
824 } ccv_nnc_graph_visit_endfor} }
825 // If the tensor symbol is used by other exec that is not visited, unmark it.
826 for (i = 0; i < simplify->exec_symbol_info_size; i++)
8
Assuming the condition is false
9
Loop condition is false. Execution continues on line 846
827 {
828 if (exec_dead[i >> 5] & (1u << (i & 0x1f)))
829 continue;
830 const ccv_nnc_graph_exec_symbol_info_t* const node = simplify->exec_symbol_info + i;
831 for (j = 0; j < node->input_size; j++)
832 {
833 const int d = node->inputs[j];
834 // Undead it.
835 if (d >= 0)
836 tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
837 }
838 for (j = 0; j < node->output_size; j++)
839 {
840 const int d = node->outputs[j];
841 // Undead it.
842 if (d >= 0)
843 tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
844 }
845 }
846 for (i = 0; i < output_size; i++)
10
Assuming 'i' is >= 'output_size'
11
Loop condition is false. Execution continues on line 848
847 ccv_array_push(preserve[0], &outputs[i].d);
848 int p = 0, q = 1;
849 // BFS to mark execs / tensors as not dead.
850 while (preserve[p]->rnum > 0)
12
Assuming the condition is true
13
Loop condition is true. Entering loop body
851 {
852 ccv_array_clear(preserve[q]);
853 // First, undead all relevant tensors.
854 for (i = 0; i < preserve[p]->rnum; i++)
14
Loop condition is true. Entering loop body
18
Assuming the condition is false
19
Loop condition is false. Execution continues on line 874
855 {
856 const int d = *(int*)ccv_array_get(preserve[p], i)((void*)(((char*)((preserve[p])->data)) + (size_t)(preserve
[p])->rsize * (size_t)(i)))
;
857 // Undead the outputs.
858 tensor_dead[d >> 5] &= ~(1u << (d & 0x1f));
859 int alias_ref = d;
860 if (simplify->tensor_symbol_info[d].alias_ref)
15
Assuming the condition is false
16
Taking false branch
861 {
862 alias_ref = simplify->tensor_symbol_info[d].alias_ref - 1;
863 tensor_dead[alias_ref >> 5] &= ~(1u << (alias_ref & 0x1f));
864 assert(r_alias_refs[alias_ref])((void) sizeof ((r_alias_refs[alias_ref]) ? 1 : 0), __extension__
({ if (r_alias_refs[alias_ref]) ; else __assert_fail ("r_alias_refs[alias_ref]"
, "ccv_nnc_symbolic_graph_simplify.c", 864, __extension__ __PRETTY_FUNCTION__
); }))
;
865 }
866 if (r_alias_refs[alias_ref])
17
Taking false branch
867 for (j = 0; j < r_alias_refs[alias_ref]->rnum; j++)
868 {
869 const int b = *(int*)ccv_array_get(r_alias_refs[alias_ref], j)((void*)(((char*)((r_alias_refs[alias_ref])->data)) + (size_t
)(r_alias_refs[alias_ref])->rsize * (size_t)(j)))
;
870 if (output_execs[b] >= 0) // Only revive if it is written alias.
871 tensor_dead[b >> 5] &= ~(1u << (b & 0x1f));
872 }
873 }
874 for (i = 0; i < preserve[p]->rnum; i++)
20
Loop condition is true. Entering loop body
875 {
876 const int d = *(int*)ccv_array_get(preserve[p], i)((void*)(((char*)((preserve[p])->data)) + (size_t)(preserve
[p])->rsize * (size_t)(i)))
;
877 const int output_exec = output_execs[d];
878 // Undead the exec.
879 if (output_exec >= 0)
21
Assuming 'output_exec' is >= 0
22
Taking true branch
880 _ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]);
23
Calling '_ccv_nnc_symbolic_graph_pruning_undead_exec'
881 int alias_ref = d;
882 if (simplify->tensor_symbol_info[d].alias_ref)
883 {
884 alias_ref = simplify->tensor_symbol_info[d].alias_ref - 1;
885 const int output_exec = output_execs[alias_ref];
886 if (output_exec >= 0)
887 _ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]);
888 }
889 if (r_alias_refs[alias_ref])
890 for (j = 0; j < r_alias_refs[alias_ref]->rnum; j++)
891 {
892 const int b = *(int*)ccv_array_get(r_alias_refs[alias_ref], j)((void*)(((char*)((r_alias_refs[alias_ref])->data)) + (size_t
)(r_alias_refs[alias_ref])->rsize * (size_t)(j)))
;
893 const int output_exec = output_execs[b];
894 if (output_exec >= 0)
895 _ccv_nnc_symbolic_graph_pruning_undead_exec(simplify, output_exec, tensor_visited, preserve[q]);
896 }
897 }
898 CCV_SWAP(p, q, i)((i) = (p), (p) = (q), (q) = (i));
899 }
900 ccfreefree(tensor_visited);
901 ccv_array_free(preserve[0]);
902 ccv_array_free(preserve[1]);
903 for (i = 0; i < simplify->tensor_symbol_info_size; i++)
904 if (r_alias_refs[i])
905 ccv_array_free(r_alias_refs[i]);
906 ccfreefree(r_alias_refs);
907}
908
909void ccv_nnc_symbolic_graph_simplify(ccv_nnc_symbolic_graph_t* const graph, const int* const passes, const int pass_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
910{
911 ccv_nnc_symbolic_graph_simplify_t* const simplify = _ccv_nnc_symbolic_graph_simplify_new(graph, sources, source_size, destinations, destination_size);
912 int i;
913 for (i = 0; i < pass_size; i++)
1
Assuming 'i' is < 'pass_size'
2
Loop condition is true. Entering loop body
914 switch (passes[i])
3
Control jumps to 'case CCV_NNC_SIMPLIFY_GRAPH_PRUNING:' at line 922
915 {
916 case CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION:
917 _ccv_nnc_symbolic_graph_common_subexpression_elimination(simplify, outputs, output_size);
918 break;
919 case CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT:
920 _ccv_nnc_symbolic_graph_data_transfer_opt(simplify, outputs, output_size);
921 break;
922 case CCV_NNC_SIMPLIFY_GRAPH_PRUNING:
923 _ccv_nnc_symbolic_graph_pruning(simplify, outputs, output_size);
4
Calling '_ccv_nnc_symbolic_graph_pruning'
924 break;
925 case CCV_NNC_SIMPLIFY_OPS_FUSION:
926 _ccv_nnc_symbolic_graph_ops_fusion(simplify, outputs, output_size);
927 break;
928 }
929 _ccv_nnc_symbolic_graph_simplify_apply(simplify);
930 _ccv_nnc_symbolic_graph_simplify_free(simplify);
931}