/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_symbolic_graph_memory_reduction.c
#include "ccv_nnc.h"
#include "ccv_nnc_easy.h"
#include "ccv_nnc_internal.h"
#include "ccv_internal.h"
#include "_ccv_nnc_symbolic_graph.h"

// MARK - Level-3.5 API

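// Overview: this pass trades compute for memory around datatype conversions.
// If the output of a CCV_NNC_DATATYPE_CONVERSION_FORWARD node (e.g. a widened
// fp16 -> fp32 copy) is consumed only by backward-pass nodes, the large
// converted tensor does not need to stay resident from the forward pass:
// a fresh conversion node can be inserted right before the backward consumers
// and the consumers rewired to it, so only the smaller original tensor is
// kept alive in between.
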
// Clear the marked bit for every tensor that is never actually used, by
// AND-ing the two bitsets one 32-bit word at a time.
static void _ccv_nnc_remove_unused_from_marked(const uint32_t* const tensor_used, const int size, uint32_t* const tensor_marked)
{
	int i;
	for (i = 0; i < size; i++)
		tensor_marked[i] &= tensor_used[i];
}
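
// Throughout this file, tensor bitsets store one bit per tensor symbol:
// word index d >> 5, bit mask 1u << (d & 0x1f).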

typedef struct {
	int okay; // Set once this tensor is fully vetted for reconversion.
	int original; // The tensor symbol the conversion was originally computed from.
	ccv_nnc_tensor_param_t info; // Parameters for the new (reconverted) tensor symbol.
	ccv_array_t* old_conversion_nodes; // Existing conversion nodes that produce this tensor.
	struct {
		ccv_array_t* sources; // Nodes the new conversion node will depend on.
		ccv_array_t* nodes; // Backward nodes that consume this tensor.
	} reconversion;
} ccv_nnc_conversion_info_t;

// Reversed adjacency: outgoings here lists the predecessors of a node.
typedef struct {
	ccv_array_t* outgoings;
} ccv_nnc_graph_exec_symbol_reverse_t;

void ccv_nnc_symbolic_graph_memory_reduction(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
{
	// Note that exec_symbol_info and tensor_symbol_info cannot be accessed once I start to mutate the graph.
	// Therefore, I do the mutation as the very last step, to carefully step away from that possibility.
	ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0);
	ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, 0);
	ccv_nnc_graph_visit_t* const visit = ccv_nnc_graph_visit_new(graph, exec_symbol_info, graph->exec_symbol_info->rnum, sources, source_size, destinations, destination_size, 0);
	ccv_nnc_symbolic_graph_symbol_infer(graph, visit, sources, source_size, destinations, destination_size, 0, 0, tensor_symbol_info, exec_symbol_info);
	const int tensor_symbol_info_size = graph->tensor_symbol_info->rnum;
	const int exec_symbol_info_size = graph->exec_symbol_info->rnum;
	uint32_t* const tensor_marked = (uint32_t*)cccalloc(((tensor_symbol_info_size + 31) >> 5) * 2, sizeof(uint32_t));
	uint32_t* const tensor_used = tensor_marked + ((tensor_symbol_info_size + 31) >> 5);
	ccv_nnc_graph_exec_symbol_reverse_t* const reversed_nodes = cccalloc(exec_symbol_info_size, sizeof(ccv_nnc_graph_exec_symbol_reverse_t));
	int i, j, k;
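	// First pass over the graph in topological order: build the reversed
	// adjacency (predecessors of each node), mark every datatype-conversion
	// output as a reconversion candidate, and record which tensors the
	// backward-pass nodes read.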
	ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx) {
		if (node->flags & CCV_NNC_GRAPH_EXEC_DEAD)
			continue;
		if (node->outgoings)
			for (i = 0; i < node->outgoings->rnum; i++)
			{
				const int d = *(int*)ccv_array_get(node->outgoings, i);
				if (!reversed_nodes[d].outgoings)
					reversed_nodes[d].outgoings = ccv_array_new(sizeof(int), 1, 0);
				ccv_array_add_unique_int(reversed_nodes[d].outgoings, idx);
			}
		if (node->cmd.cmd == CCV_NNC_DATATYPE_CONVERSION_FORWARD && node->output_size >= 1 && node->outputs[0] >= 0)
		{
			const int d = node->outputs[0];
			// If this tensor is an alias, or assigned (while loop), or bypassed (case..of), skip.
			if (tensor_symbol_info[d].alias_ref || tensor_symbol_info[d].assign_ref || tensor_symbol_info[d].bypass_ref ||
				tensor_symbol_info[d].r_assign_ref || tensor_symbol_info[d].r_bypass_ref)
				continue;
			tensor_marked[d >> 5] |= (1u << (d & 0x1f));
		} else if (ccv_nnc_cmd_is_backward(node->cmd))
			for (i = 0; i < node->input_size; i++)
			{
				const int d = node->inputs[i];
				if (d >= 0)
					tensor_used[d >> 5] |= (1u << (d & 0x1f));
			}
	} ccv_nnc_graph_visit_endfor
	// If a tensor is marked but never used in the backward pass, there is no need to reduce it.
	_ccv_nnc_remove_unused_from_marked(tensor_used, (tensor_symbol_info_size + 31) >> 5, tensor_marked);
	// If this tensor is pointed to by an alias, we don't want to reconvert it.
	for (i = 0; i < tensor_symbol_info_size; i++)
		if (tensor_symbol_info[i].alias_ref)
		{
			const int d = tensor_symbol_info[i].alias_ref - 1;
			// Unmark it.
			if ((tensor_marked[d >> 5] & (1u << (d & 0x1f))))
				tensor_marked[d >> 5] &= ~(1u << (d & 0x1f));
		}
	ccv_nnc_exec_dep_t exec_deps = ccv_nnc_exec_dep_new(graph, visit);
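	// exec_deps captures the transitive dependencies between exec nodes (with
	// hop counts), so below we can ask whether one node already reaches
	// another and roughly how far apart they are.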
	// Now tensor_marked only contains the tensors that we think are beneficial to reconvert. Find the best place to insert the conversion.
	ccv_nnc_conversion_info_t* const conversion_info = cccalloc(tensor_symbol_info_size, sizeof(ccv_nnc_conversion_info_t));
	ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx) {
		if (node->cmd.cmd == CCV_NNC_DATATYPE_CONVERSION_FORWARD && node->output_size >= 1 && node->outputs[0] >= 0)
		{
			const int d = node->outputs[0];
			if (d >= 0 && (tensor_marked[d >> 5] & (1u << (d & 0x1f))))
			{
				conversion_info[d].original = node->inputs[0];
				if (!conversion_info[d].old_conversion_nodes)
					conversion_info[d].old_conversion_nodes = ccv_array_new(sizeof(int), 0, 0);
				ccv_array_add_unique_int(conversion_info[d].old_conversion_nodes, idx);
			}
		} else if (ccv_nnc_cmd_is_backward(node->cmd))
			for (i = 0; i < node->input_size; i++)
			{
				const int d = node->inputs[i];
				if (d >= 0 && (tensor_marked[d >> 5] & (1u << (d & 0x1f))))
				{
					if (!conversion_info[d].reconversion.nodes)
						conversion_info[d].reconversion.nodes = ccv_array_new(sizeof(int), 0, 0);
					ccv_array_add_unique_int(conversion_info[d].reconversion.nodes, idx);
				}
			}
	} ccv_nnc_graph_visit_endfor
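	// For each candidate tensor, decide whether reconversion actually pays
	// off, and work out where the new conversion node should hang off the graph.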
	for (i = 0; i < tensor_symbol_info_size; i++)
	{
		if (!conversion_info[i].reconversion.nodes)
			continue;
		// Check to see if reconversion is beneficial (i.e. the converted output is larger than the original input).
		const int original_datatype = tensor_symbol_info[conversion_info[i].original].info.datatype;
		const int converted_datatype = tensor_symbol_info[i].info.datatype;
		if (CCV_GET_DATA_TYPE_SIZE(original_datatype) >= CCV_GET_DATA_TYPE_SIZE(converted_datatype))
			continue;
		// If we have more than one destination, we need to find the common ancestor.
		ccv_array_t* const nodes = conversion_info[i].reconversion.nodes;
		ccv_array_t* const old_conversion_nodes = conversion_info[i].old_conversion_nodes;
		assert(nodes->rnum > 0);
		assert(old_conversion_nodes && old_conversion_nodes->rnum > 0);
		int flag = 0;
		for (j = 0; j < nodes->rnum; j++)
		{
			const int d = *(int*)ccv_array_get(nodes, j);
			ccv_sparse_matrix_vector_t* const vector = ccv_get_sparse_matrix_vector(exec_deps.deps, d);
			assert(vector);
			for (k = 0; k < old_conversion_nodes->rnum; k++)
			{
				const int dd = *(int*)ccv_array_get(old_conversion_nodes, k);
				const int hop = ccv_nnc_exec_dep_hop(exec_deps, d, vector, dd);
				if (hop >= 0 && hop <= 3)
					flag = 1;
			}
			if (flag)
				break;
		}
		// If an existing conversion is already within a few hops of a consumer, there is no need to reconvert; abort the whole thing.
		if (flag)
			continue;
		ccv_array_t* const reconversion_sources = ccv_array_new(sizeof(int), 0, 0);
		for (j = 0; j < nodes->rnum; j++)
		{
			const int d = *(int*)ccv_array_get(nodes, j);
			ccv_array_t* const outgoings = reversed_nodes[d].outgoings;
			if (!outgoings)
				continue;
			int x, y;
			for (x = 0; x < outgoings->rnum; x++)
			{
				const int dd = *(int*)ccv_array_get(outgoings, x);
				int flag = 0;
				for (y = 0; !flag && y < nodes->rnum; y++)
				{
					if (j == y)
						continue;
					const int ddd = *(int*)ccv_array_get(nodes, y);
					// If the outgoing node is one of the consumer nodes, we cannot add it as a source.
					if (ddd == dd)
					{
						flag = 1;
						continue;
					}
					// Check dependencies: if there is a dependency from the y node (ddd) to dd, dd cannot be a source.
					const int checked = ccv_nnc_exec_dep_check(exec_deps, dd, 0, ddd);
					if (checked)
						flag = 1;
				}
				if (!flag)
					ccv_array_add_unique_int(reconversion_sources, dd);
			}
		}
		// If there are no sources, abort the whole thing.
		if (reconversion_sources->rnum == 0)
		{
			ccv_array_free(reconversion_sources);
			continue;
		}
		// Mark it as ready to be reconverted.
		conversion_info[i].reconversion.sources = reconversion_sources;
		conversion_info[i].info = tensor_symbol_info[i].info;
		conversion_info[i].okay = 1;
	}
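	// At this point conversion_info is final: each okay entry carries the new
	// tensor parameters, the source nodes to hang the new conversion off, and
	// the backward consumers to rewire.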
	// Do the graph mutation now, based on the conversion info.
	for (i = 0; i < tensor_symbol_info_size; i++)
		if (conversion_info[i].okay)
		{
			const ccv_nnc_tensor_symbol_t reconverted = ccv_nnc_tensor_symbol_new(graph, conversion_info[i].info, 0);
			const ccv_nnc_tensor_symbol_t original = (ccv_nnc_tensor_symbol_t){
				.graph = graph,
				.d = conversion_info[i].original
			};
			const ccv_nnc_graph_exec_symbol_t reconversion_node = ccv_nnc_graph_exec_symbol_new(graph, CMD_DATATYPE_CONVERSION_FORWARD(), TENSOR_SYMBOL_LIST(original), TENSOR_SYMBOL_LIST(reconverted), 0);
			ccv_array_t* const nodes = conversion_info[i].reconversion.nodes;
			assert(nodes && nodes->rnum > 0);
			ccv_array_t* const sources = conversion_info[i].reconversion.sources;
			assert(sources && sources->rnum > 0);
			for (j = 0; j < sources->rnum; j++)
			{
				const int d = *(int*)ccv_array_get(sources, j);
				ccv_nnc_graph_exec_symbol_concat(graph, (ccv_nnc_graph_exec_symbol_t){
					.graph = graph,
					.d = d,
				}, reconversion_node);
			}
			for (j = 0; j < nodes->rnum; j++)
			{
				const int d = *(int*)ccv_array_get(nodes, j);
				ccv_nnc_graph_exec_symbol_concat(graph, reconversion_node, (ccv_nnc_graph_exec_symbol_t){
					.graph = graph,
					.d = d
				});
				ccv_nnc_graph_exec_symbol_info_t* const destination_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, d);
				// Point the consumer's inputs at the reconverted tensor instead of the original conversion output.
				for (k = 0; k < destination_info->input_size; k++)
					if (destination_info->inputs[k] == i)
						destination_info->inputs[k] = reconverted.d;
			}
		}
	ccv_nnc_graph_visit_free(visit);
	ccv_nnc_exec_dep_free(exec_deps);
	ccfree(tensor_marked); // Frees tensor_used too; both live in the same allocation.
	for (i = 0; i < tensor_symbol_info_size; i++)
	{
		if (conversion_info[i].old_conversion_nodes)
			ccv_array_free(conversion_info[i].old_conversion_nodes);
		if (conversion_info[i].reconversion.nodes)
			ccv_array_free(conversion_info[i].reconversion.nodes);
		if (conversion_info[i].reconversion.sources)
			ccv_array_free(conversion_info[i].reconversion.sources);
	}
	for (i = 0; i < exec_symbol_info_size; i++)
		if (reversed_nodes[i].outgoings)
			ccv_array_free(reversed_nodes[i].outgoings);
	ccfree(reversed_nodes);
	ccfree(conversion_info);
}
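
// A minimal usage sketch (illustrative, not part of this file): run the pass
// over the whole graph after the backward pass has been constructed, assuming
// the SYMBOLIC_GRAPH_SOURCES / SYMBOLIC_GRAPH_DESTINATIONS convenience macros
// from ccv_nnc_easy.h, which expand to the graph's source / destination exec
// symbols along with their counts:
//
//   ccv_nnc_symbolic_graph_memory_reduction(graph,
//     SYMBOLIC_GRAPH_SOURCES(graph), SYMBOLIC_GRAPH_DESTINATIONS(graph));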