/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_tensor_tape.c
/**********************************************************
 * C-based/Cached/Core Computer Vision Library
 * Liu Liu, 2010-02-01
 **********************************************************/

/**********************************************************
 * CCV - Neural Network Collection
 **********************************************************/

#include "_ccv_nnc_tensor_tape.h"
#include "_ccv_nnc_graph.h"
#include "ccv_nnc_internal.h"
#include "ccv_nnc_easy.h"
#ifdef HAVE_CUDA
#include "gpu/ccv_nnc_compat.h"
#elif defined(HAVE_MPS)
#include "mps/ccv_nnc_mps.h"
#endif

ccv_nnc_tensor_tape_t* ccv_nnc_tensor_tape_new(void)
{
	ccv_nnc_tensor_tape_t* tape = (ccv_nnc_tensor_tape_t*)ccmalloc(sizeof(ccv_nnc_tensor_tape_t));
	tape->tensor_data = ccv_array_new(sizeof(ccv_nnc_tape_tensor_data_array_t), 0, 0);
	tape->exec_data = ccv_array_new(sizeof(ccv_nnc_tape_exec_data_array_t), 0, 0);
	return tape;
}
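
/* A minimal usage sketch (illustrative only, not part of this file): the tape
 * is created once, threaded through the forward and backward runs of a graph
 * that contains while-loops so each iteration's tensor versions and exec
 * numbering survive, then freed. Graph construction is elided.
 *
 *   ccv_nnc_tensor_tape_t* tape = ccv_nnc_tensor_tape_new();
 *   // ... run the forward then the backward graph, passing `tape` through
 *   // the runtime (ccv_nnc_graph_run() accepts a tensor tape argument) ...
 *   ccv_nnc_tensor_tape_free(tape);
 */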

static ccv_nnc_tensor_t* _ccv_nnc_tensor_from_tensor_multiview(const ccv_nnc_graph_t* const* const graphs, const int graph_size, ccv_nnc_tensor_multiview_t* const mv)
{
	int i;
	ccv_nnc_tensor_t* tensor = (ccv_nnc_tensor_t*)mv;
	for (i = 0; CCV_IS_TENSOR_MULTIVIEW(tensor) && i < graph_size; i++)
	{
		const int count = (int)graphs[i]->while_count;
		while (CCV_IS_TENSOR_MULTIVIEW(tensor) &&
			   (((ccv_nnc_tensor_multiview_t*)tensor)->anchor == (intptr_t)graphs[i] ||
				((ccv_nnc_tensor_multiview_t*)tensor)->anchor == (intptr_t)graphs[i]->pair))
		{
			ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)tensor;
			const int off = mv->kind;
			const int mod = mv->repeat;
			// Pick the view for the current loop count; the loop exits once we
			// reach the root (a non-multiview tensor).
			tensor = CCV_NNC_MULTIVIEW_DATA(mv)[count >= off ? ((count - off) % mod) + off : count]; // Unwrap.
		}
	}
	return tensor;
}
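
/* Worked example for the unwrap index above (values assumed): with
 * off = mv->kind = 1, mod = mv->repeat = 2 and while_count = 5, the selected
 * view is ((5 - 1) % 2) + 1 = 1; with while_count = 0 (< off) view 0 is taken
 * directly. Views below `off` are iteration-specific, the remaining `mod`
 * views are reused cyclically.
 */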

#define CCV_NNC_IS_TAPE_TENSOR_DATA_ARRAY_POS(ptr) ((uintptr_t)(ptr) & 1)
#define CCV_NUMERIC_DATA_NO_ALLOC(data) ((uintptr_t)(data.u8) & 1)
// Round an int count up to a multiple of 4, i.e. align the dim array to 16 bytes.
#define ALIGN_16(x) (((x) + 3) & -4)
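
/* For example, ALIGN_16(5) = (5 + 3) & -4 = 8 and ALIGN_16(3) = 4: the dim
 * count is padded to a multiple of 4 ints (16 bytes) so the payload that
 * follows the dim array starts 16-byte aligned.
 */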

// Simple allocator from ccv_array_t.
static void _ccv_nnc_tape_tensor_data_array_pos_new(ccv_array_t* const tensor_data, int* const pos_ref, ccv_nnc_tape_tensor_data_array_t** const tape_tensor_data_ref)
{
	int pos = tensor_data->rnum;
	ccv_array_resize(tensor_data, pos + 1);
	*pos_ref = (pos << 1) | 1;
	ccv_nnc_tape_tensor_data_array_t* const tape_tensor_data = (ccv_nnc_tape_tensor_data_array_t*)ccv_array_get(tensor_data, pos);
	memset(tape_tensor_data, 0, sizeof(ccv_nnc_tape_tensor_data_array_t));
	*tape_tensor_data_ref = tape_tensor_data;
}

static ccv_nnc_tape_tensor_data_array_t* _ccv_nnc_tape_tensor_data_array_get(const ccv_array_t* const tensor_data, const int pos)
{
	assert((pos >> 1) <= tensor_data->rnum);
	return (ccv_nnc_tape_tensor_data_array_t*)ccv_array_get(tensor_data, pos >> 1);
}
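
/* The tagged position packs an array index into the pointer-sized alias_ref
 * field: index 3 is stored as (3 << 1) | 1 = 7 and decoded as 7 >> 1 = 3. The
 * low bit distinguishes a tape position from a genuine alias pointer, which
 * is always at least 2-byte aligned.
 */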

static void _ccv_nnc_tape_tensor_data_move(ccv_nnc_tape_tensor_data_t* const old_data, ccv_nnc_tape_tensor_data_t* const new_data, const int offset, const ccv_nnc_graph_t* const* const graphs, const int graph_size, const int* const dim, const int dim_count)
{
	int i;
	if (offset == ccv_max(dim_count, graph_size) - 1)
	{
		const int data_dim = offset < dim_count ? dim[offset] - 1 : 0;
		const int graph_dim = offset < graph_size ? graphs[offset]->while_count + 1 : 0;
		assert(old_data <= new_data);
		// Do the actual copy or set.
		if (!old_data)
			for (i = ccv_max(data_dim, graph_dim); i >= 0; i--)
				new_data[i].data.u8 = 0;
		else {
			for (i = graph_dim; i > data_dim; i--)
				new_data[i].data.u8 = 0;
			for (i = data_dim; i >= 0; i--)
				new_data[i] = old_data[i];
		}
	} else {
		int old_data_step = 1;
		for (i = offset + 1; i < dim_count; i++)
			old_data_step *= dim[i];
		const int new_dim_count = ccv_max(graph_size, dim_count);
		int new_data_step = 1;
		for (i = offset + 1; i < new_dim_count; i++)
		{
			int old_dim = (i < dim_count) ? dim[i] : 1;
			int graph_dim = (i < graph_size) ? (int)(graphs[i]->while_count + 2) : 1;
			new_data_step *= ccv_max(old_dim, graph_dim);
		}
		const int data_dim = offset < dim_count ? dim[offset] - 1 : 0;
		const int graph_dim = offset < graph_size ? graphs[offset]->while_count + 1 : 0;
		for (i = ccv_max(data_dim, graph_dim); i >= 0; i--)
			_ccv_nnc_tape_tensor_data_move((old_data && offset < dim_count && i < dim[offset]) ? old_data + i * old_data_step : 0, new_data + i * new_data_step, offset + 1, graphs, graph_size, dim, dim_count);
	}
}
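
/* Illustrative walk-through (assumed shapes): resizing from old dims {2, 3}
 * to new dims {2, 5} gives old_data_step = 3 and new_data_step = 5 at
 * offset 0, so row i moves from old offset i * 3 to new offset i * 5. Rows
 * and elements are visited from the highest index down because both layouts
 * share one buffer: copying backwards never clobbers a source element that
 * is still pending.
 */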

static void _ccv_nnc_tape_tensor_data_array_resize(ccv_nnc_tape_tensor_data_array_t* const data_array, const ccv_nnc_graph_t* const* const graphs, const int graph_size)
{
	const int new_dim_count = ccv_max(graph_size, data_array->dim_count);
	int i;
	int size = 1;
	for (i = 0; i < new_dim_count; i++)
	{
		int old_dim = (i < data_array->dim_count) ? data_array->dim[i] : 1;
		int graph_dim = (i < graph_size) ? (int)(graphs[i]->while_count + 2) : 1;
		size *= ccv_max(old_dim, graph_dim);
	}
	data_array->dim = ccrealloc(data_array->dim, sizeof(int) * ALIGN_16(new_dim_count) + sizeof(ccv_nnc_tape_tensor_data_t) * size);
	ccv_nnc_tape_tensor_data_t* const old_data = (ccv_nnc_tape_tensor_data_t*)(data_array->dim + ALIGN_16(data_array->dim_count));
	ccv_nnc_tape_tensor_data_t* const new_data = (ccv_nnc_tape_tensor_data_t*)(data_array->dim + ALIGN_16(new_dim_count));
	// Note that old_data and new_data occupy the same memory region. Since the
	// resize only ever grows the array, moving the data from the highest index
	// down avoids overwriting elements that have not been moved yet.
	assert(graph_size > 0);
	assert(data_array->dim_count > 0);
	_ccv_nnc_tape_tensor_data_move(old_data, new_data, 0, graphs, graph_size, data_array->dim, data_array->dim_count);
	data_array->data = new_data;
	// We are done, update the dim.
	for (i = 0; i < new_dim_count; i++)
	{
		int old_dim = (i < data_array->dim_count) ? data_array->dim[i] : 1;
		int graph_dim = (i < graph_size) ? (int)(graphs[i]->while_count + 2) : 1;
		data_array->dim[i] = ccv_max(old_dim, graph_dim);
	}
	data_array->dim_count = new_dim_count;
}

static void _ccv_nnc_tensor_from_tape(ccv_array_t* const tensor_data, ccv_nnc_tensor_t* const tensor, const int flags, const ccv_nnc_graph_t* const* const graphs, const int graph_size, const int create_if_missing)
{
	assert(graph_size > 0);
	ccv_nnc_tensor_t* tensor_ref = tensor;
	while (tensor_ref->alias_ref && !CCV_NNC_IS_TAPE_TENSOR_DATA_ARRAY_POS(tensor_ref->alias_ref))
	{
		tensor_ref = (ccv_nnc_tensor_t*)tensor_ref->alias_ref;
		if (CCV_IS_TENSOR_MULTIVIEW(tensor_ref))
			tensor_ref = _ccv_nnc_tensor_from_tensor_multiview(graphs, graph_size, (ccv_nnc_tensor_multiview_t*)tensor_ref);
	}
	ccv_nnc_tape_tensor_data_array_t* data_array;
	if (!tensor_ref->alias_ref)
	{
		// Create data array.
		int pos;
		_ccv_nnc_tape_tensor_data_array_pos_new(tensor_data, &pos, &data_array);
		tensor_ref->alias_ref = pos;
	} else
		data_array = _ccv_nnc_tape_tensor_data_array_get(tensor_data, (int)tensor_ref->alias_ref);
	// Either the data exists, or it doesn't and we need to create one.
	int i;
	if (!data_array->dim)
	{
		int size = 1;
		for (i = 0; i < graph_size; i++)
			size *= (int)(graphs[i]->while_count + 2);
		data_array->dim_count = graph_size;
		data_array->dim = (int*)ccmalloc(sizeof(int) * ALIGN_16(graph_size) + sizeof(ccv_nnc_tape_tensor_data_t) * size);
		for (i = 0; i < graph_size; i++)
			data_array->dim[i] = (int)(graphs[i]->while_count + 2);
		data_array->data = (ccv_nnc_tape_tensor_data_t*)(data_array->dim + ALIGN_16(graph_size));
		for (i = 0; i < size; i++)
			data_array->data[i].data.u8 = 0;
	} else {
		int flag = (data_array->dim_count < graph_size);
		for (i = 0; !flag && i < graph_size; i++)
			flag = (data_array->dim[i] <= graphs[i]->while_count + 1);
		if (flag)
			_ccv_nnc_tape_tensor_data_array_resize(data_array, graphs, graph_size);
	}
	// Compute the index.
	int idx, step;
	idx = (graphs[graph_size - 1]->while_count + 1);
	step = data_array->dim[graph_size - 1];
	for (i = graph_size - 2; i >= 0; i--)
	{
		idx += (graphs[i]->while_count + 1) * step;
		step *= data_array->dim[i];
	}
	ccv_numeric_data_t data = data_array->data[idx].data;
	if (!data.u8)
	{
		// Nothing recorded at this index yet.
		if (!create_if_missing)
		{
			if (data_array->data[idx].data.u8)
				data.u8 = (unsigned char*)((uintptr_t)data_array->data[idx].data.u8 | (uintptr_t)1);
			else
				// Fall back to the tensor's original pointer, tagged with the
				// low bit so the tape knows it doesn't own this allocation.
				data.u8 = data_array->data[idx].data.u8 = (unsigned char*)((uintptr_t)tensor_ref->data.u8 | (uintptr_t)1);
		} else {
			const size_t size = ccv_nnc_tensor_data_size(tensor->info);
			data_array->data[idx].type = tensor->info.type;
#ifdef HAVE_CUDA
			if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY)
				data_array->data[idx].data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), size);
			else
				ccmemalign((void **)&data_array->data[idx].data.u8, 64, size);
#elif defined(HAVE_MPS)
			if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY)
				data_array->data[idx].data.u8 = (uint8_t*)mpobjmalloc(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), size);
			else
				ccmemalign((void **)&data_array->data[idx].data.u8, 64, size);
#else
			assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
			ccmemalign((void **)&data_array->data[idx].data.u8, 64, size);
#endif
			data = data_array->data[idx].data;
		}
	}
	tensor->data.u8 = (unsigned char*)((uintptr_t)data.u8 & ~(uintptr_t)1);
}
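
/* Index computation example (assumed dims and counts): with two nested loops,
 * data_array->dim = {3, 4} and while counts {1, 2}, the slot is
 * idx = (2 + 1) + (1 + 1) * 4 = 11, a row-major index where the innermost
 * graph varies fastest. The + 1 shifts each axis so index 0 stays free,
 * presumably for the pre-loop version of the tensor.
 */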

void ccv_nnc_tensor_tape_io(ccv_nnc_tensor_tape_t* const tape, const ccv_nnc_graph_t* const graph, const int* const input_flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, const int* const output_flags, ccv_nnc_tensor_t* const* const outputs, const int output_size)
{
	int i, tape_io = 0;
	for (i = 0; i < input_size && !tape_io; i++)
		if (inputs[i] && CCV_GET_TAPE_ALLOC(inputs[i]->type))
			tape_io = 1;
	for (i = 0; i < output_size && !tape_io; i++)
		if (outputs[i] && CCV_GET_TAPE_ALLOC(outputs[i]->type))
			tape_io = 1;
	// If no tensor is tape-allocated, the inputs and outputs are used directly.
	if (!tape_io)
		return;
	// Walk to the root graph, recording the path taken along the way.
	// That path is what lets us unwrap multi-view tensors.
	assert(graph);
	const ccv_nnc_graph_t* curr_graph = graph;
	int d;
	for (d = 0; curr_graph; d++)
		curr_graph = curr_graph->p;
	curr_graph = graph;
	const int graph_size = d;
	assert(graph_size > 0);
	const ccv_nnc_graph_t* graphs[graph_size];
	for (d = graph_size - 1; curr_graph; d--, curr_graph = curr_graph->p)
		graphs[d] = curr_graph;
	// Now, go through the inputs / outputs and update.
	for (i = 0; i < input_size; i++)
		if (inputs[i] && CCV_GET_TAPE_ALLOC(inputs[i]->type))
			_ccv_nnc_tensor_from_tape(tape->tensor_data, inputs[i], input_flags ? input_flags[i] : 0, graphs, graph_size, 0);
	for (i = 0; i < output_size; i++)
		if (outputs[i] && CCV_GET_TAPE_ALLOC(outputs[i]->type))
			_ccv_nnc_tensor_from_tape(tape->tensor_data, outputs[i], output_flags ? output_flags[i] : 0, graphs, graph_size, 1); // Create if not found; acceptable for an output tensor.
}
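
/* Path construction example (hypothetical nesting): for a graph nested two
 * levels deep, the first loop counts d = 3 (graph, parent, root) and the
 * second fills graphs[] = {root, parent, graph}, so graphs[0] is always the
 * root and graphs[graph_size - 1] the innermost graph, whose while_count
 * advances fastest in the row-major index.
 */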

#define CCV_NNC_IS_TAPE_EXEC_DATA_ARRAY_POS(ptr) ((uintptr_t)(ptr) & 1)

// Simple allocator from ccv_array_t.
static void _ccv_nnc_tape_exec_data_array_pos_new(ccv_array_t* const exec_data, int* const pos_ref, ccv_nnc_tape_exec_data_array_t** const tape_exec_data_ref)
{
	int pos = exec_data->rnum;
	ccv_array_resize(exec_data, pos + 1);
	*pos_ref = (pos << 1) | 1;
	ccv_nnc_tape_exec_data_array_t* const tape_exec_data = (ccv_nnc_tape_exec_data_array_t*)ccv_array_get(exec_data, pos);
	memset(tape_exec_data, 0, sizeof(ccv_nnc_tape_exec_data_array_t));
	*tape_exec_data_ref = tape_exec_data;
}

static ccv_nnc_tape_exec_data_array_t* _ccv_nnc_tape_exec_data_array_get(const ccv_array_t* const exec_data, const int pos)
{
	assert((pos >> 1) <= exec_data->rnum);
	return (ccv_nnc_tape_exec_data_array_t*)ccv_array_get(exec_data, pos >> 1);
}

static void _ccv_nnc_tape_exec_data_move(uint64_t* const old_data, uint64_t* const new_data, const int offset, const uint64_t* const while_counts, const int graph_size, const int* const dim, const int dim_count)
{
	int i;
	if (offset == ccv_max(dim_count, graph_size) - 1)
	{
		const int data_dim = offset < dim_count ? dim[offset] - 1 : 0;
		const int graph_dim = offset < graph_size ? while_counts[offset] : 0;
		assert(old_data <= new_data);
		// Do the actual copy or set.
		if (!old_data)
			for (i = ccv_max(data_dim, graph_dim); i >= 0; i--)
				new_data[i] = 0;
		else {
			for (i = graph_dim; i > data_dim; i--)
				new_data[i] = 0;
			for (i = data_dim; i >= 0; i--)
				new_data[i] = old_data[i];
		}
	} else {
		int old_data_step = 1;
		for (i = offset + 1; i < dim_count; i++)
			old_data_step *= dim[i];
		const int new_dim_count = ccv_max(graph_size, dim_count);
		int new_data_step = 1;
		for (i = offset + 1; i < new_dim_count; i++)
		{
			int old_dim = (i < dim_count) ? dim[i] : 1;
			int graph_dim = (i < graph_size) ? (int)(while_counts[i] + 1) : 1;
			new_data_step *= ccv_max(old_dim, graph_dim);
		}
		const int data_dim = offset < dim_count ? dim[offset] - 1 : 0;
		const int graph_dim = offset < graph_size ? while_counts[offset] : 0;
		for (i = ccv_max(data_dim, graph_dim); i >= 0; i--)
			_ccv_nnc_tape_exec_data_move((old_data && offset < dim_count && i < dim[offset]) ? old_data + i * old_data_step : 0, new_data + i * new_data_step, offset + 1, while_counts, graph_size, dim, dim_count);
	}
}

static void _ccv_nnc_tape_exec_data_array_resize(ccv_nnc_tape_exec_data_array_t* const data_array, const uint64_t* const while_counts, const int graph_size)
{
	const int new_dim_count = ccv_max(graph_size, data_array->dim_count);
	int i;
	int size = 1;
	for (i = 0; i < new_dim_count; i++)
	{
		int old_dim = (i < data_array->dim_count) ? data_array->dim[i] : 1;
		int graph_dim = (i < graph_size) ? (int)(while_counts[i] + 1) : 1;
		size *= ccv_max(old_dim, graph_dim);
	}
	data_array->dim = ccrealloc(data_array->dim, sizeof(int) * ALIGN_16(new_dim_count) + sizeof(uint64_t) * size);
	uint64_t* const old_data = (uint64_t*)(data_array->dim + ALIGN_16(data_array->dim_count));
	uint64_t* const new_data = (uint64_t*)(data_array->dim + ALIGN_16(new_dim_count));
	// Note that old_data and new_data occupy the same memory region. Since the
	// resize only ever grows the array, moving the data from the highest index
	// down avoids overwriting elements that have not been moved yet.
	assert(graph_size > 0);
	assert(data_array->dim_count > 0);
	_ccv_nnc_tape_exec_data_move(old_data, new_data, 0, while_counts, graph_size, data_array->dim, data_array->dim_count);
	data_array->data = new_data;
	// We are done, update the dim.
	for (i = 0; i < new_dim_count; i++)
	{
		int old_dim = (i < data_array->dim_count) ? data_array->dim[i] : 1;
		int graph_dim = (i < graph_size) ? (int)(while_counts[i] + 1) : 1;
		data_array->dim[i] = ccv_max(old_dim, graph_dim);
	}
	data_array->dim_count = new_dim_count;
}
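
/* The exec variants above mirror _ccv_nnc_tape_tensor_data_move /
 * _ccv_nnc_tape_tensor_data_array_resize, but store one uint64_t numbering
 * per slot, size each axis as while_count + 1, and index by while_count
 * directly, whereas the tensor data uses while_count + 2 with a + 1 index
 * offset.
 */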

uint64_t ccv_nnc_tensor_tape_numbering(ccv_nnc_tensor_tape_t* const tape, const ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec)
{
	assert(exec.graph == graph);
	ccv_nnc_graph_exec_info_t* exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
	if (!exec_info->alias_ref && exec_info->pair_ref)
		exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec_info->pair_ref - 1);
	ccv_nnc_tape_exec_data_array_t* const data_array = _ccv_nnc_tape_exec_data_array_get(tape->exec_data, (int)exec_info->alias_ref);
	const ccv_nnc_graph_t* curr_graph = graph;
	int i;
	for (i = 0; curr_graph; i++)
		curr_graph = curr_graph->p;
	curr_graph = graph;
	const int graph_size = i;
	uint64_t while_counts[graph_size];
	for (i = graph_size - 1; curr_graph; i--, curr_graph = curr_graph->p)
		while_counts[i] = curr_graph->while_count;
	assert(graph_size <= data_array->dim_count);
	int idx = 0, step = 1;
	for (i = graph_size - 1; i >= 0; i--)
	{
		assert(while_counts[i] < data_array->dim[i]);
		idx += while_counts[i] * step;
		step *= data_array->dim[i];
	}
	return data_array->data[idx];
}

void ccv_nnc_tensor_tape_set_numbering(ccv_nnc_tensor_tape_t* const tape, ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const uint64_t numbering)
{
	ccv_nnc_tape_exec_data_array_t* data_array;
	assert(exec.graph == graph);
	ccv_nnc_graph_exec_info_t* const exec_info = (ccv_nnc_graph_exec_info_t*)ccv_array_get(graph->exec_info, exec.d);
	if (exec_info->alias_ref)
	{
		assert(CCV_NNC_IS_TAPE_EXEC_DATA_ARRAY_POS(exec_info->alias_ref));
		data_array = _ccv_nnc_tape_exec_data_array_get(tape->exec_data, (int)exec_info->alias_ref);
	} else {
		int pos;
		_ccv_nnc_tape_exec_data_array_pos_new(tape->exec_data, &pos, &data_array);
		exec_info->alias_ref = pos;
	}
	const ccv_nnc_graph_t* curr_graph = graph;
	assert(curr_graph);
	int i;
	for (i = 0; curr_graph; i++)
		curr_graph = curr_graph->p;
	curr_graph = graph;
	const int graph_size = i;
	assert(graph_size > 0);
	uint64_t while_counts[graph_size];
	for (i = graph_size - 1; curr_graph; i--, curr_graph = curr_graph->p)
		while_counts[i] = curr_graph->while_count;
	if (!data_array->dim)
	{
		int size = 1;
		for (i = 0; i < graph_size; i++)
			size *= (int)(while_counts[i] + 1);
		data_array->dim_count = graph_size;
		data_array->dim = (int*)ccmalloc(sizeof(int) * ALIGN_16(graph_size) + sizeof(uint64_t) * size);
		for (i = 0; i < graph_size; i++)
			data_array->dim[i] = (int)(while_counts[i] + 1);
		data_array->data = (uint64_t*)(data_array->dim + ALIGN_16(graph_size));
		for (i = 0; i < size; i++)
			data_array->data[i] = 0;
	} else {
		int flag = (data_array->dim_count < graph_size);
		for (i = 0; !flag && i < graph_size; i++)
			flag = (data_array->dim[i] <= while_counts[i]);
		if (flag)
			_ccv_nnc_tape_exec_data_array_resize(data_array, while_counts, graph_size);
	}
	int idx = 0, step = 1;
	for (i = graph_size - 1; i >= 0; i--)
	{
		assert(while_counts[i] < data_array->dim[i]);
		idx += while_counts[i] * step;
		step *= data_array->dim[i];
	}
	data_array->data[idx] = numbering;
}
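
/* Usage sketch for the numbering pair (illustrative): a while-loop records a
 * 64-bit numbering for an exec at the current loop counts, and it reads back
 * unchanged as long as graph->while_count (and every ancestor's) is the same:
 *
 *   ccv_nnc_tensor_tape_set_numbering(tape, graph, exec, 3);
 *   uint64_t n = ccv_nnc_tensor_tape_numbering(tape, graph, exec); // n == 3
 */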

void ccv_nnc_tensor_tape_free(ccv_nnc_tensor_tape_t* const tape)
{
	int i, j;
	for (i = 0; i < tape->tensor_data->rnum; i++)
	{
		ccv_nnc_tape_tensor_data_array_t* const data_array = (ccv_nnc_tape_tensor_data_array_t*)ccv_array_get(tape->tensor_data, i);
		if (data_array->dim)
		{
			int size = 1;
			for (j = 0; j < data_array->dim_count; j++)
				size *= data_array->dim[j];
			for (j = 0; j < size; j++)
				if (data_array->data[j].data.u8 && !CCV_NUMERIC_DATA_NO_ALLOC(data_array->data[j].data))
				{
#ifdef HAVE_CUDA
					if (CCV_TENSOR_GET_MEMORY(data_array->data[j].type) == CCV_TENSOR_GPU_MEMORY)
						cufree(CCV_TENSOR_GET_DEVICE_ID(data_array->data[j].type), data_array->data[j].data.u8);
					else
						ccfree(data_array->data[j].data.u8);
#else
					ccfree(data_array->data[j].data.u8);
#endif
				}
			ccfree(data_array->dim);
		}
	}
	ccv_array_free(tape->tensor_data);
	for (i = 0; i < tape->exec_data->rnum; i++)
	{
		ccv_nnc_tape_exec_data_array_t* const data_array = (ccv_nnc_tape_exec_data_array_t*)ccv_array_get(tape->exec_data, i);
		if (data_array->dim)
			ccfree(data_array->dim);
	}
	ccv_array_free(tape->exec_data);
	ccfree(tape);
}