// /home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_tensor.c
#include "ccv_nnc.h"
#include "ccv_nnc_easy.h"
#include "ccv_nnc_internal.h"
#ifdef HAVE_CUDA
#include "gpu/ccv_nnc_compat.h"
#elif defined(HAVE_MPS)
#include "mps/ccv_nnc_mps.h"
#endif
#include <fcntl.h>
#include <sys/mman.h>

// MARK - Level-1 API

const int ccv_nnc_no_ofs[CCV_NNC_MAX_DIM_ALLOC] = {0};

ccv_nnc_tensor_t* ccv_nnc_tensor_new(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
{
	ccv_nnc_tensor_t* tensor;
	// This specific form can be toll-free bridged to ccv_dense_matrix_t (on CPU, 3 dims (rows, cols, channels), and channels no larger than the max channels of ccv_dense_matrix_t).
	const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
	if (ptr || (flags & CCV_NO_DATA_ALLOC))
	{
		// Data is supplied by the caller (or intentionally left unallocated): only the header is allocated.
		tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
		tensor->dataof = 0;
		tensor->alias_ref = 0;
		tensor->sig = 0;
		tensor->refcount = 1;
		tensor->info = params;
		if (tfb)
		{
			tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
			// This corresponds to mat->step.
			tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
		} else // This won't be recognized by ccv_dense_matrix_t.
			tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
		tensor->data.u8 = (uint8_t*)ptr;
		return tensor;
	}
	if (flags & CCV_TENSOR_CPU_MEMORY)
	{
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
	} else if (flags & CCV_TENSOR_GPU_MEMORY) {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
	}
	// Round the header size up to a 64-byte boundary so the payload that follows stays 64-byte aligned.
	const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 63) & -64;
	const size_t size = ccv_nnc_tensor_data_size(params);
#ifdef HAVE_CUDA
	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
	{
		tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
		assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
		if (size > 0)
			tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
		else
			tensor->data.u8 = 0;
	} else {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
		ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
		if (size > 0)
			tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
		else
			tensor->data.u8 = 0;
	}
#elif defined(HAVE_MPS)
	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
	{
		tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
		assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
		if (size > 0)
			tensor->data.u8 = (uint8_t*)mpobjmalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
		else
			tensor->data.u8 = 0;
	} else {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
		ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
		if (size > 0)
			tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
		else
			tensor->data.u8 = 0;
	}
#else
	assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
	ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
	if (size > 0)
		tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
	else
		tensor->data.u8 = 0;
#endif
	tensor->dataof = 0;
	tensor->alias_ref = 0;
	tensor->data_size = size;
	tensor->sig = 0;
	tensor->refcount = 1;
	tensor->info = params;
	if (tfb)
	{
		tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
		// This corresponds to mat->step.
		tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
	} else
		tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
	return tensor;
}
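
// A minimal usage sketch (illustrative, not part of this file): allocate a
// small CPU tensor, touch its payload, and free it. CPU_TENSOR_NHWC is the
// parameter-building convenience macro from ccv_nnc_easy.h.
//
//   ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2, 3), 0);
//   t->data.f32[0] = 1.0f; // header and payload live in one 64-byte aligned block
//   ccv_nnc_tensor_free(t);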

ccv_nnc_tensor_t* ccv_nnc_tensor_new_from_file(const ccv_nnc_tensor_param_t params, const char* const filename, const off_t offset, const int flags)
{
	ccv_nnc_tensor_t* tensor;
	// This specific form can be toll-free bridged to ccv_dense_matrix_t (on CPU, 3 dims (rows, cols, channels), and channels no larger than the max channels of ccv_dense_matrix_t).
	const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
	tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
	tensor->dataof = 0;
	tensor->alias_ref = 0;
	tensor->sig = 0;
	tensor->refcount = 1;
	tensor->info = params;
	if (tfb)
	{
		tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
		// This corresponds to mat->step.
		tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
	} else // This won't be recognized by ccv_dense_matrix_t.
		tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
	const size_t size = ccv_nnc_tensor_data_size(params);
#ifdef HAVE_CUDA
	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
	{
		// Remove this flag so it can be deallocated as usual.
		tensor->type &= ~CCV_NO_DATA_ALLOC;
		assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
		if (size > 0)
		{
			// Mapping a file directly into GPU memory is not supported yet on CUDA: stage through a CPU mapping and copy.
			tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
			int fd = open(filename, O_RDONLY, 0);
			void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
			close(fd);
			madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
			cumemcpy(tensor->data.u8, CCV_TENSOR_GPU_MEMORY, bufptr, CCV_TENSOR_CPU_MEMORY, size);
			munmap(bufptr, size);
		} else
			tensor->data.u8 = 0;
	} else {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
		if (size > 0)
		{
			int fd = open(filename, O_RDONLY, 0);
			void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
			close(fd);
			madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
			tensor->data.u8 = bufptr;
			tensor->type |= CCV_MAPPED_MEM;
		} else
			tensor->data.u8 = 0;
	}
#elif defined(HAVE_MPS)
	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
	{
		// Remove this flag so it can be deallocated as usual.
		tensor->type &= ~CCV_NO_DATA_ALLOC;
		assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
		if (size > 0)
			tensor->data.u8 = (uint8_t*)mpmemmap(filename, size, offset, flags);
		else
			tensor->data.u8 = 0;
	} else {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
		if (size > 0)
		{
			int fd = open(filename, O_RDONLY, 0);
			void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
			close(fd);
			madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
			tensor->data.u8 = bufptr;
			tensor->type |= CCV_MAPPED_MEM;
		} else
			tensor->data.u8 = 0;
	}
#else
	assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
	if (size > 0)
	{
		int fd = open(filename, O_RDONLY, 0);
		void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
		close(fd);
		madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
		tensor->data.u8 = bufptr;
		tensor->type |= CCV_MAPPED_MEM;
	} else
		tensor->data.u8 = 0;
#endif
	return tensor;
}
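
// A sketch of mapping weights straight from a file (illustrative; the
// filename and dimensions are made up). The region at the given offset is
// mmap'ed read-only, so the tensor payload must be stored contiguously.
//
//   ccv_nnc_tensor_t* const w = ccv_nnc_tensor_new_from_file(CPU_TENSOR_NHWC(32F, 1024, 1024), "weights.bin", 0, 0);
//   // ... read w->data.f32 ...
//   ccv_nnc_tensor_free(w); // CCV_MAPPED_MEM makes free munmap the region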

ccv_nnc_tensor_t* ccv_nnc_tensor_resize(ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params)
{
	assert(!CCV_IS_TENSOR_VIEW(tensor));
	assert(tensor->type & CCV_UNMANAGED);
	assert(tensor->data_size > 0);
	assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GET_MEMORY(tensor->info.type));
	assert(CCV_TENSOR_GET_DEVICE(params.type) == CCV_TENSOR_GET_DEVICE(tensor->info.type));
	const size_t size = ccv_nnc_tensor_data_size(params);
	const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
	tensor->info = params;
#ifdef HAVE_CUDA
	const int pinned_mem = (tensor->type & CCV_PINNED_MEM);
#endif
	if (tfb)
	{
		tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
		// This corresponds to mat->step.
		tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
	} else
		tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
	if (size <= tensor->data_size) // The existing allocation is big enough: nothing to do.
	{
#ifdef HAVE_CUDA
		if (pinned_mem)
			tensor->type |= CCV_PINNED_MEM;
#endif
		return tensor;
	}
	ccv_nnc_tensor_t* new_tensor = tensor;
	const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 63) & -64;
#ifdef HAVE_CUDA
	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
	{
		assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
		const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type);
		assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type));
		cufree(device_id, tensor->data.u8);
		new_tensor->data.u8 = (uint8_t*)cumalloc(device_id, size);
	} else {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
		assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
		// Unregister the pinned memory before the realloc moves it; it is pinned again below.
		if (pinned_mem)
			cuunregister(new_tensor->data.u8);
		new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
		new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
	}
#elif defined(HAVE_MPS)
	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
	{
		assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
		const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type);
		assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type));
		mpobjfree(device_id, tensor->data.u8);
		new_tensor->data.u8 = (uint8_t*)mpobjmalloc(device_id, size);
	} else {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
		assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
		new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
		new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
	}
#else
	assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
	new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
	new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
#endif
	new_tensor->data_size = size;
#ifdef HAVE_CUDA
	if (pinned_mem)
		ccv_nnc_tensor_pin_memory(new_tensor);
#endif
	return new_tensor;
}
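
// Resize reuses the existing allocation whenever the new size fits, so the
// returned pointer differs from the argument only when the block has to
// grow. A sketch (dimensions are made up):
//
//   ccv_nnc_tensor_t* t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 8, 8, 3), 0);
//   t = ccv_nnc_tensor_resize(t, CPU_TENSOR_NHWC(32F, 4, 4, 3)); // shrink: same block
//   t = ccv_nnc_tensor_resize(t, CPU_TENSOR_NHWC(32F, 16, 16, 3)); // grow: may realloc and move
//   ccv_nnc_tensor_free(t);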

ccv_nnc_tensor_t ccv_nnc_tensor(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
{
	// This specific form can be toll-free bridged to ccv_dense_matrix_t.
	const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
	ccv_nnc_tensor_t tensor;
	tensor.dataof = 0;
	tensor.alias_ref = 0;
	tensor.sig = 0;
	tensor.refcount = 1;
	tensor.info = params;
	if (flags & CCV_TENSOR_CPU_MEMORY)
	{
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
	} else if (flags & CCV_TENSOR_GPU_MEMORY) {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
	}
	if (tfb)
	{
		tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
		// This corresponds to mat->step.
		tensor.info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
	} else // This won't be recognized by ccv_dense_matrix_t.
		tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
	if (params.dim[0] > 0)
		tensor.data.u8 = (uint8_t*)ptr;
	else
		tensor.data.u8 = 0;
	tensor.data_size = 0;
	return tensor;
}
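
// ccv_nnc_tensor returns the header by value and never owns the payload,
// which makes it handy for wrapping an existing buffer on the stack. A
// sketch (the buffer is made up):
//
//   float buf[4] = {1, 2, 3, 4};
//   ccv_nnc_tensor_t t = ccv_nnc_tensor(buf, CPU_TENSOR_NHWC(32F, 2, 2), 0);
//   // No ccv_nnc_tensor_free here: CCV_NO_DATA_ALLOC marks the payload as unowned.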

int ccv_nnc_tensor_pin_memory(ccv_nnc_tensor_t* const tensor)
{
#ifdef HAVE_CUDA
	assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
	if (!(tensor->type & CCV_PINNED_MEM) && tensor->data_size)
	{
		const int success = curegister(tensor->data.u8, tensor->data_size);
		if (success)
			tensor->type |= CCV_PINNED_MEM;
		return success ? 0 : -1;
	}
#endif
	return 0;
}
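
// Pinning page-locks the CPU buffer so CUDA can DMA to and from it; on
// non-CUDA builds the call is a no-op that reports success. A sketch:
//
//   ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1024), 0);
//   if (ccv_nnc_tensor_pin_memory(t) == 0)
//   { /* host-device copies of t can now use DMA */ }
//   ccv_nnc_tensor_free(t); // unpins automatically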

void ccv_nnc_tensor_free(ccv_nnc_tensor_t* const tensor)
{
	if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY && tensor->type & CCV_MAPPED_MEM)
	{
		// The size might differ from the one at allocation time (for example, the tensor may have rewritten its size to something smaller).
		// This might cause issues in the future.
		const size_t size = ccv_nnc_tensor_data_size(tensor->info);
		munmap(tensor->data.u8, size);
	}
#ifdef HAVE_CUDA
	if (tensor->type & CCV_PINNED_MEM)
		cuunregister(tensor->data.u8);
	if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY &&
		!(tensor->type & CCV_NO_DATA_ALLOC)) // If this is GPU memory and it is allocated, free.
		cufree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8);
#elif defined(HAVE_MPS)
	if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY &&
		!(tensor->type & CCV_NO_DATA_ALLOC)) // If this is GPU memory and it is allocated, free.
		mpobjfree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8);
#endif
	ccfree(tensor);
}

static inline void _ccv_nnc_tensor_view_set(ccv_nnc_tensor_view_t* const tv, const ccv_nnc_tensor_t* const tensor, const int dim[CCV_NNC_MAX_DIM_ALLOC], const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
{
	memcpy(tv->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
	memcpy(tv->info.dim, dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
	uint8_t* const p = tensor->data.u8;
	const off_t off = tv->off = ccv_nnc_tensor_view_offset(tv->info.datatype, stride, ofs);
	tv->contiguous = ccv_nnc_tensor_view_is_contiguous(dim, stride);
	// The view, at its upper bound, must stay within the underlying tensor.
	assert(off + CCV_GET_DATA_TYPE_SIZE(tv->info.datatype) * ccv_nnc_dimension_upper_bound(tv->info.dim, tv->stride) <= CCV_GET_DATA_TYPE_SIZE(tensor->info.datatype) * ccv_nnc_tensor_count(tensor->info));
	ccv_nnc_tensor_data(tv->info, p, off + tensor->dataof, &tv->data, &tv->dataof);
}

ccv_nnc_tensor_view_t* ccv_nnc_tensor_view_new(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
{
	ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)ccmalloc(sizeof(ccv_nnc_tensor_view_t));
	// Clean up the channel bits, then add the CCV_TENSOR_VIEW identifier.
	tv->type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW;
	tv->dataof = 0;
	tv->alias_ref = (uintptr_t)tensor;
	tv->refcount = 1;
	tv->sig = 0;
	tv->data_size = 0;
	assert(params.type == tensor->info.type);
	assert(params.datatype == tensor->info.datatype);
	tv->info = params;
	_ccv_nnc_tensor_view_set(tv, tensor, params.dim, ofs, stride);
	return tv;
}
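
// A sketch of carving a 2x2 window out of a 4x4 tensor (dimensions are made
// up). ofs gives the starting element per dimension; stride is expressed in
// elements of the parent layout.
//
//   ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 4), 0);
//   int ofs[CCV_NNC_MAX_DIM_ALLOC] = {1, 1};
//   int stride[CCV_NNC_MAX_DIM_ALLOC] = {4, 1}; // parent rows are 4 elements apart
//   ccv_nnc_tensor_view_t* const tv = ccv_nnc_tensor_view_new(t, CPU_TENSOR_NHWC(32F, 2, 2), ofs, stride);
//   ccv_nnc_tensor_view_free(tv);
//   ccv_nnc_tensor_free(t);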

ccv_nnc_tensor_view_t ccv_nnc_tensor_view(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
{
	assert(!CCV_IS_TENSOR_VIEW(tensor));
	assert(params.type == tensor->info.type);
	assert(params.datatype == tensor->info.datatype);
	ccv_nnc_tensor_view_t tv = {
		.dataof = 0,
		.alias_ref = (uintptr_t)tensor,
		.type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW, // Clean up the channel bits, and then add the CCV_TENSOR_VIEW identifier.
		.refcount = 1,
		.sig = 0,
		.info = params,
		.data_size = 0,
	};
	_ccv_nnc_tensor_view_set(&tv, tensor, params.dim, ofs, stride);
	return tv;
}

void ccv_nnc_tensor_view_free(ccv_nnc_tensor_view_t* const tensor_view)
{
	ccfree(tensor_view);
}

void _ccv_nnc_tensor_set_zero(unsigned char* u8, const int nd, const int* const dim, const int* const stride, const size_t data_size)
{
	if (nd == 1)
	{
		if (stride[0] == 1)
		{
			memset(u8, 0, data_size * dim[0]);
			return;
		}
		int i;
		for (i = 0; i < dim[0]; i++)
			memset(u8 + i * stride[0] * data_size, 0, data_size);
	} else if (nd == 2) {
		if (stride[1] == 1 && stride[0] == dim[1])
		{
			memset(u8, 0, data_size * dim[1] * dim[0]);
			return;
		}
		int x, y;
		for (y = 0; y < dim[0]; y++)
		{
			unsigned char* const u8y = u8 + y * stride[0] * data_size;
			for (x = 0; x < dim[1]; x++)
				memset(u8y + x * stride[1] * data_size, 0, data_size);
		}
	} else if (nd == 3) {
		if (stride[2] == 1 && stride[1] == dim[2] && stride[0] == dim[1] * dim[2])
		{
			memset(u8, 0, data_size * dim[2] * dim[1] * dim[0]);
			return;
		}
		int x, y, z;
		for (z = 0; z < dim[0]; z++)
		{
			unsigned char* const u8z = u8 + z * stride[0] * data_size;
			for (y = 0; y < dim[1]; y++)
			{
				unsigned char* const u8y = u8z + y * stride[1] * data_size;
				for (x = 0; x < dim[2]; x++)
					memset(u8y + x * stride[2] * data_size, 0, data_size);
			}
		}
	} else if (nd == 4) {
		if (stride[3] == 1 && stride[2] == dim[3] && stride[1] == dim[2] * dim[3] && stride[0] == dim[1] * dim[2] * dim[3])
		{
			memset(u8, 0, data_size * dim[3] * dim[2] * dim[1] * dim[0]);
			return;
		}
		int x, y, z, s;
		for (s = 0; s < dim[0]; s++)
		{
			unsigned char* const u8s = u8 + s * stride[0] * data_size;
			for (z = 0; z < dim[1]; z++)
			{
				unsigned char* const u8z = u8s + z * stride[1] * data_size;
				for (y = 0; y < dim[2]; y++)
				{
					unsigned char* const u8y = u8z + y * stride[2] * data_size;
					for (x = 0; x < dim[3]; x++)
						memset(u8y + x * stride[3] * data_size, 0, data_size);
				}
			}
		}
	} else {
		// More than 4 dimensions: peel off the outermost one and recurse.
		int i;
		for (i = 0; i < dim[0]; i++)
			_ccv_nnc_tensor_set_zero(u8 + i * stride[0] * data_size, nd - 1, dim + 1, stride + 1, data_size);
	}
}

void ccv_nnc_tensor_zero(void* const tensor)
{
	ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)tensor;
	const size_t data_size = CCV_GET_DATA_TYPE_SIZE(tv->info.datatype);
	if (CCV_IS_TENSOR_CONTIGUOUS(tv))
	{
		memset(tv->data.u8, 0, data_size * ccv_nnc_tensor_count(tv->info));
		return;
	}
	const int nd = ccv_nnc_tensor_nd(tv->info.dim);
	assert(nd >= 1);
	const int* const tvstride = tv->stride;
	// Go through this recursively.
	_ccv_nnc_tensor_set_zero(tv->data.u8, nd, tv->info.dim, tvstride, data_size);
}
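
// Zeroing accepts plain tensors and views alike; a strided view only clears
// the elements inside the view. Sketch (reusing the 4x4/2x2 view idea above):
//
//   ccv_nnc_tensor_zero(tv); // clears the 2x2 window, leaves the rest of t intact
//   ccv_nnc_tensor_zero(t);  // contiguous: a single memset over the whole payload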

int ccv_nnc_tensor_eq(const ccv_nnc_tensor_t* const a, const ccv_nnc_tensor_t* const b)
{
	assert(!CCV_IS_TENSOR_VIEW(a));
	assert(!CCV_IS_TENSOR_VIEW(b));
	// If a is a dense matrix, just use ccv_matrix_eq.
	if (CCV_TENSOR_IS_DENSE_MATRIX(a->type))
		return ccv_matrix_eq((ccv_matrix_t*)a, (ccv_matrix_t*)b);
	// Otherwise, do our own thing.
	if (CCV_GET_DATA_TYPE(a->type) != CCV_GET_DATA_TYPE(b->type))
		return -1;
	int i, c = 1;
	for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; i++)
	{
		if (!a->info.dim[i] && !b->info.dim[i])
			break;
		if (a->info.dim[i] != b->info.dim[i])
			return -1;
		c *= a->info.dim[i];
	}
	if (CCV_GET_DATA_TYPE(a->type) == CCV_32S)
		return memcmp(a->data.i32, b->data.i32, sizeof(int) * c) == 0 ? 0 : -1;
	// Only 32F and 64F are supported at this point.
	assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F || CCV_GET_DATA_TYPE(a->type) == CCV_64F);
	// Read: http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
	// http://floating-point-gui.de/errors/comparison/
	if (CCV_GET_DATA_TYPE(a->type) == CCV_32F)
	{
		static const float epsi = FLT_EPSILON;
		static const int32_t ulps = 128; // So that 1 and 1.000015 will be treated as the same.
		for (i = 0; i < c; i++)
		{
			// Although this is floating point, use the integer representation to compare (ULP distance).
			int32_t i32a = a->data.i32[i];
			if (i32a < 0)
				i32a = 0x80000000 - i32a;
			int32_t i32b = b->data.i32[i];
			if (i32b < 0)
				i32b = 0x80000000 - i32b;
			if (abs(i32a - i32b) > ulps && fabsf(a->data.f32[i] - b->data.f32[i]) > epsi)
				return -1;
		}
	} else if (CCV_GET_DATA_TYPE(a->type) == CCV_64F) {
		typedef union {
			double f64;
			int64_t i64;
		} Float64;
		static const double epsi = DBL_EPSILON;
		static const int64_t ulps = 128; // So that 1 and 1.000015 will be treated as the same.
		for (i = 0; i < c; i++)
		{
			// Although this is floating point, use the integer representation to compare (ULP distance).
			Float64 f64a, f64b;
			f64a.f64 = a->data.f64[i];
			f64b.f64 = b->data.f64[i];
			if (f64a.i64 < 0)
				f64a.i64 = 0x8000000000000000 - f64a.i64;
			if (f64b.i64 < 0)
				f64b.i64 = 0x8000000000000000 - f64b.i64;
			if (llabs(f64a.i64 - f64b.i64) > ulps && fabs(a->data.f64[i] - b->data.f64[i]) > epsi)
				return -1;
		}
	}
	return 0;
}
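
// The ULP trick above relies on IEEE-754 floats of the same sign ordering
// the same way as their bit patterns; after mirroring negative values, the
// difference of the integer views counts units in the last place. A sketch
// of the same comparison in isolation:
//
//   union { float f; int32_t i; } ua = { 1.0f }, ub = { 1.000015f };
//   int32_t ia = ua.i < 0 ? (int32_t)(0x80000000 - ua.i) : ua.i;
//   int32_t ib = ub.i < 0 ? (int32_t)(0x80000000 - ub.i) : ub.i;
//   int close_enough = abs(ia - ib) <= 128; // ~126 ULPs apart, so accepted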

static void _strcat(char** str, int* written, size_t* len, char* from, int from_size)
{
	// Grow the buffer before appending if the remaining room is too small.
	if (*len - *written < from_size)
	{
		*len += from_size * 2;
		*str = (char*)ccrealloc(*str, *len);
	}
	memcpy(*str + *written, from, from_size);
	*written += from_size;
}

#define _STRPRINTF(str, written, len, format, ...) \
do { \
	const int newly_written = snprintf((str) + (written), (len) - (written), format, ## __VA_ARGS__); \
	if ((len) - (written) < newly_written) \
	{ \
		(len) += newly_written * 2; \
		(str) = (char*)ccrealloc((str), (len)); \
		(written) += snprintf((str) + (written), (len) - (written), format, ## __VA_ARGS__); \
	} else \
		(written) += newly_written; \
} while (0)
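
// _STRPRINTF leans on snprintf reporting the length it needed: if the first
// call was truncated, the buffer is grown and the same snprintf is issued
// again. A sketch of the pattern:
//
//   int written = 0;
//   size_t len = 8;
//   char* str = (char*)ccmalloc(len);
//   _STRPRINTF(str, written, len, "%10.5g", 3.14159); // grows str, then retries
//   ccfree(str);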

static void _strv(char** str, int* written, size_t* len, const ccv_nnc_tensor_t* const a, int i)
{
	if (a->info.datatype == CCV_32F)
		_STRPRINTF(*str, *written, *len, "%10.5g", a->data.f32[i]);
	else if (a->info.datatype == CCV_64F)
		_STRPRINTF(*str, *written, *len, "%10.5g", a->data.f64[i]);
	else if (a->info.datatype == CCV_16F) {
		float v;
		ccv_half_precision_to_float((uint16_t*)(a->data.f16 + i), &v, 1);
		_STRPRINTF(*str, *written, *len, "%10.5g", v);
	} else if (a->info.datatype == CCV_32S)
		_STRPRINTF(*str, *written, *len, "%10d", a->data.i32[i]);
	else if (a->info.datatype == CCV_64S)
		_STRPRINTF(*str, *written, *len, "%12lld", (long long int)a->data.i64[i]);
	else if (a->info.datatype == CCV_8U)
		_STRPRINTF(*str, *written, *len, "%3d", (int)a->data.u8[i]);
}

static void _strt(char** str, int* written, size_t* len, const ccv_nnc_tensor_t* const a, int nd, int spacer, const int* const dim, const int* const stride, int idx)
{
	assert(nd != 1);
	if (nd == 2)
	{
		// Print the columns and the rows, eliding the middle once either exceeds 8.
		int i, j, k;
		if (dim[0] <= 8)
		{
			for (i = 0; i < dim[0]; i++)
			{
				if (i != 0)
				{
					_strcat(str, written, len, "  ", 2);
					for (k = 0; k < spacer; k++)
						_strcat(str, written, len, " ", 1);
				}
				_strcat(str, written, len, "[", 1);
				if (dim[1] <= 8)
				{
					for (j = 0; j < dim[1]; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						if (j < dim[1] - 1)
							_strcat(str, written, len, ", ", 2);
					}
					if (i < dim[0] - 1)
						_strcat(str, written, len, "],\n", 3);
				} else {
					for (j = 0; j < 3; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						_strcat(str, written, len, ", ", 2);
					}
					_strcat(str, written, len, " ..., ", 6);
					for (j = dim[1] - 3; j < dim[1]; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						if (j < dim[1] - 1)
							_strcat(str, written, len, ", ", 2);
					}
					if (i < dim[0] - 1)
						_strcat(str, written, len, "],\n", 3);
				}
			}
			_strcat(str, written, len, "]", 1);
		} else {
			for (i = 0; i < 3; i++)
			{
				if (i != 0)
				{
					_strcat(str, written, len, "  ", 2);
					for (k = 0; k < spacer; k++)
						_strcat(str, written, len, " ", 1);
				}
				_strcat(str, written, len, "[", 1);
				if (dim[1] <= 8)
				{
					for (j = 0; j < dim[1]; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						if (j < dim[1] - 1)
							_strcat(str, written, len, ", ", 2);
					}
					_strcat(str, written, len, "],\n", 3);
				} else {
					for (j = 0; j < 3; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						_strcat(str, written, len, ", ", 2);
					}
					_strcat(str, written, len, " ..., ", 6);
					for (j = dim[1] - 3; j < dim[1]; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						if (j < dim[1] - 1)
							_strcat(str, written, len, ", ", 2);
					}
					_strcat(str, written, len, "],\n", 3);
				}
			}
			_strcat(str, written, len, "  ", 2);
			for (k = 0; k < spacer; k++)
				_strcat(str, written, len, " ", 1);
			_strcat(str, written, len, "...,\n", 5);
			for (i = dim[0] - 3; i < dim[0]; i++)
			{
				_strcat(str, written, len, "  ", 2);
				for (k = 0; k < spacer; k++)
					_strcat(str, written, len, " ", 1);
				_strcat(str, written, len, "[", 1);
				if (dim[1] < 8)
				{
					for (j = 0; j < dim[1]; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						if (j < dim[1] - 1)
							_strcat(str, written, len, ", ", 2);
					}
					if (i < dim[0] - 1)
						_strcat(str, written, len, "],\n", 3);
				} else {
					for (j = 0; j < 3; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						_strcat(str, written, len, ", ", 2);
					}
					_strcat(str, written, len, " ..., ", 6);
					for (j = dim[1] - 3; j < dim[1]; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						if (j < dim[1] - 1)
							_strcat(str, written, len, ", ", 2);
					}
					if (i < dim[0] - 1)
						_strcat(str, written, len, "],\n", 3);
				}
			}
			_strcat(str, written, len, "]", 1);
		}
		return;
	}
	// nd > 2: recurse into the outermost dimension, showing at most 2 + 2 blocks.
	int i, j;
	if (dim[0] > 4)
	{
		for (i = 0; i < 2; i++)
		{
			_strcat(str, written, len, "[", 1);
			_strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
			_strcat(str, written, len, "],\n  ", 5);
			for (j = 0; j < spacer; j++)
				_strcat(str, written, len, " ", 1);
		}
		_strcat(str, written, len, "...,\n", 5);
		_strcat(str, written, len, "  ", 2);
		for (j = 0; j < spacer; j++)
			_strcat(str, written, len, " ", 1);
		for (i = dim[0] - 2; i < dim[0]; i++)
		{
			_strcat(str, written, len, "[", 1);
			_strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
			if (i < dim[0] - 1)
			{
				_strcat(str, written, len, "],\n  ", 5);
				for (j = 0; j < spacer; j++)
					_strcat(str, written, len, " ", 1);
			}
		}
		_strcat(str, written, len, "]", 1);
	} else {
		for (i = 0; i < dim[0]; i++)
		{
			_strcat(str, written, len, "[", 1);
			_strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
			if (i < dim[0] - 1)
			{
				_strcat(str, written, len, "],\n", 3);
				_strcat(str, written, len, "  ", 2);
				for (j = 0; j < spacer; j++)
					_strcat(str, written, len, " ", 1);
			}
		}
		_strcat(str, written, len, "]", 1);
	}
}

char* ccv_nnc_tensor_format_new(const ccv_nnc_tensor_t* const a)
{
	const int nd = ccv_nnc_tensor_nd(a->info.dim);
	int i;
	int rows = 8; // 8 rows for the first two dimensions, and after that just the first and last few.
	for (i = 2; i < nd; i++)
		rows *= 5; // Each additional dimension prints at most 2 + 2 blocks plus an ellipsis row.
	int columns = nd * 2 + 16 * 8;
	size_t len = sizeof(char) * columns * rows;
	// Allocate the return string buffer; it grows on demand inside _strcat / _STRPRINTF.
	char* str = (char*)ccmalloc(len);
	int written = 0;
	int stride[CCV_NNC_MAX_DIM_ALLOC];
	if (CCV_IS_TENSOR_VIEW(a))
		memcpy(stride, ((ccv_nnc_tensor_view_t*)a)->stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
	else
		ccv_nnc_tensor_get_stride(a->info.dim, stride);
	_strcat(&str, &written, &len, "[\n  ", 4);
	if (nd == 1)
	{
		// Special-case vectors.
		if (a->info.dim[0] <= 64)
			for (i = 0; i < a->info.dim[0]; i++)
			{
				_strv(&str, &written, &len, a, i * stride[0]);
				if (i < a->info.dim[0] - 1)
				{
					if ((i + 1) % 8 == 0)
						_strcat(&str, &written, &len, ",\n  ", 4);
					else
						_strcat(&str, &written, &len, ", ", 2);
				}
			}
		else {
			// First 3 rows.
			for (i = 0; i < 24; i++)
			{
				_strv(&str, &written, &len, a, i * stride[0]);
				if ((i + 1) % 8 == 0)
					_strcat(&str, &written, &len, ",\n  ", 4);
				else
					_strcat(&str, &written, &len, ", ", 2);
			}
			_strcat(&str, &written, &len, "...,\n  ", 7);
			// Last 3 rows (aligned to 8 items per row).
			int start = ((a->info.dim[0] + 7) / 8 - 3) * 8;
			for (i = start; i < a->info.dim[0]; i++)
			{
				_strv(&str, &written, &len, a, i * stride[0]);
				if (i < a->info.dim[0] - 1)
				{
					if ((i + 1) % 8 == 0)
						_strcat(&str, &written, &len, ",\n  ", 4);
					else
						_strcat(&str, &written, &len, ", ", 2);
				}
			}
		}
	} else {
		_strt(&str, &written, &len, a, nd, 0, a->info.dim, stride, 0);
	}
	_strcat(&str, &written, &len, "\n]", 3); // Including the terminal \0.
	str = (char*)ccrealloc(str, written); // Don't need the extra space.
	return str;
}
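
// Usage sketch: format a tensor for debugging and release the string with
// ccfree, since it was allocated with ccmalloc/ccrealloc.
//
//   ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 3, 3), 0);
//   ccv_nnc_tensor_zero(t);
//   char* const str = ccv_nnc_tensor_format_new(t);
//   printf("%s\n", str);
//   ccfree(str);
//   ccv_nnc_tensor_free(t);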