/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_tensor.c
Line | Count | Source |
1 | | #include "ccv_nnc.h" |
2 | | #include "ccv_nnc_easy.h" |
3 | | #include "ccv_nnc_internal.h" |
4 | | #ifdef HAVE_CUDA |
5 | | #include "gpu/ccv_nnc_compat.h" |
6 | | #elif defined(HAVE_MPS) |
7 | | #include "mps/ccv_nnc_mps.h" |
8 | | #endif |
9 | | #include <fcntl.h> |
10 | | #include <sys/mman.h> |
11 | | |
12 | | // MARK - Level-1 API |
13 | | |
14 | | const int ccv_nnc_no_ofs[CCV_NNC_MAX_DIM_ALLOC] = {0}; |
15 | | |
16 | | ccv_nnc_tensor_t* ccv_nnc_tensor_new(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags) |
17 | 49.2k | { |
18 | 49.2k | ccv_nnc_tensor_t* tensor; |
19 | | // this specific form can be toll-free bridged to ccv_dense_matrix_t (on CPU, with 3 dims (channels, rows, cols), and channels no larger than the max channels of ccv_dense_matrix_t). |
20 | 49.2k | const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0); |
21 | 49.2k | if (ptr || (flags & CCV_NO_DATA_ALLOC)) |
22 | 1.78k | { |
23 | 1.78k | tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t)); |
24 | 1.78k | tensor->dataof = 0; |
25 | 1.78k | tensor->alias_ref = 0; |
26 | 1.78k | tensor->sig = 0; |
27 | 1.78k | tensor->refcount = 1; |
28 | 1.78k | tensor->info = params; |
29 | 1.78k | if (tfb) |
30 | 59 | { |
31 | 59 | tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]; |
32 | | // This corresponds to mat->step |
33 | 59 | tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2])); |
34 | 59 | } else // This won't be recognized by ccv_dense_matrix_t |
35 | 1.72k | tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype); |
36 | 1.78k | tensor->data.u8 = (uint8_t*)ptr; |
37 | 1.78k | return tensor; |
38 | 1.78k | } |
39 | 47.4k | if (flags & CCV_TENSOR_CPU_MEMORY) |
40 | 0 | { |
41 | 0 | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
42 | 47.4k | } else if (flags & CCV_TENSOR_GPU_MEMORY) { |
43 | 0 | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY); |
44 | 0 | } |
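| | // Round the header size up to a multiple of 64 bytes so the tensor data placed right after the header stays 64-byte aligned (matching the ccmemalign below). |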
45 | 47.4k | const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 63) & -64; |
46 | 47.4k | const size_t size = ccv_nnc_tensor_data_size(params); |
47 | 47.4k | #ifdef HAVE_CUDA |
48 | 47.4k | if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY) |
49 | 2.60k | { |
50 | 2.60k | tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t)); |
51 | 2.60k | assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY); |
52 | 2.60k | if (size > 0) |
53 | 2.60k | tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size); |
54 | 0 | else |
55 | 0 | tensor->data.u8 = 0; |
56 | 44.8k | } else { |
57 | 44.8k | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
58 | 44.8k | ccmemalign((void **)&tensor, 64, tensor_hdr_size + size); |
59 | 44.8k | if (size > 0) |
60 | 44.8k | tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size; |
61 | 0 | else |
62 | 0 | tensor->data.u8 = 0; |
63 | 44.8k | } |
64 | | #elif defined(HAVE_MPS) |
65 | | if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY) |
66 | | { |
67 | | tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t)); |
68 | | assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY); |
69 | | if (size > 0) |
70 | | tensor->data.u8 = (uint8_t*)mpobjmalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size); |
71 | | else |
72 | | tensor->data.u8 = 0; |
73 | | } else { |
74 | | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
75 | | ccmemalign((void **)&tensor, 64, tensor_hdr_size + size); |
76 | | if (size > 0) |
77 | | tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size; |
78 | | else |
79 | | tensor->data.u8 = 0; |
80 | | } |
81 | | #else |
82 | | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
83 | | ccmemalign((void **)&tensor, 64, tensor_hdr_size + size); |
84 | | if (size > 0) |
85 | | tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size; |
86 | | else |
87 | | tensor->data.u8 = 0; |
88 | | #endif |
89 | 47.4k | tensor->dataof = 0; |
90 | 47.4k | tensor->alias_ref = 0; |
91 | 47.4k | tensor->data_size = size; |
92 | 47.4k | tensor->sig = 0; |
93 | 47.4k | tensor->refcount = 1; |
94 | 47.4k | tensor->info = params; |
95 | 47.4k | if (tfb) |
96 | 4.40k | { |
97 | 4.40k | tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]; |
98 | | // This corresponds to mat->step |
99 | 4.40k | tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2])); |
100 | 4.40k | } else |
101 | 43.0k | tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype); |
102 | 47.4k | return tensor; |
103 | 47.4k | } |
104 | | |
105 | | ccv_nnc_tensor_t* ccv_nnc_tensor_new_from_file(const ccv_nnc_tensor_param_t params, const char* const filename, const off_t offset, const int flags) |
106 | 4 | { |
107 | 4 | ccv_nnc_tensor_t* tensor; |
108 | | // this specific form can be toll-free bridged to ccv_dense_matrix_t (on CPU, with 3 dims (channels, rows, cols), and channels no larger than the max channels of ccv_dense_matrix_t). |
109 | 4 | const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0); |
110 | 4 | tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t)); |
111 | 4 | tensor->dataof = 0; |
112 | 4 | tensor->alias_ref = 0; |
113 | 4 | tensor->sig = 0; |
114 | 4 | tensor->refcount = 1; |
115 | 4 | tensor->info = params; |
116 | 4 | if (tfb) |
117 | 0 | { |
118 | 0 | tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]; |
119 | | // This corresponds to mat->step |
120 | 0 | tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2])); |
121 | 0 | } else // This won't be recognized by ccv_dense_matrix_t |
122 | 4 | tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype); |
123 | 4 | const size_t size = ccv_nnc_tensor_data_size(params); |
124 | 4 | #ifdef HAVE_CUDA |
125 | 4 | if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY) |
126 | 2 | { |
127 | | // Remove this flag so it can be deallocated as usual. |
128 | 2 | tensor->type &= ~CCV_NO_DATA_ALLOC; |
129 | 2 | assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY); |
130 | 2 | if (size > 0) |
131 | 2 | { |
132 | 2 | void* ptr = 0; |
133 | | // This is not supported yet on CUDA. |
134 | 2 | if (flags & CCV_NNC_TENSOR_MEMORY_MAP_ON_DEMAND) |
135 | 0 | ptr = cumallocmanaged(CCV_TENSOR_GET_DEVICE_ID(params.type), size); |
136 | 2 | if (ptr) // If allocated successfully. Otherwise we go through the fallback path. |
137 | 0 | { |
138 | 0 | tensor->data.u8 = (uint8_t*)ptr; |
139 | 0 | int fd = open(filename, O_RDONLY, 0); |
140 | 0 | cufileread(fd, offset, tensor->data.u8, size); |
141 | 0 | close(fd); |
142 | 0 | cumemadvisereadmostly(CCV_TENSOR_GET_DEVICE_ID(params.type), tensor->data.u8, size); |
143 | 0 | tensor->type |= CCV_MAPPED_MEM; // This denotes the tensor is mapped to CPU, and would prefer an explicit prefetch call. |
144 | 2 | } else { |
145 | 2 | tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size); |
146 | 2 | int fd = open(filename, O_RDONLY, 0); |
147 | 2 | cufileread(fd, offset, tensor->data.u8, size); |
148 | 2 | close(fd); |
149 | 2 | } |
150 | 2 | } else |
151 | 0 | tensor->data.u8 = 0; |
152 | 2 | } else { |
153 | 2 | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
154 | 2 | if (size > 0) |
155 | 2 | { |
156 | 2 | int fd = open(filename, O_RDONLY, 0); |
157 | 2 | void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset); |
158 | 2 | close(fd); |
159 | 2 | madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED); |
160 | 2 | tensor->data.u8 = bufptr; |
161 | 2 | tensor->type |= CCV_MAPPED_MEM; |
162 | 2 | } else |
163 | 0 | tensor->data.u8 = 0; |
164 | 2 | } |
165 | | #elif defined(HAVE_MPS) |
166 | | if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY) |
167 | | { |
168 | | // Remove this flag so it can be deallocated as usual. |
169 | | tensor->type &= ~CCV_NO_DATA_ALLOC; |
170 | | assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY); |
171 | | if (size > 0) |
172 | | tensor->data.u8 = (uint8_t*)mpmemmap(filename, size, offset, flags); |
173 | | else |
174 | | tensor->data.u8 = 0; |
175 | | } else { |
176 | | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
177 | | if (size > 0) |
178 | | { |
179 | | int fd = open(filename, O_RDONLY, 0); |
180 | | void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset); |
181 | | close(fd); |
182 | | madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED); |
183 | | tensor->data.u8 = bufptr; |
184 | | tensor->type |= CCV_MAPPED_MEM; |
185 | | } else |
186 | | tensor->data.u8 = 0; |
187 | | } |
188 | | #else |
189 | | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
190 | | if (size > 0) |
191 | | { |
192 | | int fd = open(filename, O_RDONLY, 0); |
193 | | void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset); |
194 | | close(fd); |
195 | | madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED); |
196 | | tensor->data.u8 = bufptr; |
197 | | tensor->type |= CCV_MAPPED_MEM; |
198 | | } else |
199 | | tensor->data.u8 = 0; |
200 | | #endif |
201 | 4 | return tensor; |
202 | 4 | } |
203 | | |
204 | | ccv_nnc_tensor_t* ccv_nnc_tensor_new_from_raw(const ccv_nnc_tensor_param_t params, const void* const bufptr, const int flags) |
205 | 0 | { |
206 | 0 | ccv_nnc_tensor_t* tensor = ccv_nnc_tensor_new(0, params, flags); |
207 | 0 | const size_t size = ccv_nnc_tensor_data_size(params); |
208 | 0 | #ifdef HAVE_CUDA |
209 | 0 | if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY) |
210 | 0 | { |
211 | | // Remove this flag so it can be deallocated as usual. |
212 | 0 | tensor->type &= ~CCV_NO_DATA_ALLOC; |
213 | 0 | assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY); |
214 | 0 | if (size > 0) |
215 | 0 | cumemcpy(tensor->data.u8, tensor->info.type, bufptr, CCV_TENSOR_CPU_MEMORY, size); |
216 | 0 | else |
217 | 0 | tensor->data.u8 = 0; |
218 | 0 | } else { |
219 | 0 | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
220 | 0 | if (size > 0) |
221 | 0 | memcpy(tensor->data.u8, bufptr, size); |
222 | 0 | else |
223 | 0 | tensor->data.u8 = 0; |
224 | 0 | } |
225 | | #elif defined(HAVE_MPS) |
226 | | if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY) |
227 | | { |
228 | | // Remove this flag so it can be deallocated as usual. |
229 | | tensor->type &= ~CCV_NO_DATA_ALLOC; |
230 | | assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY); |
231 | | if (size > 0) |
232 | | mpmemcpy(tensor->data.u8, tensor->dataof, tensor->info.type, bufptr, 0, CCV_TENSOR_CPU_MEMORY, size); |
233 | | else |
234 | | tensor->data.u8 = 0; |
235 | | } else { |
236 | | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
237 | | if (size > 0) |
238 | | memcpy(tensor->data.u8, bufptr, size); |
239 | | else |
240 | | tensor->data.u8 = 0; |
241 | | } |
242 | | #else |
243 | | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
244 | | if (size > 0) |
245 | | memcpy(tensor->data.u8, bufptr, size); |
246 | | else |
247 | | tensor->data.u8 = 0; |
248 | | #endif |
249 | 0 | return tensor; |
250 | 0 | } |
251 | | |
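| | // Resize a tensor in place: when the new size fits the existing allocation the tensor is reused as-is, otherwise the buffer is reallocated (and re-pinned when it had been pinned for CUDA). |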
252 | | ccv_nnc_tensor_t* ccv_nnc_tensor_resize(ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params) |
253 | 1.27k | { |
254 | 1.27k | assert(!CCV_IS_TENSOR_VIEW(tensor)); |
255 | 1.27k | assert(tensor->type & CCV_UNMANAGED); |
256 | 1.27k | assert(tensor->data_size > 0); |
257 | 1.27k | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GET_MEMORY(tensor->info.type)); |
258 | 1.27k | assert(CCV_TENSOR_GET_DEVICE(params.type) == CCV_TENSOR_GET_DEVICE(tensor->info.type)); |
259 | 1.27k | const size_t size = ccv_nnc_tensor_data_size(params); |
260 | 1.27k | const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0); |
261 | 1.27k | tensor->info = params; |
262 | 1.27k | #ifdef HAVE_CUDA |
263 | 1.27k | const int pinned_mem = (tensor->type & CCV_PINNED_MEM); |
264 | 1.27k | #endif |
265 | 1.27k | if (tfb) |
266 | 10 | { |
267 | 10 | tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]; |
268 | | // This corresponds to mat->step |
269 | 10 | tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2])); |
270 | 10 | } else |
271 | 1.26k | tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype); |
272 | 1.27k | if (size <= tensor->data_size) // Nothing. |
273 | 1.27k | { |
274 | 1.27k | #ifdef HAVE_CUDA |
275 | 1.27k | if (pinned_mem) |
276 | 4 | tensor->type |= CCV_PINNED_MEM; |
277 | 1.27k | #endif |
278 | 1.27k | return tensor; |
279 | 1.27k | } |
280 | 1 | ccv_nnc_tensor_t* new_tensor = tensor; |
281 | 1 | const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 63) & -64; |
282 | 1 | #ifdef HAVE_CUDA |
283 | 1 | if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY) |
284 | 0 | { |
285 | 0 | assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY); |
286 | 0 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type); |
287 | 0 | assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type)); |
288 | 0 | cufree(device_id, tensor->data.u8); |
289 | 0 | new_tensor->data.u8 = (uint8_t*)cumalloc(device_id, size); |
290 | 1 | } else { |
291 | 1 | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
292 | 1 | assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY); |
293 | | // pin memory again. |
294 | 1 | if (pinned_mem) |
295 | 0 | cuunregister(new_tensor->data.u8); |
296 | 1 | new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size); |
297 | 1 | new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size; |
298 | 1 | } |
299 | | #elif defined(HAVE_MPS) |
300 | | if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY) |
301 | | { |
302 | | assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY); |
303 | | const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type); |
304 | | assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type)); |
305 | | mpobjfree(device_id, tensor->data.u8); |
306 | | new_tensor->data.u8 = (uint8_t*)mpobjmalloc(device_id, size); |
307 | | } else { |
308 | | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
309 | | assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY); |
310 | | new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size); |
311 | | new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size; |
312 | | } |
313 | | #else |
314 | | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
315 | | new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size); |
316 | | new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size; |
317 | | #endif |
318 | 1 | new_tensor->data_size = size; |
319 | 1 | #ifdef HAVE_CUDA |
320 | 1 | if (pinned_mem) |
321 | 0 | ccv_nnc_tensor_pin_memory(new_tensor); |
322 | 1 | #endif |
323 | 1 | return new_tensor; |
324 | 1 | } |
325 | | |
326 | | ccv_nnc_tensor_t ccv_nnc_tensor(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags) |
327 | 78.2k | { |
328 | | // this specific form can be toll-free bridged to ccv_dense_matrix_t |
329 | 78.2k | const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0); |
330 | 78.2k | ccv_nnc_tensor_t tensor; |
331 | 78.2k | tensor.dataof = 0; |
332 | 78.2k | tensor.alias_ref = 0; |
333 | 78.2k | tensor.sig = 0; |
334 | 78.2k | tensor.refcount = 1; |
335 | 78.2k | tensor.info = params; |
336 | 78.2k | if (flags & CCV_TENSOR_CPU_MEMORY) |
337 | 0 | { |
338 | 0 | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
339 | 78.2k | } else if (flags & CCV_TENSOR_GPU_MEMORY) { |
340 | 0 | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY); |
341 | 0 | } |
342 | 78.2k | if (tfb) |
343 | 196 | { |
344 | 196 | tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]; |
345 | | // This corresponds to mat->step |
346 | 196 | tensor.info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2])); |
347 | 196 | } else // This won't be recognized by ccv_dense_matrix_t |
348 | 78.0k | tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype); |
349 | 78.2k | if (params.dim[0] > 0) |
350 | 78.2k | tensor.data.u8 = (uint8_t*)ptr; |
351 | 0 | else |
352 | 0 | tensor.data.u8 = 0; |
353 | 78.2k | tensor.data_size = 0; |
354 | 78.2k | return tensor; |
355 | 78.2k | } |
356 | | |
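| | // Register the CPU tensor's buffer as pinned (page-locked) memory with CUDA; returns 0 on success, -1 on failure, and is a no-op when built without CUDA. |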
357 | | int ccv_nnc_tensor_pin_memory(ccv_nnc_tensor_t* const tensor) |
358 | 1.40k | { |
359 | 1.40k | #ifdef HAVE_CUDA |
360 | 1.40k | assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY); |
361 | 1.40k | if (!(tensor->type & CCV_PINNED_MEM) && tensor->data_size) |
362 | 146 | { |
363 | 146 | const int success = curegister(tensor->data.u8, tensor->data_size); |
364 | 146 | if (success) |
365 | 146 | tensor->type |= CCV_PINNED_MEM; |
366 | 146 | return success ? 0 : -1; |
367 | 146 | } |
368 | 1.25k | #endif |
369 | 1.25k | return 0; |
370 | 1.40k | } |
371 | | |
372 | | void ccv_nnc_tensor_free(ccv_nnc_tensor_t* const tensor) |
373 | 49.1k | { |
374 | 49.1k | if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY && (tensor->type & CCV_MAPPED_MEM)) |
375 | 2 | { |
376 | | // The size might be different from the one we allocated (for example, the tensor might have rewritten its size to be smaller). |
377 | | // This might cause issues in the future. |
378 | 2 | const size_t size = ccv_nnc_tensor_data_size(tensor->info); |
379 | 2 | munmap(tensor->data.u8, size); |
380 | 2 | } |
381 | 49.1k | #ifdef HAVE_CUDA |
382 | 49.1k | if (tensor->type & CCV_PINNED_MEM) |
383 | 146 | cuunregister(tensor->data.u8); |
384 | 49.1k | if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY && |
385 | 49.1k | !(tensor->type & CCV_NO_DATA_ALLOC)) // If this is GPU memory and it is allocated, free. |
386 | 2.60k | cufree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8); |
387 | | #elif defined(HAVE_MPS) |
388 | | if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY && |
389 | | !(tensor->type & CCV_NO_DATA_ALLOC)) // If this is GPU memory and it is allocated, free. |
390 | | mpobjfree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8); |
391 | | #endif |
392 | 49.1k | ccfree(tensor); |
393 | 49.1k | } |
394 | | |
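| | // Shared helper for the tensor view constructors: copy dim / stride, compute the byte offset from ofs, record whether the view is contiguous, and resolve the data pointer (and dataof) from the backing tensor. |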
395 | | static inline void _ccv_nnc_tensor_view_set(ccv_nnc_tensor_view_t* const tv, const ccv_nnc_tensor_t* const tensor, const int dim[CCV_NNC_MAX_DIM_ALLOC], const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC]) |
396 | 177 | { |
397 | 177 | memcpy(tv->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC); |
398 | 177 | memcpy(tv->info.dim, dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC); |
399 | 177 | uint8_t* const p = tensor->data.u8; |
400 | 177 | const off_t off = tv->off = ccv_nnc_tensor_view_offset(tv->info.datatype, stride, ofs); |
401 | 177 | tv->contiguous = ccv_nnc_tensor_view_is_contiguous(dim, stride); |
402 | 177 | assert(off + CCV_GET_DATA_TYPE_SIZE(tv->info.datatype) * ccv_nnc_dimension_upper_bound(tv->info.dim, tv->stride) <= CCV_GET_DATA_TYPE_SIZE(tensor->info.datatype) * ccv_nnc_tensor_count(tensor->info)); |
403 | 177 | ccv_nnc_tensor_data(tv->info, p, off + tensor->dataof, &tv->data, &tv->dataof); |
404 | 177 | } |
405 | | |
406 | | ccv_nnc_tensor_view_t* ccv_nnc_tensor_view_new(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC]) |
407 | 96 | { |
408 | 96 | ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)ccmalloc(sizeof(ccv_nnc_tensor_view_t)); |
409 | 96 | tv->type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW; |
410 | 96 | tv->dataof = 0; |
411 | 96 | tv->alias_ref = (uintptr_t)tensor; |
412 | 96 | tv->refcount = 1; |
413 | 96 | tv->sig = 0; |
414 | 96 | tv->data_size = 0; |
415 | 96 | assert(params.type == tensor->info.type); |
416 | 96 | assert(params.datatype == tensor->info.datatype); |
417 | 96 | tv->info = params; |
418 | 96 | _ccv_nnc_tensor_view_set(tv, tensor, params.dim, ofs, stride); |
419 | 96 | return tv; |
420 | 96 | } |
421 | | |
422 | | ccv_nnc_tensor_view_t ccv_nnc_tensor_view(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC]) |
423 | 81 | { |
424 | 81 | assert(!CCV_IS_TENSOR_VIEW(tensor)); |
425 | 81 | assert(params.type == tensor->info.type); |
426 | 81 | assert(params.datatype == tensor->info.datatype); |
427 | 81 | ccv_nnc_tensor_view_t tv = { |
428 | 81 | .dataof = 0, |
429 | 81 | .alias_ref = (uintptr_t)tensor, |
430 | 81 | .type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW, // clean up the channel bits, and then add CCV_TENSOR_VIEW identifier |
431 | 81 | .refcount = 1, |
432 | 81 | .sig = 0, |
433 | 81 | .info = params, |
434 | 81 | .data_size = 0, |
435 | 81 | }; |
436 | 81 | _ccv_nnc_tensor_view_set(&tv, tensor, params.dim, ofs, stride); |
437 | 81 | return tv; |
438 | 81 | } |
439 | | |
440 | | void ccv_nnc_tensor_view_free(ccv_nnc_tensor_view_t* const tensor_view) |
441 | 96 | { |
442 | 96 | ccfree(tensor_view); |
443 | 96 | } |
444 | | |
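| | // Zero a possibly non-contiguous tensor recursively: fully packed blocks are cleared with a single memset, otherwise each element is cleared individually following the strides. |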
445 | | void _ccv_nnc_tensor_set_zero(unsigned char* u8, const int nd, const int* const dim, const int* const stride, const size_t data_size) |
446 | 107 | { |
447 | 107 | if (nd == 1) |
448 | 0 | { |
449 | 0 | if (stride[0] == 1) |
450 | 0 | { |
451 | 0 | memset(u8, 0, data_size * dim[0]); |
452 | 0 | return; |
453 | 0 | } |
454 | 0 | int i; |
455 | 0 | for (i = 0; i < dim[0]; i++) |
456 | 0 | memset(u8 + i * stride[0] * data_size, 0, data_size); |
457 | 107 | } else if (nd == 2) { |
458 | 2 | if (stride[1] == 1 && stride[0] == dim[1]) |
459 | 0 | { |
460 | 0 | memset(u8, 0, data_size * dim[1] * dim[0]); |
461 | 0 | return; |
462 | 0 | } |
463 | 2 | int x, y; |
464 | 8 | for (y = 0; y < dim[0]; y++) |
465 | 6 | { |
466 | 6 | unsigned char* const u8y = u8 + y * stride[0] * data_size; |
467 | 18 | for (x = 0; x < dim[1]; x++) |
468 | 12 | memset(u8y + x * stride[1] * data_size, 0, data_size); |
469 | 6 | } |
470 | 105 | } else if (nd == 3) { |
471 | 0 | if (stride[2] == 1 && stride[1] == dim[2] && stride[0] == dim[1] * dim[2]) |
472 | 0 | { |
473 | 0 | memset(u8, 0, data_size * dim[2] * dim[1] * dim[0]); |
474 | 0 | return; |
475 | 0 | } |
476 | 0 | int x, y, z; |
477 | 0 | for (z = 0; z < dim[0]; z++) |
478 | 0 | { |
479 | 0 | unsigned char* const u8z = u8 + z * stride[0] * data_size; |
480 | 0 | for (y = 0; y < dim[1]; y++) |
481 | 0 | { |
482 | 0 | unsigned char* const u8y = u8z + y * stride[1] * data_size; |
483 | 0 | for (x = 0; x < dim[2]; x++) |
484 | 0 | memset(u8y + x * stride[2] * data_size, 0, data_size); |
485 | 0 | } |
486 | 0 | } |
487 | 105 | } else if (nd == 4) { |
488 | 96 | if (stride[3] == 1 && stride[2] == dim[3] && stride[1] == dim[2] * dim[3] && stride[0] == dim[1] * dim[2] * dim[3]) |
489 | 0 | { |
490 | 0 | memset(u8, 0, data_size * dim[3] * dim[2] * dim[1] * dim[0]); |
491 | 0 | return; |
492 | 0 | } |
493 | 96 | int x, y, z, s; |
494 | 1.53k | for (s = 0; s < dim[0]; s++) |
495 | 1.44k | { |
496 | 1.44k | unsigned char* const u8s = u8 + s * stride[0] * data_size; |
497 | 4.32k | for (z = 0; z < dim[1]; z++) |
498 | 2.88k | { |
499 | 2.88k | unsigned char* const u8z = u8s + z * stride[1] * data_size; |
500 | 11.5k | for (y = 0; y < dim[2]; y++) |
501 | 8.64k | { |
502 | 8.64k | unsigned char* const u8y = u8z + y * stride[2] * data_size; |
503 | 43.2k | for (x = 0; x < dim[3]; x++) |
504 | 34.5k | memset(u8y + x * stride[3] * data_size, 0, data_size); |
505 | 8.64k | } |
506 | 2.88k | } |
507 | 1.44k | } |
508 | 96 | } else { |
509 | 9 | int i; |
510 | 113 | for (i = 0; i < dim[0]; i++) |
511 | 104 | _ccv_nnc_tensor_set_zero(u8 + i * stride[0] * data_size, nd - 1, dim + 1, stride + 1, data_size); |
512 | 9 | } |
513 | 107 | } |
514 | | |
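| | // Contiguous tensors are cleared with one memset; strided views fall back to the recursive helper above. |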
515 | | void ccv_nnc_tensor_zero(void* const tensor) |
516 | 28.0k | { |
517 | 28.0k | ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)tensor; |
518 | 28.0k | const size_t data_size = CCV_GET_DATA_TYPE_SIZE(tv->info.datatype); |
519 | 28.0k | if (CCV_IS_TENSOR_CONTIGUOUS(tv)) |
520 | 28.0k | { |
521 | 28.0k | memset(tv->data.u8, 0, data_size * ccv_nnc_tensor_count(tv->info)); |
522 | 28.0k | return; |
523 | 28.0k | } |
524 | 3 | const int nd = ccv_nnc_tensor_nd(tv->info.dim); |
525 | 3 | assert(nd >= 1); |
526 | 3 | const int* const tvstride = tv->stride; |
527 | | // Go through this recursively. |
528 | 3 | _ccv_nnc_tensor_set_zero(tv->data.u8, nd, tv->info.dim, tvstride, data_size); |
529 | 3 | } |
530 | | |
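| | // Returns 0 when the two tensors match, -1 otherwise; integer data is compared exactly, 32F/64F data with a ULP/epsilon tolerance. |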
531 | | int ccv_nnc_tensor_eq(const ccv_nnc_tensor_t* const a, const ccv_nnc_tensor_t* const b) |
532 | 842 | { |
533 | 842 | assert(!CCV_IS_TENSOR_VIEW(a)); |
534 | 842 | assert(!CCV_IS_TENSOR_VIEW(b)); |
535 | | // If a is a dense matrix, just use ccv_matrix_eq |
536 | 842 | if (CCV_TENSOR_IS_DENSE_MATRIX(a->type)) |
537 | 130 | return ccv_matrix_eq((ccv_matrix_t*)a, (ccv_matrix_t*)b); |
538 | | // Otherwise, do our own thing. |
539 | 712 | if (CCV_GET_DATA_TYPE(a->type) != CCV_GET_DATA_TYPE(b->type)) |
540 | 0 | return -1; |
541 | 712 | int i, c = 1; |
542 | 2.02k | for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; i++) |
543 | 2.02k | { |
544 | 2.02k | if (!a->info.dim[i] && !b->info.dim[i]) |
545 | 712 | break; |
546 | 1.31k | if (a->info.dim[i] != b->info.dim[i]) |
547 | 0 | return -1; |
548 | 1.31k | c *= a->info.dim[i]; |
549 | 1.31k | } |
550 | 712 | if (CCV_GET_DATA_TYPE(a->type) == CCV_32S) |
551 | 91 | return memcmp(a->data.i32, b->data.i32, sizeof(int) * c) == 0 ? 0 : -1; |
552 | | // Only support 32F and 64F at this point. |
553 | 712 | assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F || CCV_GET_DATA_TYPE(a->type) == CCV_64F); |
554 | | // Read: http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm |
555 | | // http://floating-point-gui.de/errors/comparison/ |
556 | 621 | if (CCV_GET_DATA_TYPE(a->type) == CCV_32F) |
557 | 619 | { |
558 | 619 | static const float epsi = FLT_EPSILON; |
559 | 619 | static const int32_t ulps = 128; // so that 1 and 1.000015 will be treated as the same. |
560 | 30.4M | for (i = 0; i < c; i++) |
561 | 30.4M | { |
562 | | // Although this is floating point, I use integer as a way to compare. |
563 | 30.4M | int32_t i32a = a->data.i32[i]; |
564 | 30.4M | if (i32a < 0) |
565 | 4.82M | i32a = 0x80000000 - i32a; |
566 | 30.4M | int32_t i32b = b->data.i32[i]; |
567 | 30.4M | if (i32b < 0) |
568 | 4.82M | i32b = 0x80000000 - i32b; |
569 | 30.4M | if (abs(i32a - i32b) > ulps && fabsf(a->data.f32[i] - b->data.f32[i]) > epsi) |
570 | 0 | return -1; |
571 | 30.4M | } |
572 | 619 | } else if (CCV_GET_DATA_TYPE(a->type) == CCV_64F) { |
573 | 2 | typedef union { |
574 | 2 | double f64; |
575 | 2 | int64_t i64; |
576 | 2 | } Float64; |
577 | 2 | static const double epsi = DBL_EPSILON; |
578 | 2 | static const int64_t ulps = 128; // so that 1 and 1.000015 will be treated as the same. |
579 | 15.8k | for (i = 0; i < c; i++) |
580 | 15.8k | { |
581 | | // Although this is floating point, I use integer as a way to compare. |
582 | 15.8k | Float64 f64a, f64b; |
583 | 15.8k | f64a.f64 = a->data.f64[i]; |
584 | 15.8k | f64b.f64 = b->data.f64[i]; |
585 | 15.8k | if (f64a.i64 < 0) |
586 | 0 | f64a.i64 = 0x8000000000000000 - f64a.i64; |
587 | 15.8k | if (f64b.i64 < 0) |
588 | 0 | f64b.i64 = 0x8000000000000000 - f64b.i64; |
589 | 15.8k | if (llabs(f64a.i64 - f64b.i64) > ulps && fabs(a->data.f64[i] - b->data.f64[i]) > epsi) |
590 | 0 | return -1; |
591 | 15.8k | } |
592 | 2 | } |
593 | 621 | return 0; |
594 | 621 | } |
595 | | |
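| | // Append from_size bytes to the output string, growing the buffer with ccrealloc when it runs out of room. |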
596 | | static void _strcat(char** str, int* written, size_t* len, char* from, int from_size) |
597 | 1.34k | { |
598 | 1.34k | if (*len - *written < from_size) |
599 | 0 | { |
600 | 0 | *len += from_size * 2; |
601 | 0 | *str = (char*)ccrealloc(*str, *len); |
602 | 0 | } |
603 | 1.34k | memcpy(*str + *written, from, from_size); |
604 | 1.34k | *written += from_size; |
605 | 1.34k | } |
606 | | |
607 | 648 | #define _STRPRINTF(str, written, len, format, ...) \ |
608 | 648 | do { \ |
609 | 648 | const int newly_written = snprintf((str) + (written), (len) - (written), format, ## __VA_ARGS__); \ |
610 | 648 | if ((len) - (written) < newly_written) \ |
611 | 648 | { \ |
612 | 0 | (len) += newly_written * 2; \ |
613 | 0 | (str) = (char*)ccrealloc((str), (len)); \ |
614 | 0 | (written) += snprintf((str) + (written), (len) - (written), format, ## __VA_ARGS__); \ |
615 | 0 | } else \ |
616 | 648 | (written) += newly_written; \ |
617 | 648 | } while (0) |
618 | | |
619 | | static void _strv(char** str, int* written, size_t* len, const ccv_nnc_tensor_t* const a, int i) |
620 | 648 | { |
621 | 648 | if (a->info.datatype == CCV_32F) |
622 | 0 | _STRPRINTF(*str, *written, *len, "%10.5g", a->data.f32[i]); |
623 | 648 | else if (a->info.datatype == CCV_64F) |
624 | 0 | _STRPRINTF(*str, *written, *len, "%10.5g", a->data.f64[i]); |
625 | 648 | else if (a->info.datatype == CCV_16F) { |
626 | 0 | float v; |
627 | 0 | ccv_half_precision_to_float((uint16_t*)(a->data.f16 + i), &v, 1); |
628 | 0 | _STRPRINTF(*str, *written, *len, "%10.5g", v); |
629 | 648 | } else if (a->info.datatype == CCV_32S) |
630 | 648 | _STRPRINTF(*str, *written, *len, "%10d", a->data.i32[i]); |
631 | 0 | else if (a->info.datatype == CCV_64S) |
632 | 0 | _STRPRINTF(*str, *written, *len, "%12lld", (long long int)a->data.i64[i]); |
633 | 0 | else if (a->info.datatype == CCV_8U) |
634 | 0 | _STRPRINTF(*str, *written, *len, "%3d", (int)a->data.u8[i]); |
635 | 648 | } |
636 | | |
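| | // Recursively format a tensor with two or more dimensions, printing only a few leading and trailing rows/columns and eliding the middle with "...". |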
637 | | static void _strt(char** str, int* written, size_t* len, const ccv_nnc_tensor_t* const a, int nd, int spacer, const int* const dim, const int* const stride, int idx) |
638 | 28 | { |
639 | 28 | assert(nd != 1); |
640 | 28 | if (nd == 2) |
641 | 17 | { |
642 | | // Print columns and the rows. |
643 | 17 | int i, j, k; |
644 | 17 | if (dim[0] <= 8) |
645 | 1 | { |
646 | 5 | for (i = 0; i < dim[0]; i++) |
647 | 4 | { |
648 | 4 | if (i != 0) |
649 | 3 | { |
650 | 3 | _strcat(str, written, len, " ", 2); |
651 | 3 | for (k = 0; k < spacer; k++) |
652 | 0 | _strcat(str, written, len, " ", 1); |
653 | 3 | } |
654 | 4 | _strcat(str, written, len, "[", 1); |
655 | 4 | if (dim[1] <= 8) |
656 | 4 | { |
657 | 20 | for (j = 0; j < dim[1]; j++) |
658 | 16 | { |
659 | 16 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
660 | 16 | if (j < dim[1] - 1) |
661 | 12 | _strcat(str, written, len, ", ", 2); |
662 | 16 | } |
663 | 4 | if (i < dim[0] - 1) |
664 | 3 | _strcat(str, written, len, "],\n", 3); |
665 | 4 | } else { |
666 | 0 | for (j = 0; j < 3; j++) |
667 | 0 | { |
668 | 0 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
669 | 0 | _strcat(str, written, len, ", ", 2); |
670 | 0 | } |
671 | 0 | _strcat(str, written, len, " ..., ", 6); |
672 | 0 | for (j = dim[1] - 3; j < dim[1]; j++) |
673 | 0 | { |
674 | 0 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
675 | 0 | if (j < dim[1] - 1) |
676 | 0 | _strcat(str, written, len, ", ", 2); |
677 | 0 | } |
678 | 0 | if (i < dim[0] - 1) |
679 | 0 | _strcat(str, written, len, "],\n", 3); |
680 | 0 | } |
681 | 4 | } |
682 | 1 | _strcat(str, written, len, "]", 1); |
683 | 16 | } else { |
684 | 64 | for (i = 0; i < 3; i++) |
685 | 48 | { |
686 | 48 | if (i != 0) |
687 | 32 | { |
688 | 32 | _strcat(str, written, len, " ", 2); |
689 | 128 | for (k = 0; k < spacer; k++) |
690 | 96 | _strcat(str, written, len, " ", 1); |
691 | 32 | } |
692 | 48 | _strcat(str, written, len, "[", 1); |
693 | 48 | if (dim[1] <= 8) |
694 | 0 | { |
695 | 0 | for (j = 0; j < dim[1]; j++) |
696 | 0 | { |
697 | 0 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
698 | 0 | if (j < dim[1] - 1) |
699 | 0 | _strcat(str, written, len, ", ", 2); |
700 | 0 | } |
701 | 0 | _strcat(str, written, len, "],\n", 3); |
702 | 48 | } else { |
703 | 192 | for (j = 0; j < 3; j++) |
704 | 144 | { |
705 | 144 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
706 | 144 | _strcat(str, written, len, ", ", 2); |
707 | 144 | } |
708 | 48 | _strcat(str, written, len, " ..., ", 6); |
709 | 192 | for (j = dim[1] - 3; j < dim[1]; j++) |
710 | 144 | { |
711 | 144 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
712 | 144 | if (j < dim[1] - 1) |
713 | 96 | _strcat(str, written, len, ", ", 2); |
714 | 144 | } |
715 | 48 | _strcat(str, written, len, "],\n", 3); |
716 | 48 | } |
717 | 48 | } |
718 | 16 | _strcat(str, written, len, " ", 2); |
719 | 64 | for (k = 0; k < spacer; k++) |
720 | 48 | _strcat(str, written, len, " ", 1); |
721 | 16 | _strcat(str, written, len, "...,\n", 5); |
722 | 64 | for (i = dim[0] - 3; i < dim[0]; i++) |
723 | 48 | { |
724 | 48 | _strcat(str, written, len, " ", 2); |
725 | 192 | for (k = 0; k < spacer; k++) |
726 | 144 | _strcat(str, written, len, " ", 1); |
727 | 48 | _strcat(str, written, len, "[", 1); |
728 | 48 | if (dim[1] < 8) |
729 | 0 | { |
730 | 0 | for (j = 0; j < dim[1]; j++) |
731 | 0 | { |
732 | 0 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
733 | 0 | if (j < dim[1] - 1) |
734 | 0 | _strcat(str, written, len, ", ", 2); |
735 | 0 | } |
736 | 0 | if (i < dim[0] - 1) |
737 | 0 | _strcat(str, written, len, "],\n", 3); |
738 | 48 | } else { |
739 | 192 | for (j = 0; j < 3; j++) |
740 | 144 | { |
741 | 144 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
742 | 144 | _strcat(str, written, len, ", ", 2); |
743 | 144 | } |
744 | 48 | _strcat(str, written, len, " ..., ", 6); |
745 | 192 | for (j = dim[1] - 3; j < dim[1]; j++) |
746 | 144 | { |
747 | 144 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
748 | 144 | if (j < dim[1] - 1) |
749 | 96 | _strcat(str, written, len, ", ", 2); |
750 | 144 | } |
751 | 48 | if (i < dim[0] - 1) |
752 | 32 | _strcat(str, written, len, "],\n", 3); |
753 | 48 | } |
754 | 48 | } |
755 | 16 | _strcat(str, written, len, "]", 1); |
756 | 16 | } |
757 | 17 | return; |
758 | 17 | } |
759 | 11 | int i, j; |
760 | 11 | if (dim[0] > 4) |
761 | 2 | { |
762 | 6 | for (i = 0; i < 2; i++) |
763 | 4 | { |
764 | 4 | _strcat(str, written, len, "[", 1); |
765 | 4 | _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i); |
766 | 4 | _strcat(str, written, len, "],\n ", 5); |
767 | 8 | for (j = 0; j < spacer; j++) |
768 | 4 | _strcat(str, written, len, " ", 1); |
769 | 4 | } |
770 | 2 | _strcat(str, written, len, "...,\n", 5); |
771 | 2 | _strcat(str, written, len, " ", 2); |
772 | 4 | for (j = 0; j < spacer; j++) |
773 | 2 | _strcat(str, written, len, " ", 1); |
774 | 6 | for (i = dim[0] - 2; i < dim[0]; i++) |
775 | 4 | { |
776 | 4 | _strcat(str, written, len, "[", 1); |
777 | 4 | _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i); |
778 | 4 | if (i < dim[0] - 1) |
779 | 2 | { |
780 | 2 | _strcat(str, written, len, "],\n ", 5); |
781 | 4 | for (j = 0; j < spacer; j++) |
782 | 2 | _strcat(str, written, len, " ", 1); |
783 | 2 | } |
784 | 4 | } |
785 | 2 | _strcat(str, written, len, "]", 1); |
786 | 9 | } else { |
787 | 27 | for (i = 0; i < dim[0]; i++18 ) |
788 | 18 | { |
789 | 18 | _strcat(str, written, len, "[", 1); |
790 | 18 | _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i); |
791 | 18 | if (i < dim[0] - 1) |
792 | 9 | { |
793 | 9 | _strcat(str, written, len, "],\n", 3); |
794 | 9 | _strcat(str, written, len, " ", 2); |
795 | 25 | for (j = 0; j < spacer; j++) |
796 | 16 | _strcat(str, written, len, " ", 1); |
797 | 9 | } |
798 | 18 | } |
799 | 9 | _strcat(str, written, len, "]", 1); |
800 | 9 | } |
801 | 11 | } |
802 | | |
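| | // Pretty-print the tensor into a newly allocated string; large dimensions are elided with "...". |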
803 | | char* ccv_nnc_tensor_format_new(const ccv_nnc_tensor_t* const a) |
804 | 4 | { |
805 | 4 | const int nd = ccv_nnc_tensor_nd(a->info.dim); |
806 | 4 | int i; |
807 | 4 | int rows = 8; // 8 rows for the first one, and then just first and last. |
808 | 7 | for (i = 2; i < nd; i++) |
809 | 3 | rows *= 5; // Maximum 3 rows beyond the first two. |
810 | 4 | int columns = nd * 2 + 16 * 8; |
811 | 4 | size_t len = sizeof(char) * columns * rows; |
812 | | // Allocate return string buffer. |
813 | 4 | char* str = (char*)ccmalloc(len); |
814 | 4 | int written = 0; |
815 | 4 | int stride[CCV_NNC_MAX_DIM_ALLOC]; |
816 | 4 | if (CCV_IS_TENSOR_VIEW(a)) |
817 | 0 | memcpy(stride, ((ccv_nnc_tensor_view_t*)a)->stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC); |
818 | 4 | else |
819 | 4 | ccv_nnc_tensor_get_stride(a->info.dim, stride); |
820 | 4 | _strcat(&str, &written, &len, "[\n ", 4); |
821 | 4 | if (nd == 1) |
822 | 2 | { |
823 | | // Special casing for vector. |
824 | 2 | if (a->info.dim[0] <= 64) |
825 | 13 | for (i = 0; i < a->info.dim[0]; i++) |
826 | 12 | { |
827 | 12 | _strv(&str, &written, &len, a, i * stride[0]); |
828 | 12 | if (i < a->info.dim[0] - 1) |
829 | 11 | { |
830 | 11 | if ((i + 1) % 8 == 0) |
831 | 1 | _strcat(&str, &written, &len, ",\n ", 4); |
832 | 10 | else |
833 | 10 | _strcat(&str, &written, &len, ", ", 2); |
834 | 11 | } |
835 | 12 | } |
836 | 1 | else { |
837 | | // First 3 rows. |
838 | 25 | for (i = 0; i < 24; i++) |
839 | 24 | { |
840 | 24 | _strv(&str, &written, &len, a, i * stride[0]); |
841 | 24 | if ((i + 1) % 8 == 0) |
842 | 3 | _strcat(&str, &written, &len, ",\n ", 4); |
843 | 21 | else |
844 | 21 | _strcat(&str, &written, &len, ", ", 2); |
845 | 24 | } |
846 | 1 | _strcat(&str, &written, &len, "...,\n ", 7); |
847 | | // Last 3 rows (aligned to 8 items per row). |
848 | 1 | int start = ((a->info.dim[0] + 7) / 8 - 3) * 8; |
849 | 21 | for (i = start; i < a->info.dim[0]; i++) |
850 | 20 | { |
851 | 20 | _strv(&str, &written, &len, a, i * stride[0]); |
852 | 20 | if (i < a->info.dim[0] - 1) |
853 | 19 | { |
854 | 19 | if ((i + 1) % 8 == 0) |
855 | 2 | _strcat(&str, &written, &len, ",\n ", 4); |
856 | 17 | else |
857 | 17 | _strcat(&str, &written, &len, ", ", 2); |
858 | 19 | } |
859 | 20 | } |
860 | 1 | } |
861 | 2 | } else { |
862 | 2 | _strt(&str, &written, &len, a, nd, 0, a->info.dim, stride, 0); |
863 | 2 | } |
864 | 4 | _strcat(&str, &written, &len, "\n]", 3); // Including the terminal \0. |
865 | 4 | str = (char*)ccrealloc(str, written); // Don't need the extra spaces. |
866 | 4 | return str; |
867 | 4 | } |