/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_tensor.c
Line | Count | Source |
1 | | #include "ccv_nnc.h" |
2 | | #include "ccv_nnc_easy.h" |
3 | | #include "ccv_nnc_internal.h" |
4 | | #ifdef HAVE_CUDA |
5 | | #include "gpu/ccv_nnc_compat.h" |
6 | | #elif defined(HAVE_MPS) |
7 | | #include "mps/ccv_nnc_mps.h" |
8 | | #endif |
9 | | #include <fcntl.h> |
10 | | #include <sys/mman.h> |
11 | | |
12 | | // MARK - Level-1 API |
13 | | |
14 | | const int ccv_nnc_no_ofs[CCV_NNC_MAX_DIM_ALLOC] = {0}; |
15 | | |
16 | | ccv_nnc_tensor_t* ccv_nnc_tensor_new(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags) |
17 | 48.8k | { |
18 | 48.8k | ccv_nnc_tensor_t* tensor; |
19 | | // this specific form can be toll-free bridged to ccv_dense_matrix_t (on CPU, with 3 dims (channels, rows, cols), and channels no larger than the max channels of ccv_dense_matrix_t). |
20 | 48.8k | const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0); |
21 | 48.8k | if (ptr || (flags & CCV_NO_DATA_ALLOC)) |
22 | 1.68k | { |
23 | 1.68k | tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t)); |
24 | 1.68k | tensor->dataof = 0; |
25 | 1.68k | tensor->alias_ref = 0; |
26 | 1.68k | tensor->sig = 0; |
27 | 1.68k | tensor->refcount = 1; |
28 | 1.68k | tensor->info = params; |
29 | 1.68k | if (tfb) |
30 | 59 | { |
31 | 59 | tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]; |
32 | | // This corresponds to mat->step |
33 | 59 | tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2])); |
34 | 59 | } else // This won't be recognized by ccv_dense_matrix_t |
35 | 1.62k | tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype); |
36 | 1.68k | tensor->data.u8 = (uint8_t*)ptr; |
37 | 1.68k | return tensor; |
38 | 1.68k | } |
39 | 47.1k | if (flags & CCV_TENSOR_CPU_MEMORY) |
40 | 0 | { |
41 | 0 | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
42 | 47.1k | } else if (flags & CCV_TENSOR_GPU_MEMORY) { |
43 | 0 | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY); |
44 | 0 | } |
45 | 47.1k | const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 63) & -64; |
46 | 47.1k | const size_t size = ccv_nnc_tensor_data_size(params); |
47 | 47.1k | #ifdef HAVE_CUDA |
48 | 47.1k | if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY) |
49 | 2.65k | { |
50 | 2.65k | tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t)); |
51 | 2.65k | assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY); |
52 | 2.65k | if (size > 0) |
53 | 2.65k | tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size); |
54 | 0 | else |
55 | 0 | tensor->data.u8 = 0; |
56 | 44.4k | } else { |
57 | 44.4k | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
58 | 44.4k | ccmemalign((void **)&tensor, 64, tensor_hdr_size + size); |
59 | 44.4k | if (size > 0) |
60 | 44.4k | tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size; |
61 | 0 | else |
62 | 0 | tensor->data.u8 = 0; |
63 | 44.4k | } |
64 | | #elif defined(HAVE_MPS) |
65 | | if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY) |
66 | | { |
67 | | tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t)); |
68 | | assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY); |
69 | | if (size > 0) |
70 | | tensor->data.u8 = (uint8_t*)mpobjmalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size); |
71 | | else |
72 | | tensor->data.u8 = 0; |
73 | | } else { |
74 | | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
75 | | ccmemalign((void **)&tensor, 64, tensor_hdr_size + size); |
76 | | if (size > 0) |
77 | | tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size; |
78 | | else |
79 | | tensor->data.u8 = 0; |
80 | | } |
81 | | #else |
82 | | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
83 | | ccmemalign((void **)&tensor, 64, tensor_hdr_size + size); |
84 | | if (size > 0) |
85 | | tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size; |
86 | | else |
87 | | tensor->data.u8 = 0; |
88 | | #endif |
89 | 47.1k | tensor->dataof = 0; |
90 | 47.1k | tensor->alias_ref = 0; |
91 | 47.1k | tensor->data_size = size; |
92 | 47.1k | tensor->sig = 0; |
93 | 47.1k | tensor->refcount = 1; |
94 | 47.1k | tensor->info = params; |
95 | 47.1k | if (tfb) |
96 | 4.33k | { |
97 | 4.33k | tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]; |
98 | | // This corresponds to mat->step |
99 | 4.33k | tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2])); |
100 | 4.33k | } else |
101 | 42.8k | tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype); |
102 | 47.1k | return tensor; |
103 | 47.1k | } |
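
A minimal usage sketch for the allocator above, assuming only what this file shows (passing ptr == 0 without CCV_NO_DATA_ALLOC makes the tensor own its data, allocated together with the header in one 64-byte-aligned block; the convenience macros in ccv_nnc_easy.h could shorten the params setup):

    #include "ccv_nnc.h"

    int main(void)
    {
        // A 2x2 single-precision tensor in CPU memory; dim[2] == 0 keeps it
        // out of the toll-free ccv_dense_matrix_t bridging path (tfb).
        ccv_nnc_tensor_param_t params = {
            .type = CCV_TENSOR_CPU_MEMORY,
            .format = CCV_TENSOR_FORMAT_NHWC,
            .datatype = CCV_32F,
            .dim = {2, 2},
        };
        ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_new(0, params, 0);
        tensor->data.f32[0] = 1; // data.u8 points just past the aligned header
        ccv_nnc_tensor_free(tensor);
        return 0;
    }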
104 | | |
105 | | ccv_nnc_tensor_t* ccv_nnc_tensor_new_from_file(const ccv_nnc_tensor_param_t params, const char* const filename, const off_t offset, const int flags) |
106 | 4 | { |
107 | 4 | ccv_nnc_tensor_t* tensor; |
108 | | // this specific form can be toll-free bridged to ccv_dense_matrix_t (on CPU, with 3 dims (channels, rows, cols), and channels no larger than the max channels of ccv_dense_matrix_t). |
109 | 4 | const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0); |
110 | 4 | tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t)); |
111 | 4 | tensor->dataof = 0; |
112 | 4 | tensor->alias_ref = 0; |
113 | 4 | tensor->sig = 0; |
114 | 4 | tensor->refcount = 1; |
115 | 4 | tensor->info = params; |
116 | 4 | if (tfb) |
117 | 0 | { |
118 | 0 | tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]; |
119 | | // This corresponds to mat->step |
120 | 0 | tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2])); |
121 | 0 | } else // This won't be recognized by ccv_dense_matrix_t |
122 | 4 | tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype); |
123 | 4 | const size_t size = ccv_nnc_tensor_data_size(params); |
124 | 4 | #ifdef HAVE_CUDA |
125 | 4 | if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY) |
126 | 2 | { |
127 | | // Remove this flag so it can be deallocated as usual. |
128 | 2 | tensor->type &= ~CCV_NO_DATA_ALLOC; |
129 | 2 | assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY); |
130 | 2 | if (size > 0) |
131 | 2 | { |
132 | | // This is not supported yet on CUDA. |
133 | 2 | tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size); |
134 | 2 | int fd = open(filename, O_RDONLY, 0); |
135 | 2 | cufileread(fd, offset, tensor->data.u8, size); |
136 | 2 | close(fd); |
137 | 2 | } else |
138 | 0 | tensor->data.u8 = 0; |
139 | 2 | } else { |
140 | 2 | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
141 | 2 | if (size > 0) |
142 | 2 | { |
143 | 2 | int fd = open(filename, O_RDONLY, 0); |
144 | 2 | void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset); |
145 | 2 | close(fd); |
146 | 2 | madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED); |
147 | 2 | tensor->data.u8 = bufptr; |
148 | 2 | tensor->type |= CCV_MAPPED_MEM; |
149 | 2 | } else |
150 | 0 | tensor->data.u8 = 0; |
151 | 2 | } |
152 | | #elif defined(HAVE_MPS) |
153 | | if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY) |
154 | | { |
155 | | // Remove this flag so it can be deallocated as usual. |
156 | | tensor->type &= ~CCV_NO_DATA_ALLOC; |
157 | | assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY); |
158 | | if (size > 0) |
159 | | tensor->data.u8 = (uint8_t*)mpmemmap(filename, size, offset, flags); |
160 | | else |
161 | | tensor->data.u8 = 0; |
162 | | } else { |
163 | | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
164 | | if (size > 0) |
165 | | { |
166 | | int fd = open(filename, O_RDONLY, 0); |
167 | | void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset); |
168 | | close(fd); |
169 | | madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED); |
170 | | tensor->data.u8 = bufptr; |
171 | | tensor->type |= CCV_MAPPED_MEM; |
172 | | } else |
173 | | tensor->data.u8 = 0; |
174 | | } |
175 | | #else |
176 | | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
177 | | if (size > 0) |
178 | | { |
179 | | int fd = open(filename, O_RDONLY, 0); |
180 | | void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset); |
181 | | close(fd); |
182 | | madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED); |
183 | | tensor->data.u8 = bufptr; |
184 | | tensor->type |= CCV_MAPPED_MEM; |
185 | | } else |
186 | | tensor->data.u8 = 0; |
187 | | #endif |
188 | 4 | return tensor; |
189 | 4 | } |
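
A sketch of loading from disk, assuming "weights.bin" is a placeholder file that actually holds the tensor's bytes at the given offset. On a CPU-only build the data is mmap-ed (note the CCV_MAPPED_MEM flag set above), and the offset is handed straight to mmap, so it should be page-aligned:

    #include "ccv_nnc.h"

    void load_weights(void)
    {
        ccv_nnc_tensor_param_t params = {
            .type = CCV_TENSOR_CPU_MEMORY,
            .format = CCV_TENSOR_FORMAT_NHWC,
            .datatype = CCV_32F,
            .dim = {1024},
        };
        // The mapping lives until ccv_nnc_tensor_free, which munmap-s it
        // because CCV_MAPPED_MEM is set on the tensor type.
        ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_new_from_file(params, "weights.bin", 0, 0);
        // ... read tensor->data.f32 ...
        ccv_nnc_tensor_free(tensor);
    }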
190 | | |
191 | | ccv_nnc_tensor_t* ccv_nnc_tensor_resize(ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params) |
192 | 1.27k | { |
193 | 1.27k | assert(!CCV_IS_TENSOR_VIEW(tensor)); |
194 | 1.27k | assert(tensor->type & CCV_UNMANAGED); |
195 | 1.27k | assert(tensor->data_size > 0); |
196 | 1.27k | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GET_MEMORY(tensor->info.type)); |
197 | 1.27k | assert(CCV_TENSOR_GET_DEVICE(params.type) == CCV_TENSOR_GET_DEVICE(tensor->info.type)); |
198 | 1.27k | const size_t size = ccv_nnc_tensor_data_size(params); |
199 | 1.27k | const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0); |
200 | 1.27k | tensor->info = params; |
201 | 1.27k | #ifdef HAVE_CUDA |
202 | 1.27k | const int pinned_mem = (tensor->type & CCV_PINNED_MEM); |
203 | 1.27k | #endif |
204 | 1.27k | if (tfb) |
205 | 10 | { |
206 | 10 | tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]; |
207 | | // This corresponds to mat->step |
208 | 10 | tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2])); |
209 | 10 | } else |
210 | 1.26k | tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype); |
211 | 1.27k | if (size <= tensor->data_size) // Nothing. |
212 | 1.27k | { |
213 | 1.27k | #ifdef HAVE_CUDA |
214 | 1.27k | if (pinned_mem) |
215 | 4 | tensor->type |= CCV_PINNED_MEM; |
216 | 1.27k | #endif |
217 | 1.27k | return tensor; |
218 | 1.27k | } |
219 | 1 | ccv_nnc_tensor_t* new_tensor = tensor; |
220 | 1 | const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 63) & -64; |
221 | 1 | #ifdef HAVE_CUDA |
222 | 1 | if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY) |
223 | 0 | { |
224 | 0 | assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY); |
225 | 0 | const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type); |
226 | 0 | assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type)); |
227 | 0 | cufree(device_id, tensor->data.u8); |
228 | 0 | new_tensor->data.u8 = (uint8_t*)cumalloc(device_id, size); |
229 | 1 | } else { |
230 | 1 | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
231 | 1 | assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY); |
232 | | // Unregister pinned memory first; it is pinned again after the realloc below. |
233 | 1 | if (pinned_mem) |
234 | 0 | cuunregister(new_tensor->data.u8); |
235 | 1 | new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size); |
236 | 1 | new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size; |
237 | 1 | } |
238 | | #elif defined(HAVE_MPS) |
239 | | if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY) |
240 | | { |
241 | | assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY); |
242 | | const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type); |
243 | | assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type)); |
244 | | mpobjfree(device_id, tensor->data.u8); |
245 | | new_tensor->data.u8 = (uint8_t*)mpobjmalloc(device_id, size); |
246 | | } else { |
247 | | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
248 | | assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY); |
249 | | new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size); |
250 | | new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size; |
251 | | } |
252 | | #else |
253 | | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
254 | | new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size); |
255 | | new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size; |
256 | | #endif |
257 | 1 | new_tensor->data_size = size; |
258 | 1 | #ifdef HAVE_CUDA |
259 | 1 | if (pinned_mem) |
260 | 0 | ccv_nnc_tensor_pin_memory(new_tensor); |
261 | 1 | #endif |
262 | 1 | return new_tensor; |
263 | 1 | } |
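
Note the return value: when the new size exceeds data_size, the CPU path ccrealloc-s the header-plus-data block, so the tensor pointer itself can move. A sketch of the intended call pattern, with a hypothetical helper name:

    #include "ccv_nnc.h"

    // Hypothetical helper: double the leading dimension of a CPU tensor.
    ccv_nnc_tensor_t* grow_rows(ccv_nnc_tensor_t* tensor)
    {
        ccv_nnc_tensor_param_t bigger = tensor->info;
        bigger.dim[0] *= 2;
        // Always adopt the returned pointer; the old one may be stale.
        return ccv_nnc_tensor_resize(tensor, bigger);
    }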
264 | | |
265 | | ccv_nnc_tensor_t ccv_nnc_tensor(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags) |
266 | 78.0k | { |
267 | | // this specific form can be toll-free bridged to ccv_dense_matrix_t |
268 | 78.0k | const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0); |
269 | 78.0k | ccv_nnc_tensor_t tensor; |
270 | 78.0k | tensor.dataof = 0; |
271 | 78.0k | tensor.alias_ref = 0; |
272 | 78.0k | tensor.sig = 0; |
273 | 78.0k | tensor.refcount = 1; |
274 | 78.0k | tensor.info = params; |
275 | 78.0k | if (flags & CCV_TENSOR_CPU_MEMORY) |
276 | 0 | { |
277 | 0 | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY); |
278 | 78.0k | } else if (flags & CCV_TENSOR_GPU_MEMORY) { |
279 | 0 | assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY); |
280 | 0 | } |
281 | 78.0k | if (tfb) |
282 | 192 | { |
283 | 192 | tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]; |
284 | | // This corresponds to mat->step |
285 | 192 | tensor.info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2])); |
286 | 192 | } else // This won't be recognized by ccv_dense_matrix_t |
287 | 77.8k | tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype); |
288 | 78.0k | if (params.dim[0] > 0) |
289 | 78.0k | tensor.data.u8 = (uint8_t*)ptr; |
290 | 0 | else |
291 | 0 | tensor.data.u8 = 0; |
292 | 78.0k | tensor.data_size = 0; |
293 | 78.0k | return tensor; |
294 | 78.0k | } |
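
The by-value variant wraps caller-owned memory: the returned struct carries CCV_NO_DATA_ALLOC and allocates nothing, so there is nothing to free. A minimal sketch:

    #include "ccv_nnc.h"

    void wrap_buffer(void)
    {
        float buffer[4] = {1, 2, 3, 4};
        ccv_nnc_tensor_param_t params = {
            .type = CCV_TENSOR_CPU_MEMORY,
            .format = CCV_TENSOR_FORMAT_NHWC,
            .datatype = CCV_32F,
            .dim = {4},
        };
        ccv_nnc_tensor_t tensor = ccv_nnc_tensor(buffer, params, 0);
        tensor.data.f32[0] = 0; // writes through to buffer[0]
    }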
295 | | |
296 | | int ccv_nnc_tensor_pin_memory(ccv_nnc_tensor_t* const tensor) |
297 | 1.40k | { |
298 | 1.40k | #ifdef HAVE_CUDA |
299 | 1.40k | assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY); |
300 | 1.40k | if (!(tensor->type & CCV_PINNED_MEM) && tensor->data_size) |
301 | 146 | { |
302 | 146 | const int success = curegister(tensor->data.u8, tensor->data_size); |
303 | 146 | if (success) |
304 | 146 | tensor->type |= CCV_PINNED_MEM; |
305 | 146 | return success ? 0 : -1; |
306 | 146 | } |
307 | 1.25k | #endif |
308 | 1.25k | return 0; |
309 | 1.40k | } |
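
Pinning page-locks a CPU tensor's buffer via curegister so later host-device copies can run asynchronously; on non-CUDA builds the call is a no-op returning 0. Checking the result, as a sketch (tensor is assumed to be an existing CPU tensor):

    // Returns 0 on success (or when already pinned / not a CUDA build), -1 if
    // curegister fails. CCV_PINNED_MEM is also what ccv_nnc_tensor_free and
    // ccv_nnc_tensor_resize consult to unregister the pages before release.
    if (ccv_nnc_tensor_pin_memory(tensor) != 0)
        { /* fall back to pageable-memory copies */ }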
310 | | |
311 | | void ccv_nnc_tensor_free(ccv_nnc_tensor_t* const tensor) |
312 | 48.8k | { |
313 | 48.8k | if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY && tensor->type & CCV_MAPPED_MEM) |
314 | 2 | { |
315 | | // The size might be different from the one we allocated with (for example, the tensor might rewrite its size to be smaller). |
316 | | // This might cause issues in the future. |
317 | 2 | const size_t size = ccv_nnc_tensor_data_size(tensor->info); |
318 | 2 | munmap(tensor->data.u8, size); |
319 | 2 | } |
320 | 48.8k | #ifdef HAVE_CUDA |
321 | 48.8k | if (tensor->type & CCV_PINNED_MEM) |
322 | 146 | cuunregister(tensor->data.u8); |
323 | 48.8k | if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY && |
324 | 48.8k | !(tensor->type & CCV_NO_DATA_ALLOC)) // If this is GPU memory and it is allocated, free. |
325 | 2.65k | cufree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8); |
326 | | #elif defined(HAVE_MPS) |
327 | | if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY && |
328 | | !(tensor->type & CCV_NO_DATA_ALLOC)) // If this is GPU memory and it is allocated, free. |
329 | | mpobjfree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8); |
330 | | #endif |
331 | 48.8k | ccfree(tensor); |
332 | 48.8k | } |
333 | | |
334 | | static inline void _ccv_nnc_tensor_view_set(ccv_nnc_tensor_view_t* const tv, const ccv_nnc_tensor_t* const tensor, const int dim[CCV_NNC_MAX_DIM_ALLOC], const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC]) |
335 | 165 | { |
336 | 165 | memcpy(tv->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC); |
337 | 165 | memcpy(tv->info.dim, dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC); |
338 | 165 | uint8_t* const p = tensor->data.u8; |
339 | 165 | const off_t off = tv->off = ccv_nnc_tensor_view_offset(tv->info.datatype, stride, ofs); |
340 | 165 | tv->contiguous = ccv_nnc_tensor_view_is_contiguous(dim, stride); |
341 | 165 | assert(off + CCV_GET_DATA_TYPE_SIZE(tv->info.datatype) * ccv_nnc_dimension_upper_bound(tv->info.dim, tv->stride) <= CCV_GET_DATA_TYPE_SIZE(tensor->info.datatype) * ccv_nnc_tensor_count(tensor->info)); |
342 | 165 | ccv_nnc_tensor_data(tv->info, p, off + tensor->dataof, &tv->data, &tv->dataof); |
343 | 165 | } |
344 | | |
345 | | ccv_nnc_tensor_view_t* ccv_nnc_tensor_view_new(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC]) |
346 | 84 | { |
347 | 84 | ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)ccmalloc(sizeof(ccv_nnc_tensor_view_t)); |
348 | 84 | tv->type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW; |
349 | 84 | tv->dataof = 0; |
350 | 84 | tv->alias_ref = (uintptr_t)tensor; |
351 | 84 | tv->refcount = 1; |
352 | 84 | tv->sig = 0; |
353 | 84 | tv->data_size = 0; |
354 | 84 | assert(params.type == tensor->info.type); |
355 | 84 | assert(params.datatype == tensor->info.datatype); |
356 | 84 | tv->info = params; |
357 | 84 | _ccv_nnc_tensor_view_set(tv, tensor, params.dim, ofs, stride); |
358 | 84 | return tv; |
359 | 84 | } |
360 | | |
361 | | ccv_nnc_tensor_view_t ccv_nnc_tensor_view(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC]) |
362 | 81 | { |
363 | 81 | assert(!CCV_IS_TENSOR_VIEW(tensor)); |
364 | 81 | assert(params.type == tensor->info.type); |
365 | 81 | assert(params.datatype == tensor->info.datatype); |
366 | 81 | ccv_nnc_tensor_view_t tv = { |
367 | 81 | .dataof = 0, |
368 | 81 | .alias_ref = (uintptr_t)tensor, |
369 | 81 | .type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW, // clean up the channel bits, and then add CCV_TENSOR_VIEW identifier |
370 | 81 | .refcount = 1, |
371 | 81 | .sig = 0, |
372 | 81 | .info = params, |
373 | 81 | .data_size = 0, |
374 | 81 | }; |
375 | 81 | _ccv_nnc_tensor_view_set(&tv, tensor, params.dim, ofs, stride); |
376 | 81 | return tv; |
377 | 81 | } |
378 | | |
379 | | void ccv_nnc_tensor_view_free(ccv_nnc_tensor_view_t* const tensor_view) |
380 | 84 | { |
381 | 84 | ccfree(tensor_view); |
382 | 84 | } |
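
A view sketch: select a 2x2 window at offset (1, 1) of a 4x4 tensor. The strides are in elements and describe the underlying tensor's layout; _ccv_nnc_tensor_view_set asserts the window stays inside the parent's allocation:

    #include "ccv_nnc.h"

    void view_window(void)
    {
        ccv_nnc_tensor_param_t params = {
            .type = CCV_TENSOR_CPU_MEMORY,
            .format = CCV_TENSOR_FORMAT_NHWC,
            .datatype = CCV_32F,
            .dim = {4, 4},
        };
        ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_new(0, params, 0);
        ccv_nnc_tensor_param_t view_params = params;
        view_params.dim[0] = view_params.dim[1] = 2; // the window's shape
        const int ofs[CCV_NNC_MAX_DIM_ALLOC] = {1, 1}; // skip one row, one column
        const int stride[CCV_NNC_MAX_DIM_ALLOC] = {4, 1}; // row stride of the 4x4 parent
        ccv_nnc_tensor_view_t* const tv = ccv_nnc_tensor_view_new(tensor, view_params, ofs, stride);
        ccv_nnc_tensor_view_free(tv); // frees the view header only, not the parent
        ccv_nnc_tensor_free(tensor);
    }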
383 | | |
384 | | void _ccv_nnc_tensor_set_zero(unsigned char* u8, const int nd, const int* const dim, const int* const stride, const size_t data_size) |
385 | 106 | { |
386 | 106 | if (nd == 1) |
387 | 0 | { |
388 | 0 | if (stride[0] == 1) |
389 | 0 | { |
390 | 0 | memset(u8, 0, data_size * dim[0]); |
391 | 0 | return; |
392 | 0 | } |
393 | 0 | int i; |
394 | 0 | for (i = 0; i < dim[0]; i++) |
395 | 0 | memset(u8 + i * stride[0] * data_size, 0, data_size); |
396 | 106 | } else if (nd == 2) { |
397 | 1 | if (stride[1] == 1 && stride[0] == dim[1]) |
398 | 0 | { |
399 | 0 | memset(u8, 0, data_size * dim[1] * dim[0]); |
400 | 0 | return; |
401 | 0 | } |
402 | 1 | int x, y; |
403 | 4 | for (y = 0; y < dim[0]; y++) |
404 | 3 | { |
405 | 3 | unsigned char* const u8y = u8 + y * stride[0] * data_size; |
406 | 9 | for (x = 0; x < dim[1]; x++) |
407 | 6 | memset(u8y + x * stride[1] * data_size, 0, data_size); |
408 | 3 | } |
409 | 105 | } else if (nd == 3) { |
410 | 0 | if (stride[2] == 1 && stride[1] == dim[2] && stride[0] == dim[1] * dim[2]) |
411 | 0 | { |
412 | 0 | memset(u8, 0, data_size * dim[2] * dim[1] * dim[0]); |
413 | 0 | return; |
414 | 0 | } |
415 | 0 | int x, y, z; |
416 | 0 | for (z = 0; z < dim[0]; z++) |
417 | 0 | { |
418 | 0 | unsigned char* const u8z = u8 + z * stride[0] * data_size; |
419 | 0 | for (y = 0; y < dim[1]; y++) |
420 | 0 | { |
421 | 0 | unsigned char* const u8y = u8z + y * stride[1] * data_size; |
422 | 0 | for (x = 0; x < dim[2]; x++) |
423 | 0 | memset(u8y + x * stride[2] * data_size, 0, data_size); |
424 | 0 | } |
425 | 0 | } |
426 | 105 | } else if (nd == 4) { |
427 | 96 | if (stride[3] == 1 && stride[2] == dim[3] && stride[1] == dim[2] * dim[3] && stride[0] == dim[1] * dim[2] * dim[3]) |
428 | 0 | { |
429 | 0 | memset(u8, 0, data_size * dim[3] * dim[2] * dim[1] * dim[0]); |
430 | 0 | return; |
431 | 0 | } |
432 | 96 | int x, y, z, s; |
433 | 1.53k | for (s = 0; s < dim[0]; s++) |
434 | 1.44k | { |
435 | 1.44k | unsigned char* const u8s = u8 + s * stride[0] * data_size; |
436 | 4.32k | for (z = 0; z < dim[1]; z++) |
437 | 2.88k | { |
438 | 2.88k | unsigned char* const u8z = u8s + z * stride[1] * data_size; |
439 | 11.5k | for (y = 0; y < dim[2]; y++) |
440 | 8.64k | { |
441 | 8.64k | unsigned char* const u8y = u8z + y * stride[2] * data_size; |
442 | 43.2k | for (x = 0; x < dim[3]; x++) |
443 | 34.5k | memset(u8y + x * stride[3] * data_size, 0, data_size); |
444 | 8.64k | } |
445 | 2.88k | } |
446 | 1.44k | } |
447 | 96 | } else { |
448 | 9 | int i; |
449 | 113 | for (i = 0; i < dim[0]; i++) |
450 | 104 | _ccv_nnc_tensor_set_zero(u8 + i * stride[0] * data_size, nd - 1, dim + 1, stride + 1, data_size); |
451 | 9 | } |
452 | 106 | } |
453 | | |
454 | | void ccv_nnc_tensor_zero(void* const tensor) |
455 | 23.6k | { |
456 | 23.6k | ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)tensor; |
457 | 23.6k | const size_t data_size = CCV_GET_DATA_TYPE_SIZE(tv->info.datatype); |
458 | 23.6k | if (CCV_IS_TENSOR_CONTIGUOUS(tv)) |
459 | 23.6k | { |
460 | 23.6k | memset(tv->data.u8, 0, data_size * ccv_nnc_tensor_count(tv->info)); |
461 | 23.6k | return; |
462 | 23.6k | } |
463 | 2 | const int nd = ccv_nnc_tensor_nd(tv->info.dim); |
464 | 2 | assert(nd >= 1); |
465 | 2 | const int* const tvstride = tv->stride; |
466 | | // Go through this recursively. |
467 | 2 | _ccv_nnc_tensor_set_zero(tv->data.u8, nd, tv->info.dim, tvstride, data_size); |
468 | 2 | } |
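
Continuing the view sketch above: zeroing through a strided view is not a single memset. Because the 2x2 window is non-contiguous, ccv_nnc_tensor_zero falls through to the recursive helper and clears only the window's elements:

    ccv_nnc_tensor_zero(tv); // the parent's other 12 elements are untouched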
469 | | |
470 | | int ccv_nnc_tensor_eq(const ccv_nnc_tensor_t* const a, const ccv_nnc_tensor_t* const b) |
471 | 686 | { |
472 | 686 | assert(!CCV_IS_TENSOR_VIEW(a)); |
473 | 686 | assert(!CCV_IS_TENSOR_VIEW(b)); |
474 | | // If a is a dense matrix, just use ccv_matrix_eq |
475 | 686 | if (CCV_TENSOR_IS_DENSE_MATRIX(a->type)) |
476 | 110 | return ccv_matrix_eq((ccv_matrix_t*)a, (ccv_matrix_t*)b); |
477 | | // Otherwise, do our own thing. |
478 | 576 | if (CCV_GET_DATA_TYPE(a->type) != CCV_GET_DATA_TYPE(b->type)) |
479 | 0 | return -1; |
480 | 576 | int i, c = 1; |
481 | 1.66k | for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; i++) |
482 | 1.66k | { |
483 | 1.66k | if (!a->info.dim[i] && !b->info.dim[i]) |
484 | 576 | break; |
485 | 1.08k | if (a->info.dim[i] != b->info.dim[i]) |
486 | 0 | return -1; |
487 | 1.08k | c *= a->info.dim[i]; |
488 | 1.08k | } |
489 | 576 | if (CCV_GET_DATA_TYPE(a->type) == CCV_32S) |
490 | 12 | return memcmp(a->data.i32, b->data.i32, sizeof(int) * c) == 0 ? 0 : -1; |
491 | | // Only support 32F and 64F at this point. |
492 | 576 | assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F || CCV_GET_DATA_TYPE(a->type) == CCV_64F); |
493 | | // Read: http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm |
494 | | // http://floating-point-gui.de/errors/comparison/ |
495 | 564 | if (CCV_GET_DATA_TYPE(a->type) == CCV_32F) |
496 | 562 | { |
497 | 562 | static const float epsi = FLT_EPSILON; |
498 | 562 | static const int32_t ulps = 128; // so that 1 and 1.000015 will be treated as the same. |
499 | 30.3M | for (i = 0; i < c; i++) |
500 | 30.3M | { |
501 | | // Although this is floating point, I use integers as a way to compare. |
502 | 30.3M | int32_t i32a = a->data.i32[i]; |
503 | 30.3M | if (i32a < 0) |
504 | 4.82M | i32a = 0x80000000 - i32a; |
505 | 30.3M | int32_t i32b = b->data.i32[i]; |
506 | 30.3M | if (i32b < 0) |
507 | 4.82M | i32b = 0x80000000 - i32b; |
508 | 30.3M | if (abs(i32a - i32b) > ulps && fabsf(a->data.f32[i] - b->data.f32[i]) > epsi) |
509 | 0 | return -1; |
510 | 30.3M | } |
511 | 562 | } else if (CCV_GET_DATA_TYPE(a->type) == CCV_64F) { |
512 | 2 | typedef union { |
513 | 2 | double f64; |
514 | 2 | int64_t i64; |
515 | 2 | } Float64; |
516 | 2 | static const double epsi = DBL_EPSILON; |
517 | 2 | static const int64_t ulps = 128; // so that 1 and 1.000015 will be treated as the same. |
518 | 15.8k | for (i = 0; i < c; i++) |
519 | 15.8k | { |
520 | | // Although this is floating point, I use integers as a way to compare. |
521 | 15.8k | Float64 f64a, f64b; |
522 | 15.8k | f64a.f64 = a->data.f64[i]; |
523 | 15.8k | f64b.f64 = b->data.f64[i]; |
524 | 15.8k | if (f64a.i64 < 0) |
525 | 0 | f64a.i64 = 0x8000000000000000 - f64a.i64; |
526 | 15.8k | if (f64b.i64 < 0) |
527 | 0 | f64b.i64 = 0x8000000000000000 - f64b.i64; |
528 | 15.8k | if (llabs(f64a.i64 - f64b.i64) > ulps && fabs(a->data.f64[i] - b->data.f64[i]) > epsi) |
529 | 0 | return -1; |
530 | 15.8k | } |
531 | 2 | } |
532 | 564 | return 0; |
533 | 564 | } |
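
The float comparison above is the classic ULP trick: reinterpret the bits as a signed integer, remap negative values so integer order matches float order, and the integer difference then counts how many representable floats sit between the two values. At 1.0f, 128 ULPs spans roughly up to 1.0000153, which is where the in-code comment's "1 and 1.000015" figure comes from. A distilled sketch of the same idea:

    #include <stdint.h>
    #include <stdlib.h>

    // Map float bits to a monotonically ordered integer, mirroring the
    // sign-flip done in the loop above.
    static int32_t ordered_bits(const float f)
    {
        union { float f32; int32_t i32; } u = { .f32 = f };
        return u.i32 < 0 ? (int32_t)(0x80000000 - u.i32) : u.i32;
    }

    static int almost_equal(const float a, const float b)
    {
        return abs(ordered_bits(a) - ordered_bits(b)) <= 128; // same tolerance as ulps above
    }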
534 | | |
535 | | static void _strcat(char** str, int* written, size_t* len, char* from, int from_size) |
536 | 1.34k | { |
537 | 1.34k | if (*len - *written < from_size) |
538 | 0 | { |
539 | 0 | *len += from_size * 2; |
540 | 0 | *str = (char*)ccrealloc(*str, *len); |
541 | 0 | } |
542 | 1.34k | memcpy(*str + *written, from, from_size); |
543 | 1.34k | *written += from_size; |
544 | 1.34k | } |
545 | | |
546 | 648 | #define _STRPRINTF(str, written, len, format, ...) \ |
547 | 648 | do { \ |
548 | 648 | const int newly_written = snprintf((str) + (written), (len) - (written), format, ## __VA_ARGS__); \ |
549 | 648 | if ((len) - (written) < newly_written) \ |
550 | 648 | { \ |
551 | 0 | (len) += newly_written * 2; \ |
552 | 0 | (str) = (char*)ccrealloc((str), (len)); \ |
553 | 0 | (written) += snprintf((str) + (written), (len) - (written), format, ## __VA_ARGS__); \ |
554 | 0 | } else \ |
555 | 648 | (written) += newly_written; \ |
556 | 648 | } while (0) |
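
Both helpers above follow the grow-and-retry pattern: snprintf returns the length it would have produced, so on truncation the buffer is enlarged and the formatting re-run. The same idea as a plain function, a sketch using <= in the check since snprintf also needs a byte for the trailing NUL, and standard realloc where the file uses ccrealloc:

    #include <stdio.h>
    #include <stdlib.h>

    static void append_int(char** str, int* written, size_t* len, const int v)
    {
        const int n = snprintf(*str + *written, *len - *written, "%10d", v);
        if (*len - *written <= (size_t)n) // truncated: grow, then format again
        {
            *len += n * 2;
            *str = (char*)realloc(*str, *len);
            *written += snprintf(*str + *written, *len - *written, "%10d", v);
        } else
            *written += n;
    }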
557 | | |
558 | | static void _strv(char** str, int* written, size_t* len, const ccv_nnc_tensor_t* const a, int i) |
559 | 648 | { |
560 | 648 | if (a->info.datatype == CCV_32F) |
561 | 0 | _STRPRINTF(*str, *written, *len, "%10.5g", a->data.f32[i]); |
562 | 648 | else if (a->info.datatype == CCV_64F) |
563 | 0 | _STRPRINTF(*str, *written, *len, "%10.5g", a->data.f64[i]); |
564 | 648 | else if (a->info.datatype == CCV_16F) { |
565 | 0 | float v; |
566 | 0 | ccv_half_precision_to_float((uint16_t*)(a->data.f16 + i), &v, 1); |
567 | 0 | _STRPRINTF(*str, *written, *len, "%10.5g", v); |
568 | 648 | } else if (a->info.datatype == CCV_32S) |
569 | 648 | _STRPRINTF(*str, *written, *len, "%10d", a->data.i32[i]); |
570 | 0 | else if (a->info.datatype == CCV_64S) |
571 | 0 | _STRPRINTF(*str, *written, *len, "%12lld", (long long int)a->data.i64[i]); |
572 | 0 | else if (a->info.datatype == CCV_8U) |
573 | 0 | _STRPRINTF(*str, *written, *len, "%3d", (int)a->data.u8[i]); |
574 | 648 | } |
575 | | |
576 | | static void _strt(char** str, int* written, size_t* len, const ccv_nnc_tensor_t* const a, int nd, int spacer, const int* const dim, const int* const stride, int idx) |
577 | 28 | { |
578 | 28 | assert(nd != 1); |
579 | 28 | if (nd == 2) |
580 | 17 | { |
581 | | // Print columns and the rows. |
582 | 17 | int i, j, k; |
583 | 17 | if (dim[0] <= 8) |
584 | 1 | { |
585 | 5 | for (i = 0; i < dim[0]; i++) |
586 | 4 | { |
587 | 4 | if (i != 0) |
588 | 3 | { |
589 | 3 | _strcat(str, written, len, " ", 2); |
590 | 3 | for (k = 0; k < spacer; k++) |
591 | 0 | _strcat(str, written, len, " ", 1); |
592 | 3 | } |
593 | 4 | _strcat(str, written, len, "[", 1); |
594 | 4 | if (dim[1] <= 8) |
595 | 4 | { |
596 | 20 | for (j = 0; j < dim[1]; j++) |
597 | 16 | { |
598 | 16 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
599 | 16 | if (j < dim[1] - 1) |
600 | 12 | _strcat(str, written, len, ", ", 2); |
601 | 16 | } |
602 | 4 | if (i < dim[0] - 1) |
603 | 3 | _strcat(str, written, len, "],\n", 3); |
604 | 4 | } else { |
605 | 0 | for (j = 0; j < 3; j++) |
606 | 0 | { |
607 | 0 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
608 | 0 | _strcat(str, written, len, ", ", 2); |
609 | 0 | } |
610 | 0 | _strcat(str, written, len, " ..., ", 6); |
611 | 0 | for (j = dim[1] - 3; j < dim[1]; j++) |
612 | 0 | { |
613 | 0 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
614 | 0 | if (j < dim[1] - 1) |
615 | 0 | _strcat(str, written, len, ", ", 2); |
616 | 0 | } |
617 | 0 | if (i < dim[0] - 1) |
618 | 0 | _strcat(str, written, len, "],\n", 3); |
619 | 0 | } |
620 | 4 | } |
621 | 1 | _strcat(str, written, len, "]", 1); |
622 | 16 | } else { |
623 | 64 | for (i = 0; i < 3; i++) |
624 | 48 | { |
625 | 48 | if (i != 0) |
626 | 32 | { |
627 | 32 | _strcat(str, written, len, " ", 2); |
628 | 128 | for (k = 0; k < spacer; k++) |
629 | 96 | _strcat(str, written, len, " ", 1); |
630 | 32 | } |
631 | 48 | _strcat(str, written, len, "[", 1); |
632 | 48 | if (dim[1] <= 8) |
633 | 0 | { |
634 | 0 | for (j = 0; j < dim[1]; j++) |
635 | 0 | { |
636 | 0 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
637 | 0 | if (j < dim[1] - 1) |
638 | 0 | _strcat(str, written, len, ", ", 2); |
639 | 0 | } |
640 | 0 | _strcat(str, written, len, "],\n", 3); |
641 | 48 | } else { |
642 | 192 | for (j = 0; j < 3; j++) |
643 | 144 | { |
644 | 144 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
645 | 144 | _strcat(str, written, len, ", ", 2); |
646 | 144 | } |
647 | 48 | _strcat(str, written, len, " ..., ", 6); |
648 | 192 | for (j = dim[1] - 3; j < dim[1]; j++) |
649 | 144 | { |
650 | 144 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
651 | 144 | if (j < dim[1] - 1) |
652 | 96 | _strcat(str, written, len, ", ", 2); |
653 | 144 | } |
654 | 48 | _strcat(str, written, len, "],\n", 3); |
655 | 48 | } |
656 | 48 | } |
657 | 16 | _strcat(str, written, len, " ", 2); |
658 | 64 | for (k = 0; k < spacer; k++) |
659 | 48 | _strcat(str, written, len, " ", 1); |
660 | 16 | _strcat(str, written, len, "...,\n", 5); |
661 | 64 | for (i = dim[0] - 3; i < dim[0]; i++) |
662 | 48 | { |
663 | 48 | _strcat(str, written, len, " ", 2); |
664 | 192 | for (k = 0; k < spacer; k++) |
665 | 144 | _strcat(str, written, len, " ", 1); |
666 | 48 | _strcat(str, written, len, "[", 1); |
667 | 48 | if (dim[1] < 8) |
668 | 0 | { |
669 | 0 | for (j = 0; j < dim[1]; j++) |
670 | 0 | { |
671 | 0 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
672 | 0 | if (j < dim[1] - 1) |
673 | 0 | _strcat(str, written, len, ", ", 2); |
674 | 0 | } |
675 | 0 | if (i < dim[0] - 1) |
676 | 0 | _strcat(str, written, len, "],\n", 3); |
677 | 48 | } else { |
678 | 192 | for (j = 0; j < 3; j++) |
679 | 144 | { |
680 | 144 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
681 | 144 | _strcat(str, written, len, ", ", 2); |
682 | 144 | } |
683 | 48 | _strcat(str, written, len, " ..., ", 6); |
684 | 192 | for (j = dim[1] - 3; j < dim[1]; j++) |
685 | 144 | { |
686 | 144 | _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]); |
687 | 144 | if (j < dim[1] - 1) |
688 | 96 | _strcat(str, written, len, ", ", 2); |
689 | 144 | } |
690 | 48 | if (i < dim[0] - 1) |
691 | 32 | _strcat(str, written, len, "],\n", 3); |
692 | 48 | } |
693 | 48 | } |
694 | 16 | _strcat(str, written, len, "]", 1); |
695 | 16 | } |
696 | 17 | return; |
697 | 17 | } |
698 | 11 | int i, j; |
699 | 11 | if (dim[0] > 4) |
700 | 2 | { |
701 | 6 | for (i = 0; i < 2; i++) |
702 | 4 | { |
703 | 4 | _strcat(str, written, len, "[", 1); |
704 | 4 | _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i); |
705 | 4 | _strcat(str, written, len, "],\n ", 5); |
706 | 8 | for (j = 0; j < spacer; j++) |
707 | 4 | _strcat(str, written, len, " ", 1); |
708 | 4 | } |
709 | 2 | _strcat(str, written, len, "...,\n", 5); |
710 | 2 | _strcat(str, written, len, " ", 2); |
711 | 4 | for (j = 0; j < spacer; j++) |
712 | 2 | _strcat(str, written, len, " ", 1); |
713 | 6 | for (i = dim[0] - 2; i < dim[0]; i++) |
714 | 4 | { |
715 | 4 | _strcat(str, written, len, "[", 1); |
716 | 4 | _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i); |
717 | 4 | if (i < dim[0] - 1) |
718 | 2 | { |
719 | 2 | _strcat(str, written, len, "],\n ", 5); |
720 | 4 | for (j = 0; j < spacer; j++) |
721 | 2 | _strcat(str, written, len, " ", 1); |
722 | 2 | } |
723 | 4 | } |
724 | 2 | _strcat(str, written, len, "]", 1); |
725 | 9 | } else { |
726 | 27 | for (i = 0; i < dim[0]; i++) |
727 | 18 | { |
728 | 18 | _strcat(str, written, len, "[", 1); |
729 | 18 | _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i); |
730 | 18 | if (i < dim[0] - 1) |
731 | 9 | { |
732 | 9 | _strcat(str, written, len, "],\n", 3); |
733 | 9 | _strcat(str, written, len, " ", 2); |
734 | 25 | for (j = 0; j < spacer; j++) |
735 | 16 | _strcat(str, written, len, " ", 1); |
736 | 9 | } |
737 | 18 | } |
738 | 9 | _strcat(str, written, len, "]", 1); |
739 | 9 | } |
740 | 11 | } |
741 | | |
742 | | char* ccv_nnc_tensor_format_new(const ccv_nnc_tensor_t* const a) |
743 | 4 | { |
744 | 4 | const int nd = ccv_nnc_tensor_nd(a->info.dim); |
745 | 4 | int i; |
746 | 4 | int rows = 8; // 8 rows for the first one, and then just first and last. |
747 | 7 | for (i = 2; i < nd; i++) |
748 | 3 | rows *= 5; // Maximum 3 rows beyond the first two. |
749 | 4 | int columns = nd * 2 + 16 * 8; |
750 | 4 | size_t len = sizeof(char) * columns * rows; |
751 | | // Allocate return string buffer. |
752 | 4 | char* str = (char*)ccmalloc(len); |
753 | 4 | int written = 0; |
754 | 4 | int stride[CCV_NNC_MAX_DIM_ALLOC]; |
755 | 4 | if (CCV_IS_TENSOR_VIEW(a)) |
756 | 0 | memcpy(stride, ((ccv_nnc_tensor_view_t*)a)->stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC); |
757 | 4 | else |
758 | 4 | ccv_nnc_tensor_get_stride(a->info.dim, stride); |
759 | 4 | _strcat(&str, &written, &len, "[\n ", 4); |
760 | 4 | if (nd == 1) |
761 | 2 | { |
762 | | // Special casing for vector. |
763 | 2 | if (a->info.dim[0] <= 64) |
764 | 13 | for (i = 0; i < a->info.dim[0]; i++) |
765 | 12 | { |
766 | 12 | _strv(&str, &written, &len, a, i * stride[0]); |
767 | 12 | if (i < a->info.dim[0] - 1) |
768 | 11 | { |
769 | 11 | if ((i + 1) % 8 == 0) |
770 | 1 | _strcat(&str, &written, &len, ",\n ", 4); |
771 | 10 | else |
772 | 10 | _strcat(&str, &written, &len, ", ", 2); |
773 | 11 | } |
774 | 12 | } |
775 | 1 | else { |
776 | | // First 3 rows. |
777 | 25 | for (i = 0; i < 24; i++) |
778 | 24 | { |
779 | 24 | _strv(&str, &written, &len, a, i * stride[0]); |
780 | 24 | if ((i + 1) % 8 == 0) |
781 | 3 | _strcat(&str, &written, &len, ",\n ", 4); |
782 | 21 | else |
783 | 21 | _strcat(&str, &written, &len, ", ", 2); |
784 | 24 | } |
785 | 1 | _strcat(&str, &written, &len, "...,\n ", 7); |
786 | | // Last 3 rows (aligned to 8 items per row). |
787 | 1 | int start = ((a->info.dim[0] + 7) / 8 - 3) * 8; |
788 | 21 | for (i = start; i < a->info.dim[0]; i++) |
789 | 20 | { |
790 | 20 | _strv(&str, &written, &len, a, i * stride[0]); |
791 | 20 | if (i < a->info.dim[0] - 1) |
792 | 19 | { |
793 | 19 | if ((i + 1) % 8 == 0) |
794 | 2 | _strcat(&str, &written, &len, ",\n ", 4); |
795 | 17 | else |
796 | 17 | _strcat(&str, &written, &len, ", ", 2); |
797 | 19 | } |
798 | 20 | } |
799 | 1 | } |
800 | 2 | } else { |
801 | 2 | _strt(&str, &written, &len, a, nd, 0, a->info.dim, stride, 0); |
802 | 2 | } |
803 | 4 | _strcat(&str, &written, &len, "\n]", 3); // Including the terminal \0. |
804 | 4 | str = (char*)ccrealloc(str, written); // Don't need the extra spaces. |
805 | 4 | return str; |
806 | 4 | } |
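
Typical use of the formatter, going by the code above: the buffer is ccmalloc-ed, NUL-terminated (the final _strcat copies the terminator), and trimmed with ccrealloc, so the caller releases it with ccfree:

    #include <stdio.h>
    #include "ccv_nnc.h"

    void dump_tensor(const ccv_nnc_tensor_t* const tensor)
    {
        char* const str = ccv_nnc_tensor_format_new(tensor);
        printf("%s\n", str); // e.g. "[\n  1, 2,\n  3, 4\n]" for a 2x2 tensor
        ccfree(str);
    }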