File: nnc/ccv_nnc_tensor.c
Warning: line 221, column 4: Null pointer passed to 1st parameter expecting 'nonnull'
#include "ccv_nnc.h"
#include "ccv_nnc_easy.h"
#include "ccv_nnc_internal.h"
#ifdef HAVE_CUDA
#include "gpu/ccv_nnc_compat.h"
#elif defined(HAVE_MPS)
#include "mps/ccv_nnc_mps.h"
#endif
#include <fcntl.h>
#include <sys/mman.h>

// MARK - Level-1 API

const int ccv_nnc_no_ofs[CCV_NNC_MAX_DIM_ALLOC] = {0};

ccv_nnc_tensor_t* ccv_nnc_tensor_new(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
{
	ccv_nnc_tensor_t* tensor;
	// this specific form can be toll-free bridging to ccv_dense_matrix_t (On CPU, and 3 dims (channels, rows, cols), and channels is smaller than max channels of ccv_dense_matrix_t).
	const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
	if (ptr)
	{
		tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
		tensor->dataof = 0;
		tensor->alias_ref = 0;
		tensor->sig = 0;
		tensor->refcount = 1;
		tensor->info = params;
		if (tfb)
		{
			tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
			// This corresponds to mat->step
			tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
		} else // This won't be recognized by ccv_dense_matrix_t
			tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
		tensor->data.u8 = (uint8_t*)ptr;
		return tensor;
	}
	if (flags & CCV_TENSOR_CPU_MEMORY)
	{
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
	} else if (flags & CCV_TENSOR_GPU_MEMORY) {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
	}
	const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 63) & -64;
	const size_t size = ccv_nnc_tensor_data_size(params);
#ifdef HAVE_CUDA
	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
	{
		tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
		assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
		if (size > 0)
			tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
		else
			tensor->data.u8 = 0;
	} else {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
		ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
		if (size > 0)
			tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
		else
			tensor->data.u8 = 0;
	}
#elif defined(HAVE_MPS)
	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
	{
		tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
		assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
		if (size > 0)
			tensor->data.u8 = (uint8_t*)mpobjmalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
		else
			tensor->data.u8 = 0;
	} else {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
		ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
		if (size > 0)
			tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
		else
			tensor->data.u8 = 0;
	}
#else
	assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
	ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
	if (size > 0)
		tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
	else
		tensor->data.u8 = 0;
#endif
	tensor->dataof = 0;
	tensor->alias_ref = 0;
	tensor->data_size = size;
	tensor->sig = 0;
	tensor->refcount = 1;
	tensor->info = params;
	if (tfb)
	{
		tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
		// This corresponds to mat->step
		tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
	} else
		tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
	return tensor;
}
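
/* Usage sketch (illustrative, not part of the library): allocate a small CPU
 * tensor, touch its data, and free it. CPU_TENSOR_NHWC is the convenience
 * macro from ccv_nnc_easy.h; treat the exact invocation here as an assumption.
 *
 *   ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2, 3), 0);
 *   t->data.f32[0] = 1.0f; // CPU data lives inline, right after the 64-byte aligned header
 *   ccv_nnc_tensor_free(t);
 *
 * Passing a non-zero ptr instead wraps that memory (CCV_NO_DATA_ALLOC), so
 * ccv_nnc_tensor_free releases only the header, not the underlying buffer.
 */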

ccv_nnc_tensor_t* ccv_nnc_tensor_new_from_file(const ccv_nnc_tensor_param_t params, const char* const filename, const off_t offset, const int flags)
{
	ccv_nnc_tensor_t* tensor;
	// this specific form can be toll-free bridging to ccv_dense_matrix_t (On CPU, and 3 dims (channels, rows, cols), and channels is smaller than max channels of ccv_dense_matrix_t).
	const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
	tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
	tensor->dataof = 0;
	tensor->alias_ref = 0;
	tensor->sig = 0;
	tensor->refcount = 1;
	tensor->info = params;
	if (tfb)
	{
		tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
		// This corresponds to mat->step
		tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
	} else // This won't be recognized by ccv_dense_matrix_t
		tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
	const size_t size = ccv_nnc_tensor_data_size(params);
#ifdef HAVE_CUDA
	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
	{
		// Remove this flag so it can be deallocated as usual.
		tensor->type &= ~CCV_NO_DATA_ALLOC;
		assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
		if (size > 0)
		{
			void* ptr = 0;
			// This is not supported yet on CUDA.
			if (flags & CCV_NNC_TENSOR_MEMORY_MAP_ON_DEMAND)
				ptr = cumallocmanaged(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
			if (ptr) // If allocated successfully. Otherwise we go through the fallback path.
			{
				tensor->data.u8 = (uint8_t*)ptr;
				int fd = open(filename, O_RDONLY, 0);
				cufileread(fd, offset, tensor->data.u8, size);
				close(fd);
				cumemadvisereadmostly(CCV_TENSOR_GET_DEVICE_ID(params.type), tensor->data.u8, size);
				tensor->type |= CCV_MAPPED_MEM; // This denotes the tensor is mapped to CPU, and would prefer an explicit prefetch call.
			} else {
				tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
				int fd = open(filename, O_RDONLY, 0);
				cufileread(fd, offset, tensor->data.u8, size);
				close(fd);
			}
		} else
			tensor->data.u8 = 0;
	} else {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
		if (size > 0)
		{
			int fd = open(filename, O_RDONLY, 0);
			void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
			close(fd);
			madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
			tensor->data.u8 = bufptr;
			tensor->type |= CCV_MAPPED_MEM;
		} else
			tensor->data.u8 = 0;
	}
#elif defined(HAVE_MPS)
	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
	{
		// Remove this flag so it can be deallocated as usual.
		tensor->type &= ~CCV_NO_DATA_ALLOC;
		assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
		if (size > 0)
			tensor->data.u8 = (uint8_t*)mpmemmap(filename, size, offset, flags);
		else
			tensor->data.u8 = 0;
	} else {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
		if (size > 0)
		{
			int fd = open(filename, O_RDONLY, 0);
			void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
			close(fd);
			madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
			tensor->data.u8 = bufptr;
			tensor->type |= CCV_MAPPED_MEM;
		} else
			tensor->data.u8 = 0;
	}
#else
	assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
	if (size > 0)
	{
		int fd = open(filename, O_RDONLY, 0);
		void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
		close(fd);
		madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
		tensor->data.u8 = bufptr;
		tensor->type |= CCV_MAPPED_MEM;
	} else
		tensor->data.u8 = 0;
#endif
	return tensor;
}
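
/* Usage sketch (illustrative): load a tensor straight from a file. On the CPU
 * path the data is mmap'ed MAP_PRIVATE and tagged CCV_MAPPED_MEM, so
 * ccv_nnc_tensor_free munmaps it; note mmap also requires offset to be
 * page-aligned. The file name below is a placeholder.
 *
 *   ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new_from_file(
 *       CPU_TENSOR_NHWC(32F, 1024), "weights.bin", 0, 0);
 *   ccv_nnc_tensor_free(t);
 */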

ccv_nnc_tensor_t* ccv_nnc_tensor_new_from_raw(const ccv_nnc_tensor_param_t params, const void* const bufptr, const size_t buf_size, const int flags)
{
	ccv_nnc_tensor_t* tensor = ccv_nnc_tensor_new(0, params, flags);
	const size_t size = ccv_min(ccv_nnc_tensor_data_size_without_padding(params), buf_size);
#ifdef HAVE_CUDA
	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
	{
		// Remove this flag so it can be deallocated as usual.
		tensor->type &= ~CCV_NO_DATA_ALLOC;
		assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
		if (size > 0)
			cumemcpy(tensor->data.u8, tensor->info.type, bufptr, CCV_TENSOR_CPU_MEMORY, size);
		else
			tensor->data.u8 = 0;
	} else {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
		if (size > 0)
			// The analyzer warning in the header (line 221 of the report) points here:
			// it assumes tensor->data.u8 may still be NULL, since ccv_nnc_tensor_new
			// sets it to 0 whenever the computed data size is 0.
			memcpy(tensor->data.u8, bufptr, size);
		else
			tensor->data.u8 = 0;
	}
#elif defined(HAVE_MPS)
	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
	{
		// Remove this flag so it can be deallocated as usual.
		tensor->type &= ~CCV_NO_DATA_ALLOC;
		assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
		if (size > 0)
			mpmemcpy(tensor->data.u8, tensor->dataof, tensor->info.type, bufptr, 0, CCV_TENSOR_CPU_MEMORY, size);
		else
			tensor->data.u8 = 0;
	} else {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
		if (size > 0)
			memcpy(tensor->data.u8, bufptr, size);
		else
			tensor->data.u8 = 0;
	}
#else
	assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
	if (size > 0)
		memcpy(tensor->data.u8, bufptr, size);
	else
		tensor->data.u8 = 0;
#endif
	return tensor;
}
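
/* Usage sketch (illustrative): copy an existing host buffer into a freshly
 * allocated tensor. The copy length is clamped to the smaller of the tensor's
 * unpadded byte size and buf_size, so a short buffer cannot overrun.
 *
 *   const float buf[6] = {0, 1, 2, 3, 4, 5};
 *   ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new_from_raw(
 *       CPU_TENSOR_NHWC(32F, 2, 3), buf, sizeof(buf), 0);
 *   ccv_nnc_tensor_free(t);
 */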

ccv_nnc_tensor_t* ccv_nnc_tensor_resize(ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params)
{
	assert(!CCV_IS_TENSOR_VIEW(tensor));
	assert(tensor->type & CCV_UNMANAGED);
	assert(tensor->data_size > 0);
	assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GET_MEMORY(tensor->info.type));
	assert(CCV_TENSOR_GET_DEVICE(params.type) == CCV_TENSOR_GET_DEVICE(tensor->info.type));
	const size_t size = ccv_nnc_tensor_data_size(params);
	const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
	tensor->info = params;
#ifdef HAVE_CUDA
	const int pinned_mem = (tensor->type & CCV_PINNED_MEM);
#endif
	if (tfb)
	{
		tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
		// This corresponds to mat->step
		tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
	} else
		tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
	if (size <= tensor->data_size) // Nothing to do.
	{
#ifdef HAVE_CUDA
		if (pinned_mem)
			tensor->type |= CCV_PINNED_MEM;
#endif
		return tensor;
	}
	ccv_nnc_tensor_t* new_tensor = tensor;
	const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 63) & -64;
#ifdef HAVE_CUDA
	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
	{
		assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
		const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type);
		assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type));
		cufree(device_id, tensor->data.u8);
		new_tensor->data.u8 = (uint8_t*)cumalloc(device_id, size);
	} else {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
		assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
		// Unregister first; the memory will be pinned again after the realloc.
		if (pinned_mem)
			cuunregister(new_tensor->data.u8);
		new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
		new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
	}
#elif defined(HAVE_MPS)
	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
	{
		assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
		const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type);
		assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type));
		mpobjfree(device_id, tensor->data.u8);
		new_tensor->data.u8 = (uint8_t*)mpobjmalloc(device_id, size);
	} else {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
		assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
		new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
		new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
	}
#else
	assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
	new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
	new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
#endif
	new_tensor->data_size = size;
#ifdef HAVE_CUDA
	if (pinned_mem)
		ccv_nnc_tensor_pin_memory(new_tensor);
#endif
	return new_tensor;
}
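
/* Usage sketch (illustrative): growing a CPU tensor reallocs the header and
 * data as one block, so the tensor may move; always keep the returned pointer.
 *
 *   ccv_nnc_tensor_t* t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0);
 *   t = ccv_nnc_tensor_resize(t, CPU_TENSOR_NHWC(32F, 1024));
 *   ccv_nnc_tensor_free(t);
 */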

ccv_nnc_tensor_t ccv_nnc_tensor(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
{
	// this specific form can be toll-free bridging to ccv_dense_matrix_t
	const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
	ccv_nnc_tensor_t tensor;
	tensor.dataof = 0;
	tensor.alias_ref = 0;
	tensor.sig = 0;
	tensor.refcount = 1;
	tensor.info = params;
	if (flags & CCV_TENSOR_CPU_MEMORY)
	{
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
	} else if (flags & CCV_TENSOR_GPU_MEMORY) {
		assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
	}
	if (tfb)
	{
		tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
		// This corresponds to mat->step
		tensor.info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
	} else // This won't be recognized by ccv_dense_matrix_t
		tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
	if (params.dim[0] > 0)
		tensor.data.u8 = (uint8_t*)ptr;
	else
		tensor.data.u8 = 0;
	tensor.data_size = 0;
	return tensor;
}
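
/* Usage sketch (illustrative): wrap an existing buffer in an on-stack tensor
 * header. Nothing is heap-allocated, so no ccv_nnc_tensor_free is needed.
 *
 *   float buf[4] = {1, 2, 3, 4};
 *   ccv_nnc_tensor_t t = ccv_nnc_tensor(buf, CPU_TENSOR_NHWC(32F, 2, 2), 0);
 */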

int ccv_nnc_tensor_pin_memory(ccv_nnc_tensor_t* const tensor)
{
#ifdef HAVE_CUDA
	assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
	if (!(tensor->type & CCV_PINNED_MEM) && tensor->data_size)
	{
		const int success = curegister(tensor->data.u8, tensor->data_size);
		if (success)
			tensor->type |= CCV_PINNED_MEM;
		return success ? 0 : -1;
	}
#endif
	return 0;
}
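
/* Note (sketch): pinning page-locks the CPU allocation via curegister
 * (presumably backed by cudaHostRegister) so later host<->device copies can
 * run asynchronously. Returns 0 on success (or when built without CUDA),
 * -1 if registration fails.
 */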

void ccv_nnc_tensor_free(ccv_nnc_tensor_t* const tensor)
{
	if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY && (tensor->type & CCV_MAPPED_MEM))
	{
		// The size might be different than the one when we allocated (for example, the tensor might rewrite its size to be smaller).
		// This might cause issues in the future.
		const size_t size = ccv_nnc_tensor_data_size(tensor->info);
		munmap(tensor->data.u8, size);
	}
#ifdef HAVE_CUDA
	if (tensor->type & CCV_PINNED_MEM)
		cuunregister(tensor->data.u8);
	if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY &&
		!(tensor->type & CCV_NO_DATA_ALLOC)) // If this is GPU memory and it is allocated, free.
		cufree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8);
#elif defined(HAVE_MPS)
	if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY &&
		!(tensor->type & CCV_NO_DATA_ALLOC)) // If this is GPU memory and it is allocated, free.
		mpobjfree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8);
#endif
	ccfree(tensor);
}

static inline void _ccv_nnc_tensor_view_set(ccv_nnc_tensor_view_t* const tv, const ccv_nnc_tensor_t* const tensor, const int dim[CCV_NNC_MAX_DIM_ALLOC], const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
{
	memcpy(tv->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
	memcpy(tv->info.dim, dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
	uint8_t* const p = tensor->data.u8;
	const off_t off = tv->off = ccv_nnc_tensor_view_offset(tv->info.datatype, stride, ofs);
	tv->contiguous = ccv_nnc_tensor_view_is_contiguous(dim, stride);
	assert(off + CCV_GET_DATA_TYPE_SIZE(tv->info.datatype) * ccv_nnc_dimension_upper_bound(tv->info.dim, tv->stride) <= CCV_GET_DATA_TYPE_SIZE(tensor->info.datatype) * ccv_nnc_tensor_count(tensor->info));
	ccv_nnc_tensor_data(tv->info, p, off + tensor->dataof, &tv->data, &tv->dataof);
}

ccv_nnc_tensor_view_t* ccv_nnc_tensor_view_new(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
{
	ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)ccmalloc(sizeof(ccv_nnc_tensor_view_t));
	tv->type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW;
	tv->dataof = 0;
	tv->alias_ref = (uintptr_t)tensor;
	tv->refcount = 1;
	tv->sig = 0;
	tv->data_size = 0;
	assert(params.type == tensor->info.type);
	assert(params.datatype == tensor->info.datatype);
	tv->info = params;
	_ccv_nnc_tensor_view_set(tv, tensor, params.dim, ofs, stride);
	return tv;
}
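
/* Usage sketch (illustrative): a 2x2 view into the top-left corner of a 4x4
 * tensor. ofs and stride are in elements; ccv_nnc_no_ofs (defined above) is a
 * convenient all-zero offset.
 *
 *   ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 4), 0);
 *   const int stride[CCV_NNC_MAX_DIM_ALLOC] = {4, 1}; // the parent's row-major strides
 *   ccv_nnc_tensor_view_t* const tv = ccv_nnc_tensor_view_new(t, CPU_TENSOR_NHWC(32F, 2, 2), ccv_nnc_no_ofs, stride);
 *   ccv_nnc_tensor_view_free(tv);
 *   ccv_nnc_tensor_free(t);
 */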

ccv_nnc_tensor_view_t ccv_nnc_tensor_view(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
{
	assert(!CCV_IS_TENSOR_VIEW(tensor));
	assert(params.type == tensor->info.type);
	assert(params.datatype == tensor->info.datatype);
	ccv_nnc_tensor_view_t tv = {
		.dataof = 0,
		.alias_ref = (uintptr_t)tensor,
		.type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW, // clean up the channel bits, and then add CCV_TENSOR_VIEW identifier
		.refcount = 1,
		.sig = 0,
		.info = params,
		.data_size = 0,
	};
	_ccv_nnc_tensor_view_set(&tv, tensor, params.dim, ofs, stride);
	return tv;
}

void ccv_nnc_tensor_view_free(ccv_nnc_tensor_view_t* const tensor_view)
{
	ccfree(tensor_view);
}

void _ccv_nnc_tensor_set_zero(unsigned char* u8, const int nd, const int* const dim, const int* const stride, const size_t data_size)
{
	if (nd == 1)
	{
		if (stride[0] == 1)
		{
			memset(u8, 0, data_size * dim[0]);
			return;
		}
		int i;
		for (i = 0; i < dim[0]; i++)
			memset(u8 + i * stride[0] * data_size, 0, data_size);
	} else if (nd == 2) {
		if (stride[1] == 1 && stride[0] == dim[1])
		{
			memset(u8, 0, data_size * dim[1] * dim[0]);
			return;
		}
		int x, y;
		for (y = 0; y < dim[0]; y++)
		{
			unsigned char* const u8y = u8 + y * stride[0] * data_size;
			for (x = 0; x < dim[1]; x++)
				memset(u8y + x * stride[1] * data_size, 0, data_size);
		}
	} else if (nd == 3) {
		if (stride[2] == 1 && stride[1] == dim[2] && stride[0] == dim[1] * dim[2])
		{
			memset(u8, 0, data_size * dim[2] * dim[1] * dim[0]);
			return;
		}
		int x, y, z;
		for (z = 0; z < dim[0]; z++)
		{
			unsigned char* const u8z = u8 + z * stride[0] * data_size;
			for (y = 0; y < dim[1]; y++)
			{
				unsigned char* const u8y = u8z + y * stride[1] * data_size;
				for (x = 0; x < dim[2]; x++)
					memset(u8y + x * stride[2] * data_size, 0, data_size);
			}
		}
	} else if (nd == 4) {
		if (stride[3] == 1 && stride[2] == dim[3] && stride[1] == dim[2] * dim[3] && stride[0] == dim[1] * dim[2] * dim[3])
		{
			memset(u8, 0, data_size * dim[3] * dim[2] * dim[1] * dim[0]);
			return;
		}
		int x, y, z, s;
		for (s = 0; s < dim[0]; s++)
		{
			unsigned char* const u8s = u8 + s * stride[0] * data_size;
			for (z = 0; z < dim[1]; z++)
			{
				unsigned char* const u8z = u8s + z * stride[1] * data_size;
				for (y = 0; y < dim[2]; y++)
				{
					unsigned char* const u8y = u8z + y * stride[2] * data_size;
					for (x = 0; x < dim[3]; x++)
						memset(u8y + x * stride[3] * data_size, 0, data_size);
				}
			}
		}
	} else {
		int i;
		for (i = 0; i < dim[0]; i++)
			_ccv_nnc_tensor_set_zero(u8 + i * stride[0] * data_size, nd - 1, dim + 1, stride + 1, data_size);
	}
}

void ccv_nnc_tensor_zero(void* const tensor)
{
	ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)tensor;
	const size_t data_size = CCV_GET_DATA_TYPE_SIZE(tv->info.datatype);
	if (CCV_IS_TENSOR_CONTIGUOUS(tv))
	{
		memset(tv->data.u8, 0, data_size * ccv_nnc_tensor_count(tv->info));
		return;
	}
	const int nd = ccv_nnc_tensor_nd(tv->info.dim);
	assert(nd >= 1);
	const int* const tvstride = tv->stride;
	// Go through this recursively.
	_ccv_nnc_tensor_set_zero(tv->data.u8, nd, tv->info.dim, tvstride, data_size);
}
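
/* Note (sketch): ccv_nnc_tensor_zero handles both cases above: a contiguous
 * tensor is cleared with a single memset, while a strided view falls back to
 * the recursive helper and clears only the elements the view covers, e.g.
 *
 *   ccv_nnc_tensor_zero(tv); // zeroes the 2x2 window, not the whole parent
 */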

int ccv_nnc_tensor_eq(const ccv_nnc_tensor_t* const a, const ccv_nnc_tensor_t* const b)
{
	assert(!CCV_IS_TENSOR_VIEW(a));
	assert(!CCV_IS_TENSOR_VIEW(b));
	// If a is a dense matrix, just use ccv_matrix_eq
	if (CCV_TENSOR_IS_DENSE_MATRIX(a->type))
		return ccv_matrix_eq((ccv_matrix_t*)a, (ccv_matrix_t*)b);
	// Otherwise, do our own thing.
	if (CCV_GET_DATA_TYPE(a->type) != CCV_GET_DATA_TYPE(b->type))
		return -1;
	int i, c = 1;
	for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; i++)
	{
		if (!a->info.dim[i] && !b->info.dim[i])
			break;
		if (a->info.dim[i] != b->info.dim[i])
			return -1;
		c *= a->info.dim[i];
	}
	if (CCV_GET_DATA_TYPE(a->type) == CCV_32S)
		return memcmp(a->data.i32, b->data.i32, sizeof(int) * c) == 0 ? 0 : -1;
	// Only 32F and 64F are supported at this point.
	assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F || CCV_GET_DATA_TYPE(a->type) == CCV_64F);
	// Read: http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
	// http://floating-point-gui.de/errors/comparison/
	if (CCV_GET_DATA_TYPE(a->type) == CCV_32F)
	{
		static const float epsi = FLT_EPSILON;
		static const int32_t ulps = 128; // so that 1 and 1.000015 will be treated as the same.
		for (i = 0; i < c; i++)
		{
			// Although this is floating point, use the integer representation to compare.
			int32_t i32a = a->data.i32[i];
			if (i32a < 0)
				i32a = 0x80000000 - i32a;
			int32_t i32b = b->data.i32[i];
			if (i32b < 0)
				i32b = 0x80000000 - i32b;
			if (abs(i32a - i32b) > ulps && fabsf(a->data.f32[i] - b->data.f32[i]) > epsi)
				return -1;
		}
	} else if (CCV_GET_DATA_TYPE(a->type) == CCV_64F) {
		typedef union {
			double f64;
			int64_t i64;
		} Float64;
		static const double epsi = DBL_EPSILON;
		static const int64_t ulps = 128; // so that 1 and 1.000015 will be treated as the same.
		for (i = 0; i < c; i++)
		{
			// Although this is floating point, use the integer representation to compare.
			Float64 f64a, f64b;
			f64a.f64 = a->data.f64[i];
			f64b.f64 = b->data.f64[i];
			if (f64a.i64 < 0)
				f64a.i64 = 0x8000000000000000 - f64a.i64;
			if (f64b.i64 < 0)
				f64b.i64 = 0x8000000000000000 - f64b.i64;
			if (llabs(f64a.i64 - f64b.i64) > ulps && fabs(a->data.f64[i] - b->data.f64[i]) > epsi)
				return -1;
		}
	}
	return 0;
}
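
/* Note (sketch): the integer trick above maps IEEE floats onto a monotonic
 * integer scale (negative values are mirrored around the sign bit), so
 * |i32a - i32b| counts units in the last place (ULPs). Two tensors compare
 * equal (return 0) when every pair of elements is within 128 ULPs or within
 * FLT_EPSILON/DBL_EPSILON absolutely, e.g.
 *
 *   assert(ccv_nnc_tensor_eq(a, a) == 0); // a tensor always equals itself
 */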

static void _strcat(char** str, int* written, size_t* len, char* from, int from_size)
{
	if (*len - *written < from_size)
	{
		*len += from_size * 2;
		*str = (char*)ccrealloc(*str, *len);
	}
	memcpy(*str + *written, from, from_size);
	*written += from_size;
}

#define _STRPRINTF(str, written, len, format, ...) \
	do { \
		const int newly_written = snprintf((str) + (written), (len) - (written), format, ## __VA_ARGS__); \
		if ((len) - (written) < newly_written) \
		{ \
			(len) += newly_written * 2; \
			(str) = (char*)ccrealloc((str), (len)); \
			(written) += snprintf((str) + (written), (len) - (written), format, ## __VA_ARGS__); \
		} else \
			(written) += newly_written; \
	} while (0)

static void _strv(char** str, int* written, size_t* len, const ccv_nnc_tensor_t* const a, int i)
{
	if (a->info.datatype == CCV_32F)
		_STRPRINTF(*str, *written, *len, "%10.5g", a->data.f32[i]);
	else if (a->info.datatype == CCV_64F)
		_STRPRINTF(*str, *written, *len, "%10.5g", a->data.f64[i]);
	else if (a->info.datatype == CCV_16F) {
		float v;
		ccv_half_precision_to_float((uint16_t*)(a->data.f16 + i), &v, 1);
		_STRPRINTF(*str, *written, *len, "%10.5g", v);
	} else if (a->info.datatype == CCV_32S)
		_STRPRINTF(*str, *written, *len, "%10d", a->data.i32[i]);
	else if (a->info.datatype == CCV_64S)
		_STRPRINTF(*str, *written, *len, "%12lld", (long long int)a->data.i64[i]);
	else if (a->info.datatype == CCV_8U)
		_STRPRINTF(*str, *written, *len, "%3d", (int)a->data.u8[i]);
}

static void _strt(char** str, int* written, size_t* len, const ccv_nnc_tensor_t* const a, int nd, int spacer, const int* const dim, const int* const stride, int idx)
{
	assert(nd != 1);
	if (nd == 2)
	{
		// Print columns and the rows.
		int i, j, k;
		if (dim[0] <= 8)
		{
			for (i = 0; i < dim[0]; i++)
			{
				if (i != 0)
				{
					_strcat(str, written, len, "  ", 2);
					for (k = 0; k < spacer; k++)
						_strcat(str, written, len, " ", 1);
				}
				_strcat(str, written, len, "[", 1);
				if (dim[1] <= 8)
				{
					for (j = 0; j < dim[1]; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						if (j < dim[1] - 1)
							_strcat(str, written, len, ", ", 2);
					}
					if (i < dim[0] - 1)
						_strcat(str, written, len, "],\n", 3);
				} else {
					for (j = 0; j < 3; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						_strcat(str, written, len, ", ", 2);
					}
					_strcat(str, written, len, " ..., ", 6);
					for (j = dim[1] - 3; j < dim[1]; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						if (j < dim[1] - 1)
							_strcat(str, written, len, ", ", 2);
					}
					if (i < dim[0] - 1)
						_strcat(str, written, len, "],\n", 3);
				}
			}
			_strcat(str, written, len, "]", 1);
		} else {
			for (i = 0; i < 3; i++)
			{
				if (i != 0)
				{
					_strcat(str, written, len, "  ", 2);
					for (k = 0; k < spacer; k++)
						_strcat(str, written, len, " ", 1);
				}
				_strcat(str, written, len, "[", 1);
				if (dim[1] <= 8)
				{
					for (j = 0; j < dim[1]; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						if (j < dim[1] - 1)
							_strcat(str, written, len, ", ", 2);
					}
					_strcat(str, written, len, "],\n", 3);
				} else {
					for (j = 0; j < 3; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						_strcat(str, written, len, ", ", 2);
					}
					_strcat(str, written, len, " ..., ", 6);
					for (j = dim[1] - 3; j < dim[1]; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						if (j < dim[1] - 1)
							_strcat(str, written, len, ", ", 2);
					}
					_strcat(str, written, len, "],\n", 3);
				}
			}
			_strcat(str, written, len, "  ", 2);
			for (k = 0; k < spacer; k++)
				_strcat(str, written, len, " ", 1);
			_strcat(str, written, len, "...,\n", 5);
			for (i = dim[0] - 3; i < dim[0]; i++)
			{
				_strcat(str, written, len, "  ", 2);
				for (k = 0; k < spacer; k++)
					_strcat(str, written, len, " ", 1);
				_strcat(str, written, len, "[", 1);
				if (dim[1] < 8)
				{
					for (j = 0; j < dim[1]; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						if (j < dim[1] - 1)
							_strcat(str, written, len, ", ", 2);
					}
					if (i < dim[0] - 1)
						_strcat(str, written, len, "],\n", 3);
				} else {
					for (j = 0; j < 3; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						_strcat(str, written, len, ", ", 2);
					}
					_strcat(str, written, len, " ..., ", 6);
					for (j = dim[1] - 3; j < dim[1]; j++)
					{
						_strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
						if (j < dim[1] - 1)
							_strcat(str, written, len, ", ", 2);
					}
					if (i < dim[0] - 1)
						_strcat(str, written, len, "],\n", 3);
				}
			}
			_strcat(str, written, len, "]", 1);
		}
		return;
	}
	int i, j;
	if (dim[0] > 4)
	{
		for (i = 0; i < 2; i++)
		{
			_strcat(str, written, len, "[", 1);
			_strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
			_strcat(str, written, len, "],\n  ", 5);
			for (j = 0; j < spacer; j++)
				_strcat(str, written, len, " ", 1);
		}
		_strcat(str, written, len, "...,\n", 5);
		_strcat(str, written, len, "  ", 2);
		for (j = 0; j < spacer; j++)
			_strcat(str, written, len, " ", 1);
		for (i = dim[0] - 2; i < dim[0]; i++)
		{
			_strcat(str, written, len, "[", 1);
			_strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
			if (i < dim[0] - 1)
			{
				_strcat(str, written, len, "],\n  ", 5);
				for (j = 0; j < spacer; j++)
					_strcat(str, written, len, " ", 1);
			}
		}
		_strcat(str, written, len, "]", 1);
	} else {
		for (i = 0; i < dim[0]; i++)
		{
			_strcat(str, written, len, "[", 1);
			_strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
			if (i < dim[0] - 1)
			{
				_strcat(str, written, len, "],\n", 3);
				_strcat(str, written, len, "  ", 2);
				for (j = 0; j < spacer; j++)
					_strcat(str, written, len, " ", 1);
			}
		}
		_strcat(str, written, len, "]", 1);
	}
}

char* ccv_nnc_tensor_format_new(const ccv_nnc_tensor_t* const a)
{
	const int nd = ccv_nnc_tensor_nd(a->info.dim);
	int i;
	int rows = 8; // 8 rows for the first one, and then just first and last.
	for (i = 2; i < nd; i++)
		rows *= 5; // Maximum 3 rows beyond the first two.
	int columns = nd * 2 + 16 * 8;
	size_t len = sizeof(char) * columns * rows;
	// Allocate return string buffer.
	char* str = (char*)ccmalloc(len);
	int written = 0;
	int stride[CCV_NNC_MAX_DIM_ALLOC];
	if (CCV_IS_TENSOR_VIEW(a))
		memcpy(stride, ((ccv_nnc_tensor_view_t*)a)->stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
	else
		ccv_nnc_tensor_get_stride(a->info.dim, stride);
	_strcat(&str, &written, &len, "[\n  ", 4);
	if (nd == 1)
	{
		// Special casing for vector.
		if (a->info.dim[0] <= 64)
			for (i = 0; i < a->info.dim[0]; i++)
			{
				_strv(&str, &written, &len, a, i * stride[0]);
				if (i < a->info.dim[0] - 1)
				{
					if ((i + 1) % 8 == 0)
						_strcat(&str, &written, &len, ",\n  ", 4);
					else
						_strcat(&str, &written, &len, ", ", 2);
				}
			}
		else {
			// First 3 rows.
			for (i = 0; i < 24; i++)
			{
				_strv(&str, &written, &len, a, i * stride[0]);
				if ((i + 1) % 8 == 0)
					_strcat(&str, &written, &len, ",\n  ", 4);
				else
					_strcat(&str, &written, &len, ", ", 2);
			}
			_strcat(&str, &written, &len, "...,\n  ", 7);
			// Last 3 rows (aligned to 8 items per row).
			int start = ((a->info.dim[0] + 7) / 8 - 3) * 8;
			for (i = start; i < a->info.dim[0]; i++)
			{
				_strv(&str, &written, &len, a, i * stride[0]);
				if (i < a->info.dim[0] - 1)
				{
					if ((i + 1) % 8 == 0)
						_strcat(&str, &written, &len, ",\n  ", 4);
					else
						_strcat(&str, &written, &len, ", ", 2);
				}
			}
		}
	} else {
		_strt(&str, &written, &len, a, nd, 0, a->info.dim, stride, 0);
	}
	_strcat(&str, &written, &len, "\n]", 3); // Including the terminal \0.
	str = (char*)ccrealloc(str, written); // Don't need the extra spaces.
	return str;
}
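
/* Usage sketch (illustrative): the returned string is heap-allocated and
 * NUL-terminated; the caller owns it and frees it with ccfree.
 *
 *   char* const str = ccv_nnc_tensor_format_new(t);
 *   printf("%s\n", str);
 *   ccfree(str);
 */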