Coverage Report

Created: 2025-02-24 17:43

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_tensor.c

#include "ccv_nnc.h"
#include "ccv_nnc_easy.h"
#include "ccv_nnc_internal.h"
#ifdef HAVE_CUDA
#include "gpu/ccv_nnc_compat.h"
#elif defined(HAVE_MPS)
#include "mps/ccv_nnc_mps.h"
#endif
#include <fcntl.h>
#include <sys/mman.h>

// MARK - Level-1 API

const int ccv_nnc_no_ofs[CCV_NNC_MAX_DIM_ALLOC] = {0};

ccv_nnc_tensor_t* ccv_nnc_tensor_new(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
{
  ccv_nnc_tensor_t* tensor;
  // This specific form can be toll-free bridged to ccv_dense_matrix_t (on CPU, 3 dims (channels, rows, cols), and channels is smaller than the max channels of ccv_dense_matrix_t).
  const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
  if (ptr || (flags & CCV_NO_DATA_ALLOC))
  {
    tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
    tensor->dataof = 0;
    tensor->alias_ref = 0;
    tensor->sig = 0;
    tensor->refcount = 1;
    tensor->info = params;
    if (tfb)
    {
      tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
      // This corresponds to mat->step
      tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
    } else // This won't be recognized by ccv_dense_matrix_t
      tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
    tensor->data.u8 = (uint8_t*)ptr;
    return tensor;
  }
  if (flags & CCV_TENSOR_CPU_MEMORY)
  {
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  } else if (flags & CCV_TENSOR_GPU_MEMORY) {
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
  }
  const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 63) & -64;
  const size_t size = ccv_nnc_tensor_data_size(params);
#ifdef HAVE_CUDA
  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
  {
    tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
    if (size > 0)
      tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
    else
      tensor->data.u8 = 0;
  } else {
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
    ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
    if (size > 0)
      tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
    else
      tensor->data.u8 = 0;
  }
#elif defined(HAVE_MPS)
  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
  {
    tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
    if (size > 0)
      tensor->data.u8 = (uint8_t*)mpobjmalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
    else
      tensor->data.u8 = 0;
  } else {
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
    ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
    if (size > 0)
      tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
    else
      tensor->data.u8 = 0;
  }
#else
  assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
  if (size > 0)
    tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
  else
    tensor->data.u8 = 0;
#endif
  tensor->dataof = 0;
  tensor->alias_ref = 0;
  tensor->data_size = size;
  tensor->sig = 0;
  tensor->refcount = 1;
  tensor->info = params;
  if (tfb)
  {
    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
    // This corresponds to mat->step
    tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
  } else
    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
  return tensor;
}
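
A minimal usage sketch for the function above (not part of the file; the shape, datatype, and CPU-only setting are assumptions):

// Sketch: allocate a fresh 8x8x3 CPU tensor, and separately wrap caller-owned memory.
ccv_nnc_tensor_param_t params = {
  .type = CCV_TENSOR_CPU_MEMORY,
  .format = CCV_TENSOR_FORMAT_NHWC,
  .datatype = CCV_32F,
  .dim = {8, 8, 3}, // 3 channels, so this takes the toll-free bridging (tfb) path
};
// ptr == 0 asks ccv_nnc_tensor_new to allocate the data buffer inline after the header.
ccv_nnc_tensor_t* owned = ccv_nnc_tensor_new(0, params, 0);
// A non-zero ptr wraps existing memory; the CCV_NO_DATA_ALLOC path allocates only the header.
float buffer[8 * 8 * 3];
ccv_nnc_tensor_t* wrapped = ccv_nnc_tensor_new(buffer, params, 0);
ccv_nnc_tensor_free(wrapped); // frees only the header; buffer stays caller-owned
ccv_nnc_tensor_free(owned);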

ccv_nnc_tensor_t* ccv_nnc_tensor_new_from_file(const ccv_nnc_tensor_param_t params, const char* const filename, const off_t offset, const int flags)
{
  ccv_nnc_tensor_t* tensor;
  // This specific form can be toll-free bridged to ccv_dense_matrix_t (on CPU, 3 dims (channels, rows, cols), and channels is smaller than the max channels of ccv_dense_matrix_t).
  const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
  tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
  tensor->dataof = 0;
  tensor->alias_ref = 0;
  tensor->sig = 0;
  tensor->refcount = 1;
  tensor->info = params;
  if (tfb)
  {
    tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
    // This corresponds to mat->step
    tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
  } else // This won't be recognized by ccv_dense_matrix_t
    tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
  const size_t size = ccv_nnc_tensor_data_size(params);
#ifdef HAVE_CUDA
  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
  {
    // Remove this flag so it can be deallocated as usual.
    tensor->type &= ~CCV_NO_DATA_ALLOC;
    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
    if (size > 0)
    {
      // Memory mapping is not supported yet on CUDA; read the file into device memory instead.
      tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
      int fd = open(filename, O_RDONLY, 0);
      cufileread(fd, offset, tensor->data.u8, size);
      close(fd);
    } else
      tensor->data.u8 = 0;
  } else {
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
    if (size > 0)
    {
      int fd = open(filename, O_RDONLY, 0);
      void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
      close(fd);
      madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
      tensor->data.u8 = bufptr;
      tensor->type |= CCV_MAPPED_MEM;
    } else
      tensor->data.u8 = 0;
  }
#elif defined(HAVE_MPS)
  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
  {
    // Remove this flag so it can be deallocated as usual.
    tensor->type &= ~CCV_NO_DATA_ALLOC;
    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
    if (size > 0)
      tensor->data.u8 = (uint8_t*)mpmemmap(filename, size, offset, flags);
    else
      tensor->data.u8 = 0;
  } else {
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
    if (size > 0)
    {
      int fd = open(filename, O_RDONLY, 0);
      void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
      close(fd);
      madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
      tensor->data.u8 = bufptr;
      tensor->type |= CCV_MAPPED_MEM;
    } else
      tensor->data.u8 = 0;
  }
#else
  assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  if (size > 0)
  {
    int fd = open(filename, O_RDONLY, 0);
    void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
    close(fd);
    madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
    tensor->data.u8 = bufptr;
    tensor->type |= CCV_MAPPED_MEM;
  } else
    tensor->data.u8 = 0;
#endif
  return tensor;
}
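
A sketch of loading tensor data from a file (the filename, shape, and zero offset are assumptions). On the CPU path the data is mmap'ed read-only, so mmap's requirement that the offset be page-aligned applies to the caller:

// Sketch: map 1024 floats from a hypothetical "weights.bin" into a CPU tensor.
ccv_nnc_tensor_param_t params = {
  .type = CCV_TENSOR_CPU_MEMORY,
  .format = CCV_TENSOR_FORMAT_NHWC,
  .datatype = CCV_32F,
  .dim = {1024},
};
ccv_nnc_tensor_t* t = ccv_nnc_tensor_new_from_file(params, "weights.bin", 0, 0);
// The CCV_MAPPED_MEM bit set above is what makes ccv_nnc_tensor_free munmap the data.
ccv_nnc_tensor_free(t);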

ccv_nnc_tensor_t* ccv_nnc_tensor_resize(ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params)
{
  assert(!CCV_IS_TENSOR_VIEW(tensor));
  assert(tensor->type & CCV_UNMANAGED);
  assert(tensor->data_size > 0);
  assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GET_MEMORY(tensor->info.type));
  assert(CCV_TENSOR_GET_DEVICE(params.type) == CCV_TENSOR_GET_DEVICE(tensor->info.type));
  const size_t size = ccv_nnc_tensor_data_size(params);
  const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
  tensor->info = params;
#ifdef HAVE_CUDA
  const int pinned_mem = (tensor->type & CCV_PINNED_MEM);
#endif
  if (tfb)
  {
    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
    // This corresponds to mat->step
    tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
  } else
    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
  if (size <= tensor->data_size) // Nothing more to do.
  {
#ifdef HAVE_CUDA
    if (pinned_mem)
      tensor->type |= CCV_PINNED_MEM;
#endif
    return tensor;
  }
  ccv_nnc_tensor_t* new_tensor = tensor;
  const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 63) & -64;
#ifdef HAVE_CUDA
  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
  {
    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
    const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type);
    assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type));
    cufree(device_id, tensor->data.u8);
    new_tensor->data.u8 = (uint8_t*)cumalloc(device_id, size);
  } else {
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
    assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
    // Unregister the pinned memory now; it will be pinned again after the realloc below.
    if (pinned_mem)
      cuunregister(new_tensor->data.u8);
    new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
    new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
  }
#elif defined(HAVE_MPS)
  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
  {
    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
    const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type);
    assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type));
    mpobjfree(device_id, tensor->data.u8);
    new_tensor->data.u8 = (uint8_t*)mpobjmalloc(device_id, size);
  } else {
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
    assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
    new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
    new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
  }
#else
  assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
  new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
#endif
  new_tensor->data_size = size;
#ifdef HAVE_CUDA
  if (pinned_mem)
    ccv_nnc_tensor_pin_memory(new_tensor);
#endif
  return new_tensor;
}
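
A sketch of growing a CPU tensor (shapes are assumptions). Because header and data live in one ccrealloc'd block, growing may move the whole tensor, so callers must continue with the returned pointer:

// Sketch: resize a 16-element tensor up to 1024 elements.
ccv_nnc_tensor_param_t small = { .type = CCV_TENSOR_CPU_MEMORY, .format = CCV_TENSOR_FORMAT_NHWC, .datatype = CCV_32F, .dim = {16} };
ccv_nnc_tensor_param_t large = small;
large.dim[0] = 1024;
ccv_nnc_tensor_t* t = ccv_nnc_tensor_new(0, small, 0);
t = ccv_nnc_tensor_resize(t, large); // shrinking returns the same pointer; growing may not
ccv_nnc_tensor_free(t);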

ccv_nnc_tensor_t ccv_nnc_tensor(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
{
  // This specific form can be toll-free bridged to ccv_dense_matrix_t
  const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
  ccv_nnc_tensor_t tensor;
  tensor.dataof = 0;
  tensor.alias_ref = 0;
  tensor.sig = 0;
  tensor.refcount = 1;
  tensor.info = params;
  if (flags & CCV_TENSOR_CPU_MEMORY)
  {
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  } else if (flags & CCV_TENSOR_GPU_MEMORY) {
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
  }
  if (tfb)
  {
    tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
    // This corresponds to mat->step
    tensor.info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
  } else // This won't be recognized by ccv_dense_matrix_t
    tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
  if (params.dim[0] > 0)
    tensor.data.u8 = (uint8_t*)ptr;
  else
    tensor.data.u8 = 0;
  tensor.data_size = 0;
  return tensor;
}
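
By contrast with ccv_nnc_tensor_new, this variant returns the header by value, so it suits short-lived wrappers. A sketch (the data values are assumptions):

// Sketch: a stack-allocated tensor header over caller-owned data.
float data[4] = {1, 2, 3, 4};
ccv_nnc_tensor_param_t params = { .type = CCV_TENSOR_CPU_MEMORY, .format = CCV_TENSOR_FORMAT_NHWC, .datatype = CCV_32F, .dim = {4} };
ccv_nnc_tensor_t t = ccv_nnc_tensor(data, params, 0);
// t owns nothing (CCV_NO_DATA_ALLOC and data_size == 0), so there is nothing to free.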

int ccv_nnc_tensor_pin_memory(ccv_nnc_tensor_t* const tensor)
{
#ifdef HAVE_CUDA
  assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
  if (!(tensor->type & CCV_PINNED_MEM) && tensor->data_size)
  {
    const int success = curegister(tensor->data.u8, tensor->data_size);
    if (success)
      tensor->type |= CCV_PINNED_MEM;
    return success ? 0 : -1;
  }
#endif
  return 0;
}
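
Pinning registers the CPU buffer with the CUDA driver (curegister above) so it becomes page-locked, which is what allows fast, asynchronous host-to-device copies. A sketch, assuming a HAVE_CUDA build and a CPU tensor t as in the earlier sketches:

// Sketch (CUDA builds only): pin a CPU tensor before repeated host<->device copies.
if (ccv_nnc_tensor_pin_memory(t) == 0) {
  // t->type now carries CCV_PINNED_MEM; ccv_nnc_tensor_free will cuunregister it.
}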

void ccv_nnc_tensor_free(ccv_nnc_tensor_t* const tensor)
{
  if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY && tensor->type & CCV_MAPPED_MEM)
  {
    // The size might be different from the one we allocated with (for example, the tensor might rewrite its size to be smaller).
    // This might cause issues in the future.
    const size_t size = ccv_nnc_tensor_data_size(tensor->info);
    munmap(tensor->data.u8, size);
  }
#ifdef HAVE_CUDA
  if (tensor->type & CCV_PINNED_MEM)
    cuunregister(tensor->data.u8);
  if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY &&
    !(tensor->type & CCV_NO_DATA_ALLOC)) // If this is GPU memory and it is allocated, free.
    cufree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8);
#elif defined(HAVE_MPS)
  if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY &&
    !(tensor->type & CCV_NO_DATA_ALLOC)) // If this is GPU memory and it is allocated, free.
    mpobjfree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8);
#endif
  ccfree(tensor);
}

static inline void _ccv_nnc_tensor_view_set(ccv_nnc_tensor_view_t* const tv, const ccv_nnc_tensor_t* const tensor, const int dim[CCV_NNC_MAX_DIM_ALLOC], const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
{
  memcpy(tv->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
  memcpy(tv->info.dim, dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
  uint8_t* const p = tensor->data.u8;
  const off_t off = tv->off = ccv_nnc_tensor_view_offset(tv->info.datatype, stride, ofs);
  tv->contiguous = ccv_nnc_tensor_view_is_contiguous(dim, stride);
  assert(off + CCV_GET_DATA_TYPE_SIZE(tv->info.datatype) * ccv_nnc_dimension_upper_bound(tv->info.dim, tv->stride) <= CCV_GET_DATA_TYPE_SIZE(tensor->info.datatype) * ccv_nnc_tensor_count(tensor->info));
  ccv_nnc_tensor_data(tv->info, p, off + tensor->dataof, &tv->data, &tv->dataof);
}

ccv_nnc_tensor_view_t* ccv_nnc_tensor_view_new(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
{
  ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)ccmalloc(sizeof(ccv_nnc_tensor_view_t));
  tv->type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW;
  tv->dataof = 0;
  tv->alias_ref = (uintptr_t)tensor;
  tv->refcount = 1;
  tv->sig = 0;
  tv->data_size = 0;
  assert(params.type == tensor->info.type);
  assert(params.datatype == tensor->info.datatype);
  tv->info = params;
  _ccv_nnc_tensor_view_set(tv, tensor, params.dim, ofs, stride);
  return tv;
}

ccv_nnc_tensor_view_t ccv_nnc_tensor_view(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
{
  assert(!CCV_IS_TENSOR_VIEW(tensor));
  assert(params.type == tensor->info.type);
  assert(params.datatype == tensor->info.datatype);
  ccv_nnc_tensor_view_t tv = {
    .dataof = 0,
    .alias_ref = (uintptr_t)tensor,
    .type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW, // clean up the channel bits, and then add the CCV_TENSOR_VIEW identifier
    .refcount = 1,
    .sig = 0,
    .info = params,
    .data_size = 0,
  };
  _ccv_nnc_tensor_view_set(&tv, tensor, params.dim, ofs, stride);
  return tv;
}
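
A sketch of viewing a sub-block through the heap variant above (the 4x4 parent shape and 2x2 view are assumptions):

// Sketch: view the top-left 2x2 block of a 4x4 CPU tensor.
ccv_nnc_tensor_param_t base_params = { .type = CCV_TENSOR_CPU_MEMORY, .format = CCV_TENSOR_FORMAT_NHWC, .datatype = CCV_32F, .dim = {4, 4} };
ccv_nnc_tensor_t* base = ccv_nnc_tensor_new(0, base_params, 0);
ccv_nnc_tensor_param_t view_params = base_params; // type and datatype must match the parent
view_params.dim[0] = view_params.dim[1] = 2;
const int ofs[CCV_NNC_MAX_DIM_ALLOC] = {0};       // start at the origin
const int stride[CCV_NNC_MAX_DIM_ALLOC] = {4, 1}; // row stride of the 4-wide parent
ccv_nnc_tensor_view_t* tv = ccv_nnc_tensor_view_new(base, view_params, ofs, stride);
ccv_nnc_tensor_view_free(tv);
ccv_nnc_tensor_free(base);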

void ccv_nnc_tensor_view_free(ccv_nnc_tensor_view_t* const tensor_view)
{
  ccfree(tensor_view);
}

void _ccv_nnc_tensor_set_zero(unsigned char* u8, const int nd, const int* const dim, const int* const stride, const size_t data_size)
{
  if (nd == 1)
  {
    if (stride[0] == 1)
    {
      memset(u8, 0, data_size * dim[0]);
      return;
    }
    int i;
    for (i = 0; i < dim[0]; i++)
      memset(u8 + i * stride[0] * data_size, 0, data_size);
  } else if (nd == 2) {
    if (stride[1] == 1 && stride[0] == dim[1])
    {
      memset(u8, 0, data_size * dim[1] * dim[0]);
      return;
    }
    int x, y;
    for (y = 0; y < dim[0]; y++)
    {
      unsigned char* const u8y = u8 + y * stride[0] * data_size;
      for (x = 0; x < dim[1]; x++)
        memset(u8y + x * stride[1] * data_size, 0, data_size);
    }
  } else if (nd == 3) {
    if (stride[2] == 1 && stride[1] == dim[2] && stride[0] == dim[1] * dim[2])
    {
      memset(u8, 0, data_size * dim[2] * dim[1] * dim[0]);
      return;
    }
    int x, y, z;
    for (z = 0; z < dim[0]; z++)
    {
      unsigned char* const u8z = u8 + z * stride[0] * data_size;
      for (y = 0; y < dim[1]; y++)
      {
        unsigned char* const u8y = u8z + y * stride[1] * data_size;
        for (x = 0; x < dim[2]; x++)
          memset(u8y + x * stride[2] * data_size, 0, data_size);
      }
    }
  } else if (nd == 4) {
    if (stride[3] == 1 && stride[2] == dim[3] && stride[1] == dim[2] * dim[3] && stride[0] == dim[1] * dim[2] * dim[3])
    {
      memset(u8, 0, data_size * dim[3] * dim[2] * dim[1] * dim[0]);
      return;
    }
    int x, y, z, s;
    for (s = 0; s < dim[0]; s++)
    {
      unsigned char* const u8s = u8 + s * stride[0] * data_size;
      for (z = 0; z < dim[1]; z++)
      {
        unsigned char* const u8z = u8s + z * stride[1] * data_size;
        for (y = 0; y < dim[2]; y++)
        {
          unsigned char* const u8y = u8z + y * stride[2] * data_size;
          for (x = 0; x < dim[3]; x++)
            memset(u8y + x * stride[3] * data_size, 0, data_size);
        }
      }
    }
  } else {
    int i;
    for (i = 0; i < dim[0]; i++)
      _ccv_nnc_tensor_set_zero(u8 + i * stride[0] * data_size, nd - 1, dim + 1, stride + 1, data_size);
  }
}

void ccv_nnc_tensor_zero(void* const tensor)
{
  ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)tensor;
  const size_t data_size = CCV_GET_DATA_TYPE_SIZE(tv->info.datatype);
  if (CCV_IS_TENSOR_CONTIGUOUS(tv))
  {
    memset(tv->data.u8, 0, data_size * ccv_nnc_tensor_count(tv->info));
    return;
  }
  const int nd = ccv_nnc_tensor_nd(tv->info.dim);
  assert(nd >= 1);
  const int* const tvstride = tv->stride;
  // Go through this recursively.
  _ccv_nnc_tensor_set_zero(tv->data.u8, nd, tv->info.dim, tvstride, data_size);
}
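
Continuing the view sketch above: a contiguous tensor takes the single-memset fast path, while zeroing through a strided view takes the recursive path and clears only the viewed elements.

// Sketch: tv is the 2x2 view from the earlier sketch; the rest of base stays intact.
ccv_nnc_tensor_zero(tv);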

int ccv_nnc_tensor_eq(const ccv_nnc_tensor_t* const a, const ccv_nnc_tensor_t* const b)
{
  assert(!CCV_IS_TENSOR_VIEW(a));
  assert(!CCV_IS_TENSOR_VIEW(b));
  // If a is a dense matrix, just use ccv_matrix_eq
  if (CCV_TENSOR_IS_DENSE_MATRIX(a->type))
    return ccv_matrix_eq((ccv_matrix_t*)a, (ccv_matrix_t*)b);
  // Otherwise, do our own thing.
  if (CCV_GET_DATA_TYPE(a->type) != CCV_GET_DATA_TYPE(b->type))
    return -1;
  int i, c = 1;
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; i++)
  {
    if (!a->info.dim[i] && !b->info.dim[i])
      break;
    if (a->info.dim[i] != b->info.dim[i])
      return -1;
    c *= a->info.dim[i];
  }
  if (CCV_GET_DATA_TYPE(a->type) == CCV_32S)
    return memcmp(a->data.i32, b->data.i32, sizeof(int) * c) == 0 ? 0 : -1;
  // Only support 32F and 64F at this point.
  assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F || CCV_GET_DATA_TYPE(a->type) == CCV_64F);
  // Read: http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
  // http://floating-point-gui.de/errors/comparison/
  if (CCV_GET_DATA_TYPE(a->type) == CCV_32F)
  {
    static const float epsi = FLT_EPSILON;
    static const int32_t ulps = 128; // so that 1 and 1.000015 will be treated as the same.
    for (i = 0; i < c; i++)
    {
      // Although this is floating point, use the integer representation to compare.
      int32_t i32a = a->data.i32[i];
      if (i32a < 0)
        i32a = 0x80000000 - i32a;
      int32_t i32b = b->data.i32[i];
      if (i32b < 0)
        i32b = 0x80000000 - i32b;
      if (abs(i32a - i32b) > ulps && fabsf(a->data.f32[i] - b->data.f32[i]) > epsi)
        return -1;
    }
  } else if (CCV_GET_DATA_TYPE(a->type) == CCV_64F) {
    typedef union {
      double f64;
      int64_t i64;
    } Float64;
    static const double epsi = DBL_EPSILON;
    static const int64_t ulps = 128; // so that 1 and 1.000015 will be treated as the same.
    for (i = 0; i < c; i++)
    {
      // Although this is floating point, use the integer representation to compare.
      Float64 f64a, f64b;
      f64a.f64 = a->data.f64[i];
      f64b.f64 = b->data.f64[i];
      if (f64a.i64 < 0)
        f64a.i64 = 0x8000000000000000 - f64a.i64;
      if (f64b.i64 < 0)
        f64b.i64 = 0x8000000000000000 - f64b.i64;
      if (llabs(f64a.i64 - f64b.i64) > ulps && fabs(a->data.f64[i] - b->data.f64[i]) > epsi)
        return -1;
    }
  }
  return 0;
}
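
The integer trick above compares IEEE-754 bit patterns: for same-sign floats, adjacent representable values differ by exactly 1 in the integer view, and remapping negative values through 0x80000000 makes the integer ordering monotonic across zero. A standalone sketch of the same comparison (a hypothetical helper, not part of the library):

// Sketch: ULP-based float comparison, mirroring the loop above.
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

static int almost_equal_ulps(float a, float b, int32_t max_ulps)
{
  int32_t ia, ib;
  memcpy(&ia, &a, sizeof(ia)); // reinterpret the IEEE-754 bits
  memcpy(&ib, &b, sizeof(ib));
  // Remap negative floats so the integer ordering matches the float ordering.
  if (ia < 0)
    ia = 0x80000000 - ia;
  if (ib < 0)
    ib = 0x80000000 - ib;
  return abs(ia - ib) <= max_ulps;
}
// almost_equal_ulps(1.0f, 1.000015f, 128) sits near the boundary the code above
// targets: neighboring float values differ by exactly 1 in this integer view.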

static void _strcat(char** str, int* written, size_t* len, char* from, int from_size)
{
  if (*len - *written < from_size)
  {
    *len += from_size * 2;
    *str = (char*)ccrealloc(*str, *len);
  }
  memcpy(*str + *written, from, from_size);
  *written += from_size;
}

#define _STRPRINTF(str, written, len, format, ...) \
do { \
  const int newly_written = snprintf((str) + (written), (len) - (written), format, ## __VA_ARGS__); \
  if ((len) - (written) < newly_written) \
  { \
    (len) += newly_written * 2; \
    (str) = (char*)ccrealloc((str), (len)); \
    (written) += snprintf((str) + (written), (len) - (written), format, ## __VA_ARGS__); \
  } else \
    (written) += newly_written; \
} while (0)

static void _strv(char** str, int* written, size_t* len, const ccv_nnc_tensor_t* const a, int i)
{
  if (a->info.datatype == CCV_32F)
    _STRPRINTF(*str, *written, *len, "%10.5g", a->data.f32[i]);
  else if (a->info.datatype == CCV_64F)
    _STRPRINTF(*str, *written, *len, "%10.5g", a->data.f64[i]);
  else if (a->info.datatype == CCV_16F) {
    float v;
    ccv_half_precision_to_float((uint16_t*)(a->data.f16 + i), &v, 1);
    _STRPRINTF(*str, *written, *len, "%10.5g", v);
  } else if (a->info.datatype == CCV_32S)
    _STRPRINTF(*str, *written, *len, "%10d", a->data.i32[i]);
  else if (a->info.datatype == CCV_64S)
    _STRPRINTF(*str, *written, *len, "%12lld", (long long int)a->data.i64[i]);
  else if (a->info.datatype == CCV_8U)
    _STRPRINTF(*str, *written, *len, "%3d", (int)a->data.u8[i]);
}

static void _strt(char** str, int* written, size_t* len, const ccv_nnc_tensor_t* const a, int nd, int spacer, const int* const dim, const int* const stride, int idx)
{
  assert(nd != 1);
  if (nd == 2)
  {
    // Print columns and the rows.
    int i, j, k;
    if (dim[0] <= 8)
    {
      for (i = 0; i < dim[0]; i++)
      {
        if (i != 0)
        {
          _strcat(str, written, len, "  ", 2);
          for (k = 0; k < spacer; k++)
            _strcat(str, written, len, " ", 1);
        }
        _strcat(str, written, len, "[", 1);
        if (dim[1] <= 8)
        {
          for (j = 0; j < dim[1]; j++)
          {
            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
            if (j < dim[1] - 1)
              _strcat(str, written, len, ", ", 2);
          }
          if (i < dim[0] - 1)
            _strcat(str, written, len, "],\n", 3);
        } else {
          for (j = 0; j < 3; j++)
          {
            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
            _strcat(str, written, len, ", ", 2);
          }
          _strcat(str, written, len, " ..., ", 6);
          for (j = dim[1] - 3; j < dim[1]; j++)
          {
            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
            if (j < dim[1] - 1)
              _strcat(str, written, len, ", ", 2);
          }
          if (i < dim[0] - 1)
            _strcat(str, written, len, "],\n", 3);
        }
      }
      _strcat(str, written, len, "]", 1);
    } else {
      for (i = 0; i < 3; i++)
      {
        if (i != 0)
        {
          _strcat(str, written, len, "  ", 2);
          for (k = 0; k < spacer; k++)
            _strcat(str, written, len, " ", 1);
        }
        _strcat(str, written, len, "[", 1);
        if (dim[1] <= 8)
        {
          for (j = 0; j < dim[1]; j++)
          {
            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
            if (j < dim[1] - 1)
              _strcat(str, written, len, ", ", 2);
          }
          _strcat(str, written, len, "],\n", 3);
        } else {
          for (j = 0; j < 3; j++)
          {
            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
            _strcat(str, written, len, ", ", 2);
          }
          _strcat(str, written, len, " ..., ", 6);
          for (j = dim[1] - 3; j < dim[1]; j++)
          {
            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
            if (j < dim[1] - 1)
              _strcat(str, written, len, ", ", 2);
          }
          _strcat(str, written, len, "],\n", 3);
        }
      }
      _strcat(str, written, len, "  ", 2);
      for (k = 0; k < spacer; k++)
        _strcat(str, written, len, " ", 1);
      _strcat(str, written, len, "...,\n", 5);
      for (i = dim[0] - 3; i < dim[0]; i++)
      {
        _strcat(str, written, len, "  ", 2);
        for (k = 0; k < spacer; k++)
          _strcat(str, written, len, " ", 1);
        _strcat(str, written, len, "[", 1);
        if (dim[1] < 8)
        {
          for (j = 0; j < dim[1]; j++)
          {
            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
            if (j < dim[1] - 1)
              _strcat(str, written, len, ", ", 2);
          }
          if (i < dim[0] - 1)
            _strcat(str, written, len, "],\n", 3);
        } else {
          for (j = 0; j < 3; j++)
          {
            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
            _strcat(str, written, len, ", ", 2);
          }
          _strcat(str, written, len, " ..., ", 6);
          for (j = dim[1] - 3; j < dim[1]; j++)
          {
            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
            if (j < dim[1] - 1)
              _strcat(str, written, len, ", ", 2);
          }
          if (i < dim[0] - 1)
            _strcat(str, written, len, "],\n", 3);
        }
      }
      _strcat(str, written, len, "]", 1);
    }
    return;
  }
  int i, j;
  if (dim[0] > 4)
  {
    for (i = 0; i < 2; i++)
    {
      _strcat(str, written, len, "[", 1);
      _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
      _strcat(str, written, len, "],\n  ", 5);
      for (j = 0; j < spacer; j++)
        _strcat(str, written, len, " ", 1);
    }
    _strcat(str, written, len, "...,\n", 5);
    _strcat(str, written, len, "  ", 2);
    for (j = 0; j < spacer; j++)
      _strcat(str, written, len, " ", 1);
    for (i = dim[0] - 2; i < dim[0]; i++)
    {
      _strcat(str, written, len, "[", 1);
      _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
      if (i < dim[0] - 1)
      {
        _strcat(str, written, len, "],\n  ", 5);
        for (j = 0; j < spacer; j++)
          _strcat(str, written, len, " ", 1);
      }
    }
    _strcat(str, written, len, "]", 1);
  } else {
    for (i = 0; i < dim[0]; i++)
    {
      _strcat(str, written, len, "[", 1);
      _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
      if (i < dim[0] - 1)
      {
        _strcat(str, written, len, "],\n", 3);
        _strcat(str, written, len, "  ", 2);
        for (j = 0; j < spacer; j++)
          _strcat(str, written, len, " ", 1);
      }
    }
    _strcat(str, written, len, "]", 1);
  }
}

char* ccv_nnc_tensor_format_new(const ccv_nnc_tensor_t* const a)
{
  const int nd = ccv_nnc_tensor_nd(a->info.dim);
  int i;
  int rows = 8; // 8 rows for the first one, and then just first and last.
  for (i = 2; i < nd; i++)
    rows *= 5; // Maximum 3 rows beyond the first two.
  int columns = nd * 2 + 16 * 8;
  size_t len = sizeof(char) * columns * rows;
  // Allocate the return string buffer.
  char* str = (char*)ccmalloc(len);
  int written = 0;
  int stride[CCV_NNC_MAX_DIM_ALLOC];
  if (CCV_IS_TENSOR_VIEW(a))
    memcpy(stride, ((ccv_nnc_tensor_view_t*)a)->stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
  else
    ccv_nnc_tensor_get_stride(a->info.dim, stride);
  _strcat(&str, &written, &len, "[\n  ", 4);
  if (nd == 1)
  {
    // Special-case vectors.
    if (a->info.dim[0] <= 64)
      for (i = 0; i < a->info.dim[0]; i++)
      {
        _strv(&str, &written, &len, a, i * stride[0]);
        if (i < a->info.dim[0] - 1)
        {
          if ((i + 1) % 8 == 0)
            _strcat(&str, &written, &len, ",\n  ", 4);
          else
            _strcat(&str, &written, &len, ", ", 2);
        }
      }
    else {
      // First 3 rows.
      for (i = 0; i < 24; i++)
      {
        _strv(&str, &written, &len, a, i * stride[0]);
        if ((i + 1) % 8 == 0)
          _strcat(&str, &written, &len, ",\n  ", 4);
        else
          _strcat(&str, &written, &len, ", ", 2);
      }
      _strcat(&str, &written, &len, "...,\n  ", 7);
      // Last 3 rows (aligned to 8 items per row).
      int start = ((a->info.dim[0] + 7) / 8 - 3) * 8;
      for (i = start; i < a->info.dim[0]; i++)
      {
        _strv(&str, &written, &len, a, i * stride[0]);
        if (i < a->info.dim[0] - 1)
        {
          if ((i + 1) % 8 == 0)
            _strcat(&str, &written, &len, ",\n  ", 4);
          else
            _strcat(&str, &written, &len, ", ", 2);
        }
      }
    }
  } else {
    _strt(&str, &written, &len, a, nd, 0, a->info.dim, stride, 0);
  }
  _strcat(&str, &written, &len, "\n]", 3); // Including the terminating \0.
  str = (char*)ccrealloc(str, written); // Don't need the extra space.
  return str;
}
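
A usage sketch for the formatter (assuming <stdio.h> and a tensor t allocated as in the earlier sketches):

// Sketch: render a tensor to a string and print it.
char* s = ccv_nnc_tensor_format_new(t);
printf("%s\n", s);
ccfree(s); // the buffer comes from ccmalloc/ccrealloc, so release it with ccfree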