Coverage Report

Created: 2025-05-08 13:16

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_tensor.c
(Sub-expression region counts appear inline as /*count*/ comments.)

Line   Count  Source
   1          #include "ccv_nnc.h"
   2          #include "ccv_nnc_easy.h"
   3          #include "ccv_nnc_internal.h"
   4          #ifdef HAVE_CUDA
   5          #include "gpu/ccv_nnc_compat.h"
   6          #elif defined(HAVE_MPS)
   7          #include "mps/ccv_nnc_mps.h"
   8          #endif
   9          #include <fcntl.h>
  10          #include <sys/mman.h>
  11
  12          // MARK - Level-1 API
  13
  14          const int ccv_nnc_no_ofs[CCV_NNC_MAX_DIM_ALLOC] = {0};
  15
  16          ccv_nnc_tensor_t* ccv_nnc_tensor_new(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
  17   49.1k  {
  18   49.1k    ccv_nnc_tensor_t* tensor;
  19            // this specific form can be toll-free bridged to ccv_dense_matrix_t (on CPU, 3 dims (channels, rows, cols), and channels smaller than the max channels of ccv_dense_matrix_t).
  20   49.1k    const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC /*46.0k*/ && params.dim[2] > 0 /*40.0k*/ && params.dim[2] <= CCV_MAX_CHANNEL /*4.87k*/ && params.dim[0] > 0 /*4.87k*/ && params.dim[1] > 0 /*4.87k*/ && params.dim[3] == 0 /*4.87k*/);
  21   49.1k    if (ptr || (flags & CCV_NO_DATA_ALLOC) /*47.3k*/)
  22   1.78k    {
  23   1.78k      tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
  24   1.78k      tensor->dataof = 0;
  25   1.78k      tensor->alias_ref = 0;
  26   1.78k      tensor->sig = 0;
  27   1.78k      tensor->refcount = 1;
  28   1.78k      tensor->info = params;
  29   1.78k      if (tfb)
  30      59      {
  31      59        tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
  32                // This corresponds to mat->step
  33      59        tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
  34      59      } else // This won't be recognized by ccv_dense_matrix_t
  35   1.72k        tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
  36   1.78k      tensor->data.u8 = (uint8_t*)ptr;
  37   1.78k      return tensor;
  38   1.78k    }
  39   47.3k    if (flags & CCV_TENSOR_CPU_MEMORY)
  40       0    {
  41       0      assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  42   47.3k    } else if (flags & CCV_TENSOR_GPU_MEMORY) {
  43       0      assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
  44       0    }
  45   47.3k    const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 63) & -64;
  46   47.3k    const size_t size = ccv_nnc_tensor_data_size(params);
  47   47.3k  #ifdef HAVE_CUDA
  48   47.3k    if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
  49   2.57k    {
  50   2.57k      tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
  51   2.57k      assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
  52   2.57k      if (size > 0)
  53   2.57k        tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
  54       0      else
  55       0        tensor->data.u8 = 0;
  56   44.7k    } else {
  57   44.7k      assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  58   44.7k      ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
  59   44.7k      if (size > 0)
  60   44.7k        tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
  61       0      else
  62       0        tensor->data.u8 = 0;
  63   44.7k    }
  64          #elif defined(HAVE_MPS)
  65            if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
  66            {
  67              tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
  68              assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
  69              if (size > 0)
  70                tensor->data.u8 = (uint8_t*)mpobjmalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
  71              else
  72                tensor->data.u8 = 0;
  73            } else {
  74              assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  75              ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
  76              if (size > 0)
  77                tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
  78              else
  79                tensor->data.u8 = 0;
  80            }
  81          #else
  82            assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  83            ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
  84            if (size > 0)
  85              tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
  86            else
  87              tensor->data.u8 = 0;
  88          #endif
  89   47.3k    tensor->dataof = 0;
  90   47.3k    tensor->alias_ref = 0;
  91   47.3k    tensor->data_size = size;
  92   47.3k    tensor->sig = 0;
  93   47.3k    tensor->refcount = 1;
  94   47.3k    tensor->info = params;
  95   47.3k    if (tfb)
  96   4.39k    {
  97   4.39k      tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
  98                // This corresponds to mat->step
  99   4.39k      tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
 100   4.39k    } else
 101   42.9k      tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
 102   47.3k    return tensor;
 103   47.3k  }
 104
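The counts above show the common case: 47.3k of 49.1k calls allocate fresh memory, and on CPU the header and payload share one 64-byte-aligned block. A minimal usage sketch, assuming the ccv_nnc_tensor_param_t fields this file reads (.type, .format, .datatype, .dim) can be filled with a designated initializer:

    // Hedged sketch: a 2x3x4 NHWC float tensor on the CPU.
    const ccv_nnc_tensor_param_t params = {
      .type = CCV_TENSOR_CPU_MEMORY,
      .format = CCV_TENSOR_FORMAT_NHWC,
      .datatype = CCV_32F,
      .dim = {2, 3, 4},
    };
    ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, params, 0); // null ptr => allocate
    t->data.f32[0] = 1; // payload starts right after the aligned header
    ccv_nnc_tensor_free(t);

With dim = {rows, cols, channels} and channels <= CCV_MAX_CHANNEL, this shape also satisfies the tfb test above, so the returned header doubles as a ccv_dense_matrix_t.
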
 105          ccv_nnc_tensor_t* ccv_nnc_tensor_new_from_file(const ccv_nnc_tensor_param_t params, const char* const filename, const off_t offset, const int flags)
 106       4  {
 107       4    ccv_nnc_tensor_t* tensor;
 108            // this specific form can be toll-free bridged to ccv_dense_matrix_t (on CPU, 3 dims (channels, rows, cols), and channels smaller than the max channels of ccv_dense_matrix_t).
 109       4    const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC /*2*/ && params.dim[2] > 0 /*2*/ && params.dim[2] <= CCV_MAX_CHANNEL /*0*/ && params.dim[0] > 0 /*0*/ && params.dim[1] > 0 /*0*/ && params.dim[3] == 0 /*0*/);
 110       4    tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
 111       4    tensor->dataof = 0;
 112       4    tensor->alias_ref = 0;
 113       4    tensor->sig = 0;
 114       4    tensor->refcount = 1;
 115       4    tensor->info = params;
 116       4    if (tfb)
 117       0    {
 118       0      tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
 119              // This corresponds to mat->step
 120       0      tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
 121       0    } else // This won't be recognized by ccv_dense_matrix_t
 122       4      tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
 123       4    const size_t size = ccv_nnc_tensor_data_size(params);
 124       4  #ifdef HAVE_CUDA
 125       4    if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
 126       2    {
 127              // Remove this flag so it can be deallocated as usual.
 128       2      tensor->type &= ~CCV_NO_DATA_ALLOC;
 129       2      assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
 130       2      if (size > 0)
 131       2      {
 132       2        void* ptr = 0;
 133                // This is not supported yet on CUDA.
 134       2        if (flags & CCV_NNC_TENSOR_MEMORY_MAP_ON_DEMAND)
 135       0          ptr = cumallocmanaged(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
 136       2        if (ptr) // If allocated successfully. Otherwise we go through the fallback path.
 137       0        {
 138       0          tensor->data.u8 = (uint8_t*)ptr;
 139       0          int fd = open(filename, O_RDONLY, 0);
 140       0          cufileread(fd, offset, tensor->data.u8, size);
 141       0          close(fd);
 142       0          cumemadvisereadmostly(CCV_TENSOR_GET_DEVICE_ID(params.type), tensor->data.u8, size);
 143       0          tensor->type |= CCV_MAPPED_MEM; // This denotes the tensor is mapped to CPU, and would prefer an explicit prefetch call.
 144       2        } else {
 145       2          tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
 146       2          int fd = open(filename, O_RDONLY, 0);
 147       2          cufileread(fd, offset, tensor->data.u8, size);
 148       2          close(fd);
 149       2        }
 150       2      } else
 151       0        tensor->data.u8 = 0;
 152       2    } else {
 153       2      assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 154       2      if (size > 0)
 155       2      {
 156       2        int fd = open(filename, O_RDONLY, 0);
 157       2        void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
 158       2        close(fd);
 159       2        madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
 160       2        tensor->data.u8 = bufptr;
 161       2        tensor->type |= CCV_MAPPED_MEM;
 162       2      } else
 163       0        tensor->data.u8 = 0;
 164       2    }
 165          #elif defined(HAVE_MPS)
 166            if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
 167            {
 168              // Remove this flag so it can be deallocated as usual.
 169              tensor->type &= ~CCV_NO_DATA_ALLOC;
 170              assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
 171              if (size > 0)
 172                tensor->data.u8 = (uint8_t*)mpmemmap(filename, size, offset, flags);
 173              else
 174                tensor->data.u8 = 0;
 175            } else {
 176              assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 177              if (size > 0)
 178              {
 179                int fd = open(filename, O_RDONLY, 0);
 180                void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
 181                close(fd);
 182                madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
 183                tensor->data.u8 = bufptr;
 184                tensor->type |= CCV_MAPPED_MEM;
 185              } else
 186                tensor->data.u8 = 0;
 187            }
 188          #else
 189            assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 190            if (size > 0)
 191            {
 192              int fd = open(filename, O_RDONLY, 0);
 193              void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
 194              close(fd);
 195              madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
 196              tensor->data.u8 = bufptr;
 197              tensor->type |= CCV_MAPPED_MEM;
 198            } else
 199              tensor->data.u8 = 0;
 200          #endif
 201       4    return tensor;
 202       4  }
 203
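On the CPU path the file is never copied: the tensor's payload is the page cache itself, and CCV_MAPPED_MEM tells ccv_nnc_tensor_free to munmap rather than ccfree the data. A standalone sketch of that branch (plain POSIX, nothing ccv-specific; note that mmap requires the offset to be page-aligned):

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <unistd.h>

    // Map `size` bytes of a read-only file at `offset`, as the CPU branch
    // above does; returns 0 on failure.
    static void* map_weights(const char* filename, size_t size, off_t offset)
    {
      const int fd = open(filename, O_RDONLY, 0);
      if (fd < 0)
        return 0;
      void* const bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
      close(fd); // the mapping outlives the descriptor
      if (bufptr == MAP_FAILED)
        return 0;
      madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED); // read-ahead hint
      return bufptr;
    }
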
 204          ccv_nnc_tensor_t* ccv_nnc_tensor_resize(ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params)
 205   1.27k  {
 206   1.27k    assert(!CCV_IS_TENSOR_VIEW(tensor));
 207   1.27k    assert(tensor->type & CCV_UNMANAGED);
 208   1.27k    assert(tensor->data_size > 0);
 209   1.27k    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GET_MEMORY(tensor->info.type));
 210   1.27k    assert(CCV_TENSOR_GET_DEVICE(params.type) == CCV_TENSOR_GET_DEVICE(tensor->info.type));
 211   1.27k    const size_t size = ccv_nnc_tensor_data_size(params);
 212   1.27k    const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC /*14*/ && params.dim[2] > 0 /*10*/ && params.dim[2] <= CCV_MAX_CHANNEL /*10*/ && params.dim[0] > 0 /*10*/ && params.dim[1] > 0 /*10*/ && params.dim[3] == 0 /*10*/);
 213   1.27k    tensor->info = params;
 214   1.27k  #ifdef HAVE_CUDA
 215   1.27k    const int pinned_mem = (tensor->type & CCV_PINNED_MEM);
 216   1.27k  #endif
 217   1.27k    if (tfb)
 218      10    {
 219      10      tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
 220              // This corresponds to mat->step
 221      10      tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
 222      10    } else
 223   1.26k      tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
 224   1.27k    if (size <= tensor->data_size) // Nothing.
 225   1.27k    {
 226   1.27k  #ifdef HAVE_CUDA
 227   1.27k      if (pinned_mem)
 228       4        tensor->type |= CCV_PINNED_MEM;
 229   1.27k  #endif
 230   1.27k      return tensor;
 231   1.27k    }
 232       1    ccv_nnc_tensor_t* new_tensor = tensor;
 233       1    const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 63) & -64;
 234       1  #ifdef HAVE_CUDA
 235       1    if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
 236       0    {
 237       0      assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
 238       0      const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type);
 239       0      assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type));
 240       0      cufree(device_id, tensor->data.u8);
 241       0      new_tensor->data.u8 = (uint8_t*)cumalloc(device_id, size);
 242       1    } else {
 243       1      assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 244       1      assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
 245              // pin memory again.
 246       1      if (pinned_mem)
 247       0        cuunregister(new_tensor->data.u8);
 248       1      new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
 249       1      new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
 250       1    }
 251          #elif defined(HAVE_MPS)
 252            if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
 253            {
 254              assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
 255              const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type);
 256              assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type));
 257              mpobjfree(device_id, tensor->data.u8);
 258              new_tensor->data.u8 = (uint8_t*)mpobjmalloc(device_id, size);
 259            } else {
 260              assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 261              assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
 262              new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
 263              new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
 264            }
 265          #else
 266            assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 267            new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
 268            new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
 269          #endif
 270       1    new_tensor->data_size = size;
 271       1  #ifdef HAVE_CUDA
 272       1    if (pinned_mem)
 273       0      ccv_nnc_tensor_pin_memory(new_tensor);
 274       1  #endif
 275       1    return new_tensor;
 276       1  }
 277
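Because the CPU branch uses ccrealloc on the combined header-plus-data block, growing a tensor can move it, and the old pointer must not be used afterwards. A hedged usage sketch (params_small and params_big are hypothetical shapes with matching memory type and device):

    // params_small/params_big: hypothetical shapes on the same device.
    ccv_nnc_tensor_t* t = ccv_nnc_tensor_new(0, params_small, 0);
    // Shrinking (or an equal fit) returns t unchanged; growing may move
    // both the header and the data, so always keep the returned pointer.
    t = ccv_nnc_tensor_resize(t, params_big);
    ccv_nnc_tensor_free(t);
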
 278          ccv_nnc_tensor_t ccv_nnc_tensor(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
 279   78.2k  {
 280            // this specific form can be toll-free bridged to ccv_dense_matrix_t
 281   78.2k    const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC /*69.3k*/ && params.dim[2] > 0 /*69.0k*/ && params.dim[2] <= CCV_MAX_CHANNEL /*901*/ && params.dim[0] > 0 /*901*/ && params.dim[1] > 0 /*901*/ && params.dim[3] == 0 /*901*/);
 282   78.2k    ccv_nnc_tensor_t tensor;
 283   78.2k    tensor.dataof = 0;
 284   78.2k    tensor.alias_ref = 0;
 285   78.2k    tensor.sig = 0;
 286   78.2k    tensor.refcount = 1;
 287   78.2k    tensor.info = params;
 288   78.2k    if (flags & CCV_TENSOR_CPU_MEMORY)
 289       0    {
 290       0      assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 291   78.2k    } else if (flags & CCV_TENSOR_GPU_MEMORY) {
 292       0      assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
 293       0    }
 294   78.2k    if (tfb)
 295     194    {
 296     194      tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
 297              // This corresponds to mat->step
 298     194      tensor.info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
 299     194    } else // This won't be recognized by ccv_dense_matrix_t
 300   78.0k      tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
 301   78.2k    if (params.dim[0] > 0)
 302   78.2k      tensor.data.u8 = (uint8_t*)ptr;
 303       0    else
 304       0      tensor.data.u8 = 0;
 305   78.2k    tensor.data_size = 0;
 306   78.2k    return tensor;
 307   78.2k  }
 308
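Unlike ccv_nnc_tensor_new, this returns the struct by value with CCV_NO_DATA_ALLOC set and data_size 0, so it never owns memory and needs no free; it is a stack-friendly way to wrap caller-owned storage. A hedged sketch, reusing the parameter layout from the earlier example:

    float buf[2 * 3 * 4]; // caller-owned storage
    const ccv_nnc_tensor_param_t params = {
      .type = CCV_TENSOR_CPU_MEMORY,
      .format = CCV_TENSOR_FORMAT_NHWC,
      .datatype = CCV_32F,
      .dim = {2, 3, 4},
    };
    const ccv_nnc_tensor_t t = ccv_nnc_tensor(buf, params, 0); // nothing to free
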
 309          int ccv_nnc_tensor_pin_memory(ccv_nnc_tensor_t* const tensor)
 310   1.40k  {
 311   1.40k  #ifdef HAVE_CUDA
 312   1.40k    assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
 313   1.40k    if (!(tensor->type & CCV_PINNED_MEM) && tensor->data_size /*146*/)
 314     146    {
 315     146      const int success = curegister(tensor->data.u8, tensor->data_size);
 316     146      if (success)
 317     146        tensor->type |= CCV_PINNED_MEM;
 318     146      return success ? 0 : -1 /*0*/;
 319     146    }
 320   1.25k  #endif
 321   1.25k    return 0;
 322   1.40k  }
 323
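curegister here is ccv's CUDA compat wrapper; pinning the host pages (presumably a cudaHostRegister-style call underneath, an assumption on my part) lets later host-to-device copies avoid a staging buffer. On non-CUDA builds the body compiles away and the call returns 0. A hedged one-liner:

    // t is a CPU tensor from the earlier sketches; returns 0 on success,
    // -1 if registration failed. ccv_nnc_tensor_free undoes the pin.
    const int err = ccv_nnc_tensor_pin_memory(t);
    (void)err;
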
 324          void ccv_nnc_tensor_free(ccv_nnc_tensor_t* const tensor)
 325   49.1k  {
 326   49.1k    if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY && (tensor->type & CCV_MAPPED_MEM) /*46.0k*/)
 327       2    {
 328              // The size might be different from the one we allocated with (for example, the tensor might have rewritten its size to be smaller).
 329              // This might cause issues in the future.
 330       2      const size_t size = ccv_nnc_tensor_data_size(tensor->info);
 331       2      munmap(tensor->data.u8, size);
 332       2    }
 333   49.1k  #ifdef HAVE_CUDA
 334   49.1k    if (tensor->type & CCV_PINNED_MEM)
 335     146      cuunregister(tensor->data.u8);
 336   49.1k    if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY &&
 337   49.1k      !(tensor->type & CCV_NO_DATA_ALLOC) /*3.06k*/) // If this is GPU memory and it is allocated, free.
 338   2.57k      cufree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8);
 339          #elif defined(HAVE_MPS)
 340            if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY &&
 341              !(tensor->type & CCV_NO_DATA_ALLOC)) // If this is GPU memory and it is allocated, free.
 342              mpobjfree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8);
 343          #endif
 344   49.1k    ccfree(tensor);
 345   49.1k  }
 346
 347          static inline void _ccv_nnc_tensor_view_set(ccv_nnc_tensor_view_t* const tv, const ccv_nnc_tensor_t* const tensor, const int dim[CCV_NNC_MAX_DIM_ALLOC], const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
 348     177  {
 349     177    memcpy(tv->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
 350     177    memcpy(tv->info.dim, dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
 351     177    uint8_t* const p = tensor->data.u8;
 352     177    const off_t off = tv->off = ccv_nnc_tensor_view_offset(tv->info.datatype, stride, ofs);
 353     177    tv->contiguous = ccv_nnc_tensor_view_is_contiguous(dim, stride);
 354     177    assert(off + CCV_GET_DATA_TYPE_SIZE(tv->info.datatype) * ccv_nnc_dimension_upper_bound(tv->info.dim, tv->stride) <= CCV_GET_DATA_TYPE_SIZE(tensor->info.datatype) * ccv_nnc_tensor_count(tensor->info));
 355     177    ccv_nnc_tensor_data(tv->info, p, off + tensor->dataof, &tv->data, &tv->dataof);
 356     177  }
 357
 358          ccv_nnc_tensor_view_t* ccv_nnc_tensor_view_new(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
 359      96  {
 360      96    ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)ccmalloc(sizeof(ccv_nnc_tensor_view_t));
 361      96    tv->type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW;
 362      96    tv->dataof = 0;
 363      96    tv->alias_ref = (uintptr_t)tensor;
 364      96    tv->refcount = 1;
 365      96    tv->sig = 0;
 366      96    tv->data_size = 0;
 367      96    assert(params.type == tensor->info.type);
 368      96    assert(params.datatype == tensor->info.datatype);
 369      96    tv->info = params;
 370      96    _ccv_nnc_tensor_view_set(tv, tensor, params.dim, ofs, stride);
 371      96    return tv;
 372      96  }
 373
 374          ccv_nnc_tensor_view_t ccv_nnc_tensor_view(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
 375      81  {
 376      81    assert(!CCV_IS_TENSOR_VIEW(tensor));
 377      81    assert(params.type == tensor->info.type);
 378      81    assert(params.datatype == tensor->info.datatype);
 379      81    ccv_nnc_tensor_view_t tv = {
 380      81      .dataof = 0,
 381      81      .alias_ref = (uintptr_t)tensor,
 382      81      .type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW, // clean up the channel bits, and then add the CCV_TENSOR_VIEW identifier
 383      81      .refcount = 1,
 384      81      .sig = 0,
 385      81      .info = params,
 386      81      .data_size = 0,
 387      81    };
 388      81    _ccv_nnc_tensor_view_set(&tv, tensor, params.dim, ofs, stride);
 389      81    return tv;
 390      81  }
 391
 392          void ccv_nnc_tensor_view_free(ccv_nnc_tensor_view_t* const tensor_view)
 393      96  {
 394      96    ccfree(tensor_view);
 395      96  }
 396
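Views share the parent's storage: _ccv_nnc_tensor_view_set only records dim and stride, computes the byte offset from ofs, and asserts the view stays within the parent's bounds. A hedged sketch that carves a 2x2 window out of a 4x4 CPU float matrix t (dim = {4, 4}, built as in the earlier sketches):

    int stride[CCV_NNC_MAX_DIM_ALLOC];
    ccv_nnc_tensor_get_stride(t->info.dim, stride); // parent's natural strides {4, 1}
    const int ofs[CCV_NNC_MAX_DIM_ALLOC] = {1, 1};  // start at row 1, col 1
    ccv_nnc_tensor_param_t vp = t->info;
    vp.dim[0] = 2; // 2 rows ...
    vp.dim[1] = 2; // ... and 2 cols of the parent
    ccv_nnc_tensor_view_t* const tv = ccv_nnc_tensor_view_new(t, vp, ofs, stride);
    // ... use tv->data like any tensor, minding tv->stride ...
    ccv_nnc_tensor_view_free(tv); // frees the view header only, never t's data
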
 397          void _ccv_nnc_tensor_set_zero(unsigned char* u8, const int nd, const int* const dim, const int* const stride, const size_t data_size)
 398     107  {
 399     107    if (nd == 1)
 400       0    {
 401       0      if (stride[0] == 1)
 402       0      {
 403       0        memset(u8, 0, data_size * dim[0]);
 404       0        return;
 405       0      }
 406       0      int i;
 407       0      for (i = 0; i < dim[0]; i++)
 408       0        memset(u8 + i * stride[0] * data_size, 0, data_size);
 409     107    } else if (nd == 2) {
 410       2      if (stride[1] == 1 && stride[0] == dim[1])
 411       0      {
 412       0        memset(u8, 0, data_size * dim[1] * dim[0]);
 413       0        return;
 414       0      }
 415       2      int x, y;
 416       8      for (y = 0; y < dim[0]; y++ /*6*/)
 417       6      {
 418       6        unsigned char* const u8y = u8 + y * stride[0] * data_size;
 419      18        for (x = 0; x < dim[1]; x++ /*12*/)
 420      12          memset(u8y + x * stride[1] * data_size, 0, data_size);
 421       6      }
 422     105    } else if (nd == 3) {
 423       0      if (stride[2] == 1 && stride[1] == dim[2] && stride[0] == dim[1] * dim[2])
 424       0      {
 425       0        memset(u8, 0, data_size * dim[2] * dim[1] * dim[0]);
 426       0        return;
 427       0      }
 428       0      int x, y, z;
 429       0      for (z = 0; z < dim[0]; z++)
 430       0      {
 431       0        unsigned char* const u8z = u8 + z * stride[0] * data_size;
 432       0        for (y = 0; y < dim[1]; y++)
 433       0        {
 434       0          unsigned char* const u8y = u8z + y * stride[1] * data_size;
 435       0          for (x = 0; x < dim[2]; x++)
 436       0            memset(u8y + x * stride[2] * data_size, 0, data_size);
 437       0        }
 438       0      }
 439     105    } else if (nd == 4) {
 440      96      if (stride[3] == 1 && stride[2] == dim[3] && stride[1] == dim[2] * dim[3] /*0*/ && stride[0] == dim[1] * dim[2] * dim[3] /*0*/)
 441       0      {
 442       0        memset(u8, 0, data_size * dim[3] * dim[2] * dim[1] * dim[0]);
 443       0        return;
 444       0      }
 445      96      int x, y, z, s;
 446   1.53k      for (s = 0; s < dim[0]; s++ /*1.44k*/)
 447   1.44k      {
 448   1.44k        unsigned char* const u8s = u8 + s * stride[0] * data_size;
 449   4.32k        for (z = 0; z < dim[1]; z++ /*2.88k*/)
 450   2.88k        {
 451   2.88k          unsigned char* const u8z = u8s + z * stride[1] * data_size;
 452   11.5k          for (y = 0; y < dim[2]; y++ /*8.64k*/)
 453   8.64k          {
 454   8.64k            unsigned char* const u8y = u8z + y * stride[2] * data_size;
 455   43.2k            for (x = 0; x < dim[3]; x++ /*34.5k*/)
 456   34.5k              memset(u8y + x * stride[3] * data_size, 0, data_size);
 457   8.64k          }
 458   2.88k        }
 459   1.44k      }
 460      96    } else {
 461       9      int i;
 462     113      for (i = 0; i < dim[0]; i++ /*104*/)
 463     104        _ccv_nnc_tensor_set_zero(u8 + i * stride[0] * data_size, nd - 1, dim + 1, stride + 1, data_size);
 464       9    }
 465     107  }
 466
 467          void ccv_nnc_tensor_zero(void* const tensor)
 468   17.1k  {
 469   17.1k    ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)tensor;
 470   17.1k    const size_t data_size = CCV_GET_DATA_TYPE_SIZE(tv->info.datatype);
 471   17.1k    if (CCV_IS_TENSOR_CONTIGUOUS(tv))
 472   17.1k    {
 473   17.1k      memset(tv->data.u8, 0, data_size * ccv_nnc_tensor_count(tv->info));
 474   17.1k      return;
 475   17.1k    }
 476       3    const int nd = ccv_nnc_tensor_nd(tv->info.dim);
 477       3    assert(nd >= 1);
 478       3    const int* const tvstride = tv->stride;
 479            // Go through this recursively.
 480       3    _ccv_nnc_tensor_set_zero(tv->data.u8, nd, tv->info.dim, tvstride, data_size);
 481       3  }
 482
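The counts above show the split: 17.1k contiguous calls take the single memset, while only 3 strided calls walk the recursive path. Continuing the hedged view sketch from earlier:

    // Clears just the 2x2 window (4 elements) of the 4x4 parent, following
    // tv->stride rather than wiping the whole 16-element buffer.
    ccv_nnc_tensor_zero(tv);
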
 483          int ccv_nnc_tensor_eq(const ccv_nnc_tensor_t* const a, const ccv_nnc_tensor_t* const b)
 484     838  {
 485     838    assert(!CCV_IS_TENSOR_VIEW(a));
 486     838    assert(!CCV_IS_TENSOR_VIEW(b));
 487            // If a is a dense matrix, just use ccv_matrix_eq
 488     838    if (CCV_TENSOR_IS_DENSE_MATRIX(a->type))
 489     130      return ccv_matrix_eq((ccv_matrix_t*)a, (ccv_matrix_t*)b);
 490            // Otherwise, do our own thing.
 491     708    if (CCV_GET_DATA_TYPE(a->type) != CCV_GET_DATA_TYPE(b->type))
 492       0      return -1;
 493     708    int i, c = 1;
 494   2.01k    for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; i++ /*1.30k*/)
 495   2.01k    {
 496   2.01k      if (!a->info.dim[i] && !b->info.dim[i] /*708*/)
 497     708        break;
 498   1.30k      if (a->info.dim[i] != b->info.dim[i])
 499       0        return -1;
 500   1.30k      c *= a->info.dim[i];
 501   1.30k    }
 502     708    if (CCV_GET_DATA_TYPE(a->type) == CCV_32S)
 503      91      return memcmp(a->data.i32, b->data.i32, sizeof(int) * c) == 0 ? 0 : -1 /*0*/;
 504            // Only 32F and 64F are supported beyond this point.
 505     708    assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F || CCV_GET_DATA_TYPE(a->type) == CCV_64F /*617*/);
 506            // Read: http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
 507            // http://floating-point-gui.de/errors/comparison/
 508     617    if (CCV_GET_DATA_TYPE(a->type) == CCV_32F)
 509     615    {
 510     615      static const float epsi = FLT_EPSILON;
 511     615      static const int32_t ulps = 128; // so that 1 and 1.000015 will be treated as the same.
 512   30.4M      for (i = 0; i < c; i++ /*30.4M*/)
 513   30.4M      {
 514                // Although this is floating point, I use integers as a way to compare.
 515   30.4M        int32_t i32a = a->data.i32[i];
 516   30.4M        if (i32a < 0)
 517   4.82M          i32a = 0x80000000 - i32a;
 518   30.4M        int32_t i32b = b->data.i32[i];
 519   30.4M        if (i32b < 0)
 520   4.82M          i32b = 0x80000000 - i32b;
 521   30.4M        if (abs(i32a - i32b) > ulps && fabsf(a->data.f32[i] - b->data.f32[i]) > epsi /*9.11k*/)
 522       0          return -1;
 523   30.4M      }
 524     615    } else if (CCV_GET_DATA_TYPE(a->type) == CCV_64F /*2*/) {
 525       2      typedef union {
 526       2        double f64;
 527       2        int64_t i64;
 528       2      } Float64;
 529       2      static const double epsi = DBL_EPSILON;
 530       2      static const int64_t ulps = 128; // so that 1 and 1.000015 will be treated as the same.
 531   15.8k      for (i = 0; i < c; i++ /*15.8k*/)
 532   15.8k      {
 533                // Although this is floating point, I use integers as a way to compare.
 534   15.8k        Float64 f64a, f64b;
 535   15.8k        f64a.f64 = a->data.f64[i];
 536   15.8k        f64b.f64 = b->data.f64[i];
 537   15.8k        if (f64a.i64 < 0)
 538       0          f64a.i64 = 0x8000000000000000 - f64a.i64;
 539   15.8k        if (f64b.i64 < 0)
 540       0          f64b.i64 = 0x8000000000000000 - f64b.i64;
 541   15.8k        if (llabs(f64a.i64 - f64b.i64) > ulps && fabs(a->data.f64[i] - b->data.f64[i]) > epsi /*0*/)
 542       0          return -1;
 543   15.8k      }
 544       2    }
 545     617    return 0;
 546     617  }
 547
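The float comparison leans on an IEEE-754 property: reinterpreted as integers, same-sign floats are ordered, and after remapping negatives with 0x80000000 - x the whole number line becomes monotonic, so an integer difference counts the representable values (ULPs) between two floats. A standalone hedged sketch of the 32-bit version (memcpy instead of the union/pointer pun, otherwise the same logic; like the code above, it assumes the values are close enough that the difference does not overflow):

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    static int almost_equal_f32(float a, float b, int32_t max_ulps)
    {
      int32_t ia, ib;
      memcpy(&ia, &a, sizeof(ia)); // well-defined type pun
      memcpy(&ib, &b, sizeof(ib));
      if (ia < 0)
        ia = (int32_t)0x80000000 - ia; // map negatives onto a monotonic line
      if (ib < 0)
        ib = (int32_t)0x80000000 - ib;
      return abs(ia - ib) <= max_ulps; // distance in representable floats
    }
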
 548          static void _strcat(char** str, int* written, size_t* len, char* from, int from_size)
 549   1.34k  {
 550   1.34k    if (*len - *written < from_size)
 551       0    {
 552       0      *len += from_size * 2;
 553       0      *str = (char*)ccrealloc(*str, *len);
 554       0    }
 555   1.34k    memcpy(*str + *written, from, from_size);
 556   1.34k    *written += from_size;
 557   1.34k  }
 558
 559     648  #define _STRPRINTF(str, written, len, format, ...) \
 560     648  do { \
 561     648    const int newly_written = snprintf((str) + (written), (len) - (written), format, ## __VA_ARGS__); \
 562     648    if ((len) - (written) < newly_written) \
 563     648    { \
 564       0      (len) += newly_written * 2; \
 565       0      (str) = (char*)ccrealloc((str), (len)); \
 566       0      (written) += snprintf((str) + (written), (len) - (written), format, ## __VA_ARGS__); \
 567       0    } else \
 568     648      (written) += newly_written; \
 569     648  } while (0)
 570
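C99 snprintf returns the full length it would have produced, so a truncated write doubles as a size probe: grow, then print again. The macro above implements that idiom; its grow branch ran 0 times here because the initial buffer estimate in ccv_nnc_tensor_format_new was always large enough. A minimal standalone rendering of the same pattern, hedged (it assumes *str was malloc'ed with capacity *len, and uses >= to also catch the exact-fit boundary):

    #include <stdio.h>
    #include <stdlib.h>

    // Append formatted output to a growable buffer, as _STRPRINTF does.
    static void append_i32(char** str, int* written, size_t* len, int v)
    {
      int n = snprintf(*str + *written, *len - *written, "%10d", v);
      if ((size_t)n >= *len - *written) // truncated: snprintf reported the need
      {
        *len += (size_t)n * 2;
        *str = (char*)realloc(*str, *len);
        n = snprintf(*str + *written, *len - *written, "%10d", v);
      }
      *written += n;
    }
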
 571          static void _strv(char** str, int* written, size_t* len, const ccv_nnc_tensor_t* const a, int i)
 572     648  {
 573     648    if (a->info.datatype == CCV_32F)
 574       0      _STRPRINTF(*str, *written, *len, "%10.5g", a->data.f32[i]);
 575     648    else if (a->info.datatype == CCV_64F)
 576       0      _STRPRINTF(*str, *written, *len, "%10.5g", a->data.f64[i]);
 577     648    else if (a->info.datatype == CCV_16F) {
 578       0      float v;
 579       0      ccv_half_precision_to_float((uint16_t*)(a->data.f16 + i), &v, 1);
 580       0      _STRPRINTF(*str, *written, *len, "%10.5g", v);
 581     648    } else if (a->info.datatype == CCV_32S)
 582     648      _STRPRINTF(*str, *written, *len, "%10d", a->data.i32[i]);
 583       0    else if (a->info.datatype == CCV_64S)
 584       0      _STRPRINTF(*str, *written, *len, "%12lld", (long long int)a->data.i64[i]);
 585       0    else if (a->info.datatype == CCV_8U)
 586       0      _STRPRINTF(*str, *written, *len, "%3d", (int)a->data.u8[i]);
 587     648  }
 588
 589          static void _strt(char** str, int* written, size_t* len, const ccv_nnc_tensor_t* const a, int nd, int spacer, const int* const dim, const int* const stride, int idx)
 590      28  {
 591      28    assert(nd != 1);
 592      28    if (nd == 2)
 593      17    {
 594              // Print columns and the rows.
 595      17      int i, j, k;
 596      17      if (dim[0] <= 8)
 597       1      {
 598       5        for (i = 0; i < dim[0]; i++ /*4*/)
 599       4        {
 600       4          if (i != 0)
 601       3          {
 602       3            _strcat(str, written, len, "  ", 2);
 603       3            for (k = 0; k < spacer; k++ /*0*/)
 604       0              _strcat(str, written, len, " ", 1);
 605       3          }
 606       4          _strcat(str, written, len, "[", 1);
 607       4          if (dim[1] <= 8)
 608       4          {
 609      20            for (j = 0; j < dim[1]; j++ /*16*/)
 610      16            {
 611      16              _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 612      16              if (j < dim[1] - 1)
 613      12                _strcat(str, written, len, ", ", 2);
 614      16            }
 615       4            if (i < dim[0] - 1)
 616       3              _strcat(str, written, len, "],\n", 3);
 617       4          } else {
 618       0            for (j = 0; j < 3; j++)
 619       0            {
 620       0              _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 621       0              _strcat(str, written, len, ", ", 2);
 622       0            }
 623       0            _strcat(str, written, len, " ..., ", 6);
 624       0            for (j = dim[1] - 3; j < dim[1]; j++)
 625       0            {
 626       0              _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 627       0              if (j < dim[1] - 1)
 628       0                _strcat(str, written, len, ", ", 2);
 629       0            }
 630       0            if (i < dim[0] - 1)
 631       0              _strcat(str, written, len, "],\n", 3);
 632       0          }
 633       4        }
 634       1        _strcat(str, written, len, "]", 1);
 635      16      } else {
 636      64        for (i = 0; i < 3; i++ /*48*/)
 637      48        {
 638      48          if (i != 0)
 639      32          {
 640      32            _strcat(str, written, len, "  ", 2);
 641     128            for (k = 0; k < spacer; k++ /*96*/)
 642      96              _strcat(str, written, len, " ", 1);
 643      32          }
 644      48          _strcat(str, written, len, "[", 1);
 645      48          if (dim[1] <= 8)
 646       0          {
 647       0            for (j = 0; j < dim[1]; j++)
 648       0            {
 649       0              _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 650       0              if (j < dim[1] - 1)
 651       0                _strcat(str, written, len, ", ", 2);
 652       0            }
 653       0            _strcat(str, written, len, "],\n", 3);
 654      48          } else {
 655     192            for (j = 0; j < 3; j++ /*144*/)
 656     144            {
 657     144              _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 658     144              _strcat(str, written, len, ", ", 2);
 659     144            }
 660      48            _strcat(str, written, len, " ..., ", 6);
 661     192            for (j = dim[1] - 3; j < dim[1]; j++ /*144*/)
 662     144            {
 663     144              _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 664     144              if (j < dim[1] - 1)
 665      96                _strcat(str, written, len, ", ", 2);
 666     144            }
 667      48            _strcat(str, written, len, "],\n", 3);
 668      48          }
 669      48        }
 670      16        _strcat(str, written, len, "  ", 2);
 671      64        for (k = 0; k < spacer; k++ /*48*/)
 672      48          _strcat(str, written, len, " ", 1);
 673      16        _strcat(str, written, len, "...,\n", 5);
 674      64        for (i = dim[0] - 3; i < dim[0]; i++ /*48*/)
 675      48        {
 676      48          _strcat(str, written, len, "  ", 2);
 677     192          for (k = 0; k < spacer; k++ /*144*/)
 678     144            _strcat(str, written, len, " ", 1);
 679      48          _strcat(str, written, len, "[", 1);
 680      48          if (dim[1] < 8)
 681       0          {
 682       0            for (j = 0; j < dim[1]; j++)
 683       0            {
 684       0              _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 685       0              if (j < dim[1] - 1)
 686       0                _strcat(str, written, len, ", ", 2);
 687       0            }
 688       0            if (i < dim[0] - 1)
 689       0              _strcat(str, written, len, "],\n", 3);
 690      48          } else {
 691     192            for (j = 0; j < 3; j++ /*144*/)
 692     144            {
 693     144              _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 694     144              _strcat(str, written, len, ", ", 2);
 695     144            }
 696      48            _strcat(str, written, len, " ..., ", 6);
 697     192            for (j = dim[1] - 3; j < dim[1]; j++ /*144*/)
 698     144            {
 699     144              _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 700     144              if (j < dim[1] - 1)
 701      96                _strcat(str, written, len, ", ", 2);
 702     144            }
 703      48            if (i < dim[0] - 1)
 704      32              _strcat(str, written, len, "],\n", 3);
 705      48          }
 706      48        }
 707      16        _strcat(str, written, len, "]", 1);
 708      16      }
 709      17      return;
 710      17    }
 711      11    int i, j;
 712      11    if (dim[0] > 4)
 713       2    {
 714       6      for (i = 0; i < 2; i++ /*4*/)
 715       4      {
 716       4        _strcat(str, written, len, "[", 1);
 717       4        _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
 718       4        _strcat(str, written, len, "],\n  ", 5);
 719       8        for (j = 0; j < spacer; j++ /*4*/)
 720       4          _strcat(str, written, len, " ", 1);
 721       4      }
 722       2      _strcat(str, written, len, "...,\n", 5);
 723       2      _strcat(str, written, len, "  ", 2);
 724       4      for (j = 0; j < spacer; j++ /*2*/)
 725       2        _strcat(str, written, len, " ", 1);
 726       6      for (i = dim[0] - 2; i < dim[0]; i++ /*4*/)
 727       4      {
 728       4        _strcat(str, written, len, "[", 1);
 729       4        _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
 730       4        if (i < dim[0] - 1)
 731       2        {
 732       2          _strcat(str, written, len, "],\n  ", 5);
 733       4          for (j = 0; j < spacer; j++ /*2*/)
 734       2            _strcat(str, written, len, " ", 1);
 735       2        }
 736       4      }
 737       2      _strcat(str, written, len, "]", 1);
 738       9    } else {
 739      27      for (i = 0; i < dim[0]; i++ /*18*/)
 740      18      {
 741      18        _strcat(str, written, len, "[", 1);
 742      18        _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
 743      18        if (i < dim[0] - 1)
 744       9        {
 745       9          _strcat(str, written, len, "],\n", 3);
 746       9          _strcat(str, written, len, "  ", 2);
 747      25          for (j = 0; j < spacer; j++ /*16*/)
 748      16            _strcat(str, written, len, " ", 1);
 749       9        }
 750      18      }
 751       9      _strcat(str, written, len, "]", 1);
 752       9    }
 753      11  }
 754
 755          char* ccv_nnc_tensor_format_new(const ccv_nnc_tensor_t* const a)
 756       4  {
 757       4    const int nd = ccv_nnc_tensor_nd(a->info.dim);
 758       4    int i;
 759       4    int rows = 8; // 8 rows for the first one, and then just first and last.
 760       7    for (i = 2; i < nd; i++ /*3*/)
 761       3      rows *= 5; // Maximum 3 rows beyond the first two.
 762       4    int columns = nd * 2 + 16 * 8;
 763       4    size_t len = sizeof(char) * columns * rows;
 764            // Allocate return string buffer.
 765       4    char* str = (char*)ccmalloc(len);
 766       4    int written = 0;
 767       4    int stride[CCV_NNC_MAX_DIM_ALLOC];
 768       4    if (CCV_IS_TENSOR_VIEW(a))
 769       0      memcpy(stride, ((ccv_nnc_tensor_view_t*)a)->stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
 770       4    else
 771       4      ccv_nnc_tensor_get_stride(a->info.dim, stride);
 772       4    _strcat(&str, &written, &len, "[\n  ", 4);
 773       4    if (nd == 1)
 774       2    {
 775              // Special casing for vector.
 776       2      if (a->info.dim[0] <= 64)
 777      13        for (i = 0 /*1*/; i < a->info.dim[0]; i++ /*12*/)
 778      12        {
 779      12          _strv(&str, &written, &len, a, i * stride[0]);
 780      12          if (i < a->info.dim[0] - 1)
 781      11          {
 782      11            if ((i + 1) % 8 == 0)
 783       1              _strcat(&str, &written, &len, ",\n  ", 4);
 784      10            else
 785      10              _strcat(&str, &written, &len, ", ", 2);
 786      11          }
 787      12        }
 788       1      else {
 789              // First 3 rows.
 790      25        for (i = 0; i < 24; i++ /*24*/)
 791      24        {
 792      24          _strv(&str, &written, &len, a, i * stride[0]);
 793      24          if ((i + 1) % 8 == 0)
 794       3            _strcat(&str, &written, &len, ",\n  ", 4);
 795      21          else
 796      21            _strcat(&str, &written, &len, ", ", 2);
 797      24        }
 798       1        _strcat(&str, &written, &len, "...,\n  ", 7);
 799                // Last 3 rows (aligned to 8 items per row).
 800       1        int start = ((a->info.dim[0] + 7) / 8 - 3) * 8;
 801      21        for (i = start; i < a->info.dim[0]; i++ /*20*/)
 802      20        {
 803      20          _strv(&str, &written, &len, a, i * stride[0]);
 804      20          if (i < a->info.dim[0] - 1)
 805      19          {
 806      19            if ((i + 1) % 8 == 0)
 807       2              _strcat(&str, &written, &len, ",\n  ", 4);
 808      17            else
 809      17              _strcat(&str, &written, &len, ", ", 2);
 810      19          }
 811      20        }
 812       1      }
 813       2    } else {
 814       2      _strt(&str, &written, &len, a, nd, 0, a->info.dim, stride, 0);
 815       2    }
 816       4    _strcat(&str, &written, &len, "\n]", 3); // Including the terminal \0.
 817       4    str = (char*)ccrealloc(str, written); // Don't need the extra spaces.
 818       4    return str;
 819       4  }
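The returned buffer is heap-allocated with ccmalloc/ccrealloc, NUL-terminated by the final _strcat (its size 3 includes the terminator), and trimmed to the written length, so the caller owns it and releases it with ccfree. Hedged usage:

    // Render and print a tensor created as in the earlier sketches.
    char* const str = ccv_nnc_tensor_format_new(t);
    printf("%s\n", str);
    ccfree(str);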