Coverage Report

Created: 2024-08-18 16:21

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_tensor.c
 Line|  Count|Source
    1|       |#include "ccv_nnc.h"
    2|       |#include "ccv_nnc_easy.h"
    3|       |#include "ccv_nnc_internal.h"
    4|       |#ifdef HAVE_CUDA
    5|       |#include "gpu/ccv_nnc_compat.h"
    6|       |#elif defined(HAVE_MPS)
    7|       |#include "mps/ccv_nnc_mps.h"
    8|       |#endif
    9|       |#include <fcntl.h>
   10|       |#include <sys/mman.h>
   11|       |
   12|       |// MARK - Level-1 API
   13|       |
   14|       |const int ccv_nnc_no_ofs[CCV_NNC_MAX_DIM_ALLOC] = {0};
   15|       |
   16|       |ccv_nnc_tensor_t* ccv_nnc_tensor_new(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
   17|  49.1k|{
   18|  49.1k|  ccv_nnc_tensor_t* tensor;
   19|       |  // this specific form can be toll-free bridged to ccv_dense_matrix_t (on CPU, 3 dims (channels, rows, cols), and channels no larger than the max channels of ccv_dense_matrix_t).
   20|  49.1k|  const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
   21|  49.1k|  if (ptr || (flags & CCV_NO_DATA_ALLOC))
   22|  1.68k|  {
   23|  1.68k|    tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
   24|  1.68k|    tensor->dataof = 0;
   25|  1.68k|    tensor->alias_ref = 0;
   26|  1.68k|    tensor->sig = 0;
   27|  1.68k|    tensor->refcount = 1;
   28|  1.68k|    tensor->info = params;
   29|  1.68k|    if (tfb)
   30|     59|    {
   31|     59|      tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
   32|       |      // This corresponds to mat->step
   33|     59|      tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
   34|     59|    } else // This won't be recognized by ccv_dense_matrix_t
   35|  1.62k|      tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
   36|  1.68k|    tensor->data.u8 = (uint8_t*)ptr;
   37|  1.68k|    return tensor;
   38|  1.68k|  }
   39|  47.4k|  if (flags & CCV_TENSOR_CPU_MEMORY)
   40|      0|  {
   41|      0|    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
   42|  47.4k|  } else if (flags & CCV_TENSOR_GPU_MEMORY) {
   43|      0|    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
   44|      0|  }
   45|  47.4k|  const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 63) & -64;
   46|  47.4k|  const size_t size = ccv_nnc_tensor_data_size(params);
   47|  47.4k|#ifdef HAVE_CUDA
   48|  47.4k|  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
   49|  2.77k|  {
   50|  2.77k|    tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
   51|  2.77k|    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
   52|  2.77k|    if (size > 0)
   53|  2.77k|      tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
   54|      0|    else
   55|      0|      tensor->data.u8 = 0;
   56|  44.7k|  } else {
   57|  44.7k|    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
   58|  44.7k|    ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
   59|  44.7k|    if (size > 0)
   60|  44.7k|      tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
   61|      0|    else
   62|      0|      tensor->data.u8 = 0;
   63|  44.7k|  }
   64|       |#elif defined(HAVE_MPS)
   65|       |  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
   66|       |  {
   67|       |    tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
   68|       |    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
   69|       |    if (size > 0)
   70|       |      tensor->data.u8 = (uint8_t*)mpobjmalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
   71|       |    else
   72|       |      tensor->data.u8 = 0;
   73|       |  } else {
   74|       |    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
   75|       |    ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
   76|       |    if (size > 0)
   77|       |      tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
   78|       |    else
   79|       |      tensor->data.u8 = 0;
   80|       |  }
   81|       |#else
   82|       |  assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
   83|       |  ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
   84|       |  if (size > 0)
   85|       |    tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
   86|       |  else
   87|       |    tensor->data.u8 = 0;
   88|       |#endif
   89|  47.4k|  tensor->dataof = 0;
   90|  47.4k|  tensor->alias_ref = 0;
   91|  47.4k|  tensor->data_size = size;
   92|  47.4k|  tensor->sig = 0;
   93|  47.4k|  tensor->refcount = 1;
   94|  47.4k|  tensor->info = params;
   95|  47.4k|  if (tfb)
   96|  4.33k|  {
   97|  4.33k|    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
   98|       |    // This corresponds to mat->step
   99|  4.33k|    tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
  100|  4.33k|  } else
  101|  43.1k|    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
  102|  47.4k|  return tensor;
  103|  47.4k|}
  104|       |
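A minimal usage sketch for ccv_nnc_tensor_new (not part of the coverage listing; the 2x3x4 shape is made up, while the param fields and constants are the ones the source above references). With a NULL ptr and no CCV_NO_DATA_ALLOC flag, the second path runs: header and data come from one 64-byte-aligned ccmemalign allocation.

#include "ccv_nnc.h"

int main(void)
{
  // Hypothetical 2x3x4 NHWC float tensor on CPU; dim[] is zero-terminated.
  ccv_nnc_tensor_param_t params = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32F,
    .dim = {2, 3, 4},
  };
  ccv_nnc_tensor_t* t = ccv_nnc_tensor_new(0, params, 0);
  t->data.f32[0] = 1.0f; // data points just past the aligned header
  ccv_nnc_tensor_free(t);
  return 0;
}
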
  105|       |ccv_nnc_tensor_t* ccv_nnc_tensor_new_from_file(const ccv_nnc_tensor_param_t params, const char* const filename, const off_t offset, const int flags)
  106|      4|{
  107|      4|  ccv_nnc_tensor_t* tensor;
  108|       |  // this specific form can be toll-free bridged to ccv_dense_matrix_t (on CPU, 3 dims (channels, rows, cols), and channels no larger than the max channels of ccv_dense_matrix_t).
  109|      4|  const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
  110|      4|  tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
  111|      4|  tensor->dataof = 0;
  112|      4|  tensor->alias_ref = 0;
  113|      4|  tensor->sig = 0;
  114|      4|  tensor->refcount = 1;
  115|      4|  tensor->info = params;
  116|      4|  if (tfb)
  117|      0|  {
  118|      0|    tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
  119|       |    // This corresponds to mat->step
  120|      0|    tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
  121|      0|  } else // This won't be recognized by ccv_dense_matrix_t
  122|      4|    tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
  123|      4|  const size_t size = ccv_nnc_tensor_data_size(params);
  124|      4|#ifdef HAVE_CUDA
  125|      4|  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
  126|      2|  {
  127|       |    // Remove this flag so it can be deallocated as usual.
  128|      2|    tensor->type &= ~CCV_NO_DATA_ALLOC;
  129|      2|    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
  130|      2|    if (size > 0)
  131|      2|    {
  132|       |      // This is not supported yet on CUDA.
  133|      2|      tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
  134|      2|      int fd = open(filename, O_RDONLY, 0);
  135|      2|      void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
  136|      2|      close(fd);
  137|      2|      madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
  138|      2|      cumemcpy(tensor->data.u8, CCV_TENSOR_GPU_MEMORY, bufptr, CCV_TENSOR_CPU_MEMORY, size);
  139|      2|      munmap(bufptr, size);
  140|      2|    } else
  141|      0|      tensor->data.u8 = 0;
  142|      2|  } else {
  143|      2|    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  144|      2|    if (size > 0)
  145|      2|    {
  146|      2|      int fd = open(filename, O_RDONLY, 0);
  147|      2|      void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
  148|      2|      close(fd);
  149|      2|      madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
  150|      2|      tensor->data.u8 = bufptr;
  151|      2|      tensor->type |= CCV_MAPPED_MEM;
  152|      2|    } else
  153|      0|      tensor->data.u8 = 0;
  154|      2|  }
  155|       |#elif defined(HAVE_MPS)
  156|       |  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
  157|       |  {
  158|       |    // Remove this flag so it can be deallocated as usual.
  159|       |    tensor->type &= ~CCV_NO_DATA_ALLOC;
  160|       |    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
  161|       |    if (size > 0)
  162|       |      tensor->data.u8 = (uint8_t*)mpmemmap(filename, size, offset, flags);
  163|       |    else
  164|       |      tensor->data.u8 = 0;
  165|       |  } else {
  166|       |    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  167|       |    if (size > 0)
  168|       |    {
  169|       |      int fd = open(filename, O_RDONLY, 0);
  170|       |      void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
  171|       |      close(fd);
  172|       |      madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
  173|       |      tensor->data.u8 = bufptr;
  174|       |      tensor->type |= CCV_MAPPED_MEM;
  175|       |    } else
  176|       |      tensor->data.u8 = 0;
  177|       |  }
  178|       |#else
  179|       |  assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  180|       |  if (size > 0)
  181|       |  {
  182|       |    int fd = open(filename, O_RDONLY, 0);
  183|       |    void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
  184|       |    close(fd);
  185|       |    madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
  186|       |    tensor->data.u8 = bufptr;
  187|       |    tensor->type |= CCV_MAPPED_MEM;
  188|       |  } else
  189|       |    tensor->data.u8 = 0;
  190|       |#endif
  191|      4|  return tensor;
  192|      4|}
  193|       |
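A hedged sketch of loading a raw blob with ccv_nnc_tensor_new_from_file ("weights.bin" and the 1024x1024 shape are hypothetical). On a CPU-only build the pages are mmap'd read-only, the tensor is tagged CCV_MAPPED_MEM, and ccv_nnc_tensor_free munmaps them. Note the offset is handed straight to mmap, so it must satisfy mmap's page-alignment requirement.

#include "ccv_nnc.h"

int main(void)
{
  ccv_nnc_tensor_param_t params = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32F,
    .dim = {1024, 1024},
  };
  // offset 0: the blob starts at the beginning of the file.
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new_from_file(params, "weights.bin", 0, 0);
  // Read-only access only: the mapping is PROT_READ.
  float first = w->data.f32[0];
  (void)first;
  ccv_nnc_tensor_free(w); // munmap runs here because CCV_MAPPED_MEM is set
  return 0;
}
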
  194|       |ccv_nnc_tensor_t* ccv_nnc_tensor_resize(ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params)
  195|  1.27k|{
  196|  1.27k|  assert(!CCV_IS_TENSOR_VIEW(tensor));
  197|  1.27k|  assert(tensor->type & CCV_UNMANAGED);
  198|  1.27k|  assert(tensor->data_size > 0);
  199|  1.27k|  assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GET_MEMORY(tensor->info.type));
  200|  1.27k|  assert(CCV_TENSOR_GET_DEVICE(params.type) == CCV_TENSOR_GET_DEVICE(tensor->info.type));
  201|  1.27k|  const size_t size = ccv_nnc_tensor_data_size(params);
  202|  1.27k|  const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
  203|  1.27k|  tensor->info = params;
  204|  1.27k|#ifdef HAVE_CUDA
  205|  1.27k|  const int pinned_mem = (tensor->type & CCV_PINNED_MEM);
  206|  1.27k|#endif
  207|  1.27k|  if (tfb)
  208|     10|  {
  209|     10|    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
  210|       |    // This corresponds to mat->step
  211|     10|    tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
  212|     10|  } else
  213|  1.26k|    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
  214|  1.27k|  if (size <= tensor->data_size) // Nothing.
  215|  1.27k|  {
  216|  1.27k|#ifdef HAVE_CUDA
  217|  1.27k|    if (pinned_mem)
  218|      4|      tensor->type |= CCV_PINNED_MEM;
  219|  1.27k|#endif
  220|  1.27k|    return tensor;
  221|  1.27k|  }
  222|      1|  ccv_nnc_tensor_t* new_tensor = tensor;
  223|      1|  const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 63) & -64;
  224|      1|#ifdef HAVE_CUDA
  225|      1|  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
  226|      0|  {
  227|      0|    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
  228|      0|    const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type);
  229|      0|    assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type));
  230|      0|    cufree(device_id, tensor->data.u8);
  231|      0|    new_tensor->data.u8 = (uint8_t*)cumalloc(device_id, size);
  232|      1|  } else {
  233|      1|    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  234|      1|    assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
  235|       |    // pin memory again.
  236|      1|    if (pinned_mem)
  237|      0|      cuunregister(new_tensor->data.u8);
  238|      1|    new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
  239|      1|    new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
  240|      1|  }
  241|       |#elif defined(HAVE_MPS)
  242|       |  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
  243|       |  {
  244|       |    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
  245|       |    const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type);
  246|       |    assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type));
  247|       |    mpobjfree(device_id, tensor->data.u8);
  248|       |    new_tensor->data.u8 = (uint8_t*)mpobjmalloc(device_id, size);
  249|       |  } else {
  250|       |    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  251|       |    assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
  252|       |    new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
  253|       |    new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
  254|       |  }
  255|       |#else
  256|       |  assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  257|       |  new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
  258|       |  new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
  259|       |#endif
  260|      1|  new_tensor->data_size = size;
  261|      1|#ifdef HAVE_CUDA
  262|      1|  if (pinned_mem)
  263|      0|    ccv_nnc_tensor_pin_memory(new_tensor);
  264|      1|#endif
  265|      1|  return new_tensor;
  266|      1|}
  267|       |
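A short sketch of the resize contract above (shapes hypothetical): shrinking reuses the existing buffer and returns the same pointer, while growing goes through ccrealloc and may move the tensor, so the return value must always replace the old pointer.

#include "ccv_nnc.h"

int main(void)
{
  ccv_nnc_tensor_param_t small = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32F,
    .dim = {8},
  };
  ccv_nnc_tensor_param_t big = small;
  big.dim[0] = 1024;
  ccv_nnc_tensor_t* t = ccv_nnc_tensor_new(0, small, 0);
  t = ccv_nnc_tensor_resize(t, big); // may ccrealloc and move: never keep the old pointer
  ccv_nnc_tensor_free(t);
  return 0;
}
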
  268|       |ccv_nnc_tensor_t ccv_nnc_tensor(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
  269|  78.0k|{
  270|       |  // this specific form can be toll-free bridged to ccv_dense_matrix_t
  271|  78.0k|  const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
  272|  78.0k|  ccv_nnc_tensor_t tensor;
  273|  78.0k|  tensor.dataof = 0;
  274|  78.0k|  tensor.alias_ref = 0;
  275|  78.0k|  tensor.sig = 0;
  276|  78.0k|  tensor.refcount = 1;
  277|  78.0k|  tensor.info = params;
  278|  78.0k|  if (flags & CCV_TENSOR_CPU_MEMORY)
  279|      0|  {
  280|      0|    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  281|  78.0k|  } else if (flags & CCV_TENSOR_GPU_MEMORY) {
  282|      0|    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
  283|      0|  }
  284|  78.0k|  if (tfb)
  285|    192|  {
  286|    192|    tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
  287|       |    // This corresponds to mat->step
  288|    192|    tensor.info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
  289|    192|  } else // This won't be recognized by ccv_dense_matrix_t
  290|  77.8k|    tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
  291|  78.0k|  if (params.dim[0] > 0)
  292|  78.0k|    tensor.data.u8 = (uint8_t*)ptr;
  293|      0|  else
  294|      0|    tensor.data.u8 = 0;
  295|  78.0k|  tensor.data_size = 0;
  296|  78.0k|  return tensor;
  297|  78.0k|}
  298|       |
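Unlike ccv_nnc_tensor_new, ccv_nnc_tensor returns a by-value header that wraps caller-owned memory (CCV_NO_DATA_ALLOC, data_size 0), so nothing needs to be freed. A sketch with a stack buffer (the shape is hypothetical):

#include "ccv_nnc.h"

int main(void)
{
  float buf[6] = {1, 2, 3, 4, 5, 6};
  ccv_nnc_tensor_param_t params = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32F,
    .dim = {2, 3},
  };
  ccv_nnc_tensor_t t = ccv_nnc_tensor(buf, params, 0);
  // t.data.f32 aliases buf; no ccv_nnc_tensor_free needed (or allowed).
  return t.data.f32[0] == 1.0f ? 0 : 1;
}
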
  299|       |int ccv_nnc_tensor_pin_memory(ccv_nnc_tensor_t* const tensor)
  300|  1.40k|{
  301|  1.40k|#ifdef HAVE_CUDA
  302|  1.40k|  assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
  303|  1.40k|  if (!(tensor->type & CCV_PINNED_MEM) && tensor->data_size)
  304|    146|  {
  305|    146|    const int success = curegister(tensor->data.u8, tensor->data_size);
  306|    146|    if (success)
  307|    146|      tensor->type |= CCV_PINNED_MEM;
  308|    146|    return success ? 0 : -1;
  309|    146|  }
  310|  1.25k|#endif
  311|  1.25k|  return 0;
  312|  1.40k|}
  313|       |
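A sketch of pinning (CUDA builds only; on other builds the function is a no-op returning 0). curegister page-locks the CPU buffer so device copies can use DMA, and the pin is dropped automatically inside ccv_nnc_tensor_free.

#include "ccv_nnc.h"

int main(void)
{
  ccv_nnc_tensor_param_t params = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32F,
    .dim = {256, 256},
  };
  ccv_nnc_tensor_t* t = ccv_nnc_tensor_new(0, params, 0);
  const int rc = ccv_nnc_tensor_pin_memory(t); // 0 on success, -1 if curegister fails
  (void)rc;
  ccv_nnc_tensor_free(t); // cuunregister runs here for pinned tensors
  return 0;
}
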
  314|       |void ccv_nnc_tensor_free(ccv_nnc_tensor_t* const tensor)
  315|  49.1k|{
  316|  49.1k|  if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY && tensor->type & CCV_MAPPED_MEM)
  317|      2|  {
  318|       |    // The size might be different from the one we allocated with (for example, the tensor might rewrite its size to be smaller).
  319|       |    // This might cause issues in the future.
  320|      2|    const size_t size = ccv_nnc_tensor_data_size(tensor->info);
  321|      2|    munmap(tensor->data.u8, size);
  322|      2|  }
  323|  49.1k|#ifdef HAVE_CUDA
  324|  49.1k|  if (tensor->type & CCV_PINNED_MEM)
  325|    146|    cuunregister(tensor->data.u8);
  326|  49.1k|  if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY &&
  327|  49.1k|    !(tensor->type & CCV_NO_DATA_ALLOC)) // If this is GPU memory and it is allocated, free.
  328|  2.76k|    cufree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8);
  329|       |#elif defined(HAVE_MPS)
  330|       |  if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY &&
  331|       |    !(tensor->type & CCV_NO_DATA_ALLOC)) // If this is GPU memory and it is allocated, free.
  332|       |    mpobjfree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8);
  333|       |#endif
  334|  49.1k|  ccfree(tensor);
  335|  49.1k|}
  336|       |
  337|       |static inline void _ccv_nnc_tensor_view_set(ccv_nnc_tensor_view_t* const tv, const ccv_nnc_tensor_t* const tensor, const int dim[CCV_NNC_MAX_DIM_ALLOC], const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
  338|    165|{
  339|    165|  memcpy(tv->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
  340|    165|  memcpy(tv->info.dim, dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
  341|    165|  uint8_t* const p = tensor->data.u8;
  342|    165|  const off_t off = tv->off = ccv_nnc_tensor_view_offset(tv->info.datatype, stride, ofs);
  343|    165|  tv->contiguous = ccv_nnc_tensor_view_is_contiguous(dim, stride);
  344|    165|  assert(off + CCV_GET_DATA_TYPE_SIZE(tv->info.datatype) * ccv_nnc_dimension_upper_bound(tv->info.dim, tv->stride) <= CCV_GET_DATA_TYPE_SIZE(tensor->info.datatype) * ccv_nnc_tensor_count(tensor->info));
  345|    165|  ccv_nnc_tensor_data(tv->info, p, off + tensor->dataof, &tv->data, &tv->dataof);
  346|    165|}
  347|       |
  348|       |ccv_nnc_tensor_view_t* ccv_nnc_tensor_view_new(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
  349|     84|{
  350|     84|  ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)ccmalloc(sizeof(ccv_nnc_tensor_view_t));
  351|     84|  tv->type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW;
  352|     84|  tv->dataof = 0;
  353|     84|  tv->alias_ref = (uintptr_t)tensor;
  354|     84|  tv->refcount = 1;
  355|     84|  tv->sig = 0;
  356|     84|  tv->data_size = 0;
  357|     84|  assert(params.type == tensor->info.type);
  358|     84|  assert(params.datatype == tensor->info.datatype);
  359|     84|  tv->info = params;
  360|     84|  _ccv_nnc_tensor_view_set(tv, tensor, params.dim, ofs, stride);
  361|     84|  return tv;
  362|     84|}
  363|       |
  364|       |ccv_nnc_tensor_view_t ccv_nnc_tensor_view(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
  365|     81|{
  366|     81|  assert(!CCV_IS_TENSOR_VIEW(tensor));
  367|     81|  assert(params.type == tensor->info.type);
  368|     81|  assert(params.datatype == tensor->info.datatype);
  369|     81|  ccv_nnc_tensor_view_t tv = {
  370|     81|    .dataof = 0,
  371|     81|    .alias_ref = (uintptr_t)tensor,
  372|     81|    .type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW, // clean up the channel bits, and then add the CCV_TENSOR_VIEW identifier
  373|     81|    .refcount = 1,
  374|     81|    .sig = 0,
  375|     81|    .info = params,
  376|     81|    .data_size = 0,
  377|     81|  };
  378|     81|  _ccv_nnc_tensor_view_set(&tv, tensor, params.dim, ofs, stride);
  379|     81|  return tv;
  380|     81|}
  381|       |
  382|       |void ccv_nnc_tensor_view_free(ccv_nnc_tensor_view_t* const tensor_view)
  383|     84|{
  384|     84|  ccfree(tensor_view);
  385|     84|}
  386|       |
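A view sketch: carving a 2x2 window out of a 4x4 CPU tensor (the shape, offsets, and strides are made up for illustration). The view keeps the parent's strides, so it is non-contiguous and operations such as ccv_nnc_tensor_zero take the strided path.

#include "ccv_nnc.h"

int main(void)
{
  ccv_nnc_tensor_param_t params = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32F,
    .dim = {4, 4},
  };
  ccv_nnc_tensor_t* t = ccv_nnc_tensor_new(0, params, 0);
  ccv_nnc_tensor_param_t vparams = params;
  vparams.dim[0] = vparams.dim[1] = 2; // the 2x2 window
  const int ofs[CCV_NNC_MAX_DIM_ALLOC] = {1, 1}; // starting at row 1, column 1
  const int stride[CCV_NNC_MAX_DIM_ALLOC] = {4, 1}; // parent row stride in elements
  ccv_nnc_tensor_view_t* tv = ccv_nnc_tensor_view_new(t, vparams, ofs, stride);
  ccv_nnc_tensor_zero(tv); // zeroes only the window, row by row
  ccv_nnc_tensor_view_free(tv);
  ccv_nnc_tensor_free(t);
  return 0;
}
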
  387|       |void _ccv_nnc_tensor_set_zero(unsigned char* u8, const int nd, const int* const dim, const int* const stride, const size_t data_size)
  388|    106|{
  389|    106|  if (nd == 1)
  390|      0|  {
  391|      0|    if (stride[0] == 1)
  392|      0|    {
  393|      0|      memset(u8, 0, data_size * dim[0]);
  394|      0|      return;
  395|      0|    }
  396|      0|    int i;
  397|      0|    for (i = 0; i < dim[0]; i++)
  398|      0|      memset(u8 + i * stride[0] * data_size, 0, data_size);
  399|    106|  } else if (nd == 2) {
  400|      1|    if (stride[1] == 1 && stride[0] == dim[1])
  401|      0|    {
  402|      0|      memset(u8, 0, data_size * dim[1] * dim[0]);
  403|      0|      return;
  404|      0|    }
  405|      1|    int x, y;
  406|      4|    for (y = 0; y < dim[0]; y++)
  407|      3|    {
  408|      3|      unsigned char* const u8y = u8 + y * stride[0] * data_size;
  409|      9|      for (x = 0; x < dim[1]; x++)
  410|      6|        memset(u8y + x * stride[1] * data_size, 0, data_size);
  411|      3|    }
  412|    105|  } else if (nd == 3) {
  413|      0|    if (stride[2] == 1 && stride[1] == dim[2] && stride[0] == dim[1] * dim[2])
  414|      0|    {
  415|      0|      memset(u8, 0, data_size * dim[2] * dim[1] * dim[0]);
  416|      0|      return;
  417|      0|    }
  418|      0|    int x, y, z;
  419|      0|    for (z = 0; z < dim[0]; z++)
  420|      0|    {
  421|      0|      unsigned char* const u8z = u8 + z * stride[0] * data_size;
  422|      0|      for (y = 0; y < dim[1]; y++)
  423|      0|      {
  424|      0|        unsigned char* const u8y = u8z + y * stride[1] * data_size;
  425|      0|        for (x = 0; x < dim[2]; x++)
  426|      0|          memset(u8y + x * stride[2] * data_size, 0, data_size);
  427|      0|      }
  428|      0|    }
  429|    105|  } else if (nd == 4) {
  430|     96|    if (stride[3] == 1 && stride[2] == dim[3] && stride[1] == dim[2] * dim[3] && stride[0] == dim[1] * dim[2] * dim[3])
  431|      0|    {
  432|      0|      memset(u8, 0, data_size * dim[3] * dim[2] * dim[1] * dim[0]);
  433|      0|      return;
  434|      0|    }
  435|     96|    int x, y, z, s;
  436|  1.53k|    for (s = 0; s < dim[0]; s++)
  437|  1.44k|    {
  438|  1.44k|      unsigned char* const u8s = u8 + s * stride[0] * data_size;
  439|  4.32k|      for (z = 0; z < dim[1]; z++)
  440|  2.88k|      {
  441|  2.88k|        unsigned char* const u8z = u8s + z * stride[1] * data_size;
  442|  11.5k|        for (y = 0; y < dim[2]; y++)
  443|  8.64k|        {
  444|  8.64k|          unsigned char* const u8y = u8z + y * stride[2] * data_size;
  445|  43.2k|          for (x = 0; x < dim[3]; x++)
  446|  34.5k|            memset(u8y + x * stride[3] * data_size, 0, data_size);
  447|  8.64k|        }
  448|  2.88k|      }
  449|  1.44k|    }
  450|     96|  } else {
  451|      9|    int i;
  452|    113|    for (i = 0; i < dim[0]; i++)
  453|    104|      _ccv_nnc_tensor_set_zero(u8 + i * stride[0] * data_size, nd - 1, dim + 1, stride + 1, data_size);
  454|      9|  }
  455|    106|}
  456|       |
  457|       |void ccv_nnc_tensor_zero(void* const tensor)
  458|  22.7k|{
  459|  22.7k|  ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)tensor;
  460|  22.7k|  const size_t data_size = CCV_GET_DATA_TYPE_SIZE(tv->info.datatype);
  461|  22.7k|  if (CCV_IS_TENSOR_CONTIGUOUS(tv))
  462|  22.7k|  {
  463|  22.7k|    memset(tv->data.u8, 0, data_size * ccv_nnc_tensor_count(tv->info));
  464|  22.7k|    return;
  465|  22.7k|  }
  466|      2|  const int nd = ccv_nnc_tensor_nd(tv->info.dim);
  467|      2|  assert(nd >= 1);
  468|      2|  const int* const tvstride = tv->stride;
  469|       |  // Go through this recursively.
  470|      2|  _ccv_nnc_tensor_set_zero(tv->data.u8, nd, tv->info.dim, tvstride, data_size);
  471|      2|}
  472|       |
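The dispatch above hinges on a packed-stride test: if every stride equals the product of the faster-varying dims, the whole block is one contiguous run and collapses to a single memset; any gap forces the nested loops. A standalone illustration of that test for the 2-D case (values hypothetical):

#include <stdio.h>

// Packed <=> stride[1] steps one element and stride[0] steps one full row.
static int is_packed_2d(const int dim[2], const int stride[2])
{
  return stride[1] == 1 && stride[0] == dim[1];
}

int main(void)
{
  const int dim[2] = {3, 4};
  const int packed[2] = {4, 1}; // 12 contiguous elements: one memset
  const int gapped[2] = {5, 1}; // 1-element gap per row: per-row memsets
  printf("%d %d\n", is_packed_2d(dim, packed), is_packed_2d(dim, gapped)); // prints "1 0"
  return 0;
}
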
  473|       |int ccv_nnc_tensor_eq(const ccv_nnc_tensor_t* const a, const ccv_nnc_tensor_t* const b)
  474|    691|{
  475|    691|  assert(!CCV_IS_TENSOR_VIEW(a));
  476|    691|  assert(!CCV_IS_TENSOR_VIEW(b));
  477|       |  // If a is a dense matrix, just use ccv_matrix_eq
  478|    691|  if (CCV_TENSOR_IS_DENSE_MATRIX(a->type))
  479|    110|    return ccv_matrix_eq((ccv_matrix_t*)a, (ccv_matrix_t*)b);
  480|       |  // Otherwise, do our own thing.
  481|    581|  if (CCV_GET_DATA_TYPE(a->type) != CCV_GET_DATA_TYPE(b->type))
  482|      0|    return -1;
  483|    581|  int i, c = 1;
  484|  1.67k|  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; i++)
  485|  1.67k|  {
  486|  1.67k|    if (!a->info.dim[i] && !b->info.dim[i])
  487|    581|      break;
  488|  1.09k|    if (a->info.dim[i] != b->info.dim[i])
  489|      0|      return -1;
  490|  1.09k|    c *= a->info.dim[i];
  491|  1.09k|  }
  492|    581|  if (CCV_GET_DATA_TYPE(a->type) == CCV_32S)
  493|     12|    return memcmp(a->data.i32, b->data.i32, sizeof(int) * c) == 0 ? 0 : -1;
  494|       |  // Only support 32F at this point.
  495|    569|  assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F || CCV_GET_DATA_TYPE(a->type) == CCV_64F);
  496|       |  // Read: http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
  497|       |  // http://floating-point-gui.de/errors/comparison/
  498|    569|  if (CCV_GET_DATA_TYPE(a->type) == CCV_32F)
  499|    567|  {
  500|    567|    static const float epsi = FLT_EPSILON;
  501|    567|    static const int32_t ulps = 128; // so that 1 and 1.000015 will be treated as the same.
  502|  11.1M|    for (i = 0; i < c; i++)
  503|  11.1M|    {
  504|       |      // Although this is floating point, I use integers as a way to compare.
  505|  11.1M|      int32_t i32a = a->data.i32[i];
  506|  11.1M|      if (i32a < 0)
  507|  25.3k|        i32a = 0x80000000 - i32a;
  508|  11.1M|      int32_t i32b = b->data.i32[i];
  509|  11.1M|      if (i32b < 0)
  510|  25.4k|        i32b = 0x80000000 - i32b;
  511|  11.1M|      if (abs(i32a - i32b) > ulps && fabsf(a->data.f32[i] - b->data.f32[i]) > epsi)
  512|      0|        return -1;
  513|  11.1M|    }
  514|    567|  } else if (CCV_GET_DATA_TYPE(a->type) == CCV_64F) {
  515|      2|    typedef union {
  516|      2|      double f64;
  517|      2|      int64_t i64;
  518|      2|    } Float64;
  519|      2|    static const double epsi = DBL_EPSILON;
  520|      2|    static const int64_t ulps = 128; // so that 1 and 1.000015 will be treated as the same.
  521|  15.8k|    for (i = 0; i < c; i++)
  522|  15.8k|    {
  523|       |      // Although this is floating point, I use integers as a way to compare.
  524|  15.8k|      Float64 f64a, f64b;
  525|  15.8k|      f64a.f64 = a->data.f64[i];
  526|  15.8k|      f64b.f64 = b->data.f64[i];
  527|  15.8k|      if (f64a.i64 < 0)
  528|      0|        f64a.i64 = 0x8000000000000000 - f64a.i64;
  529|  15.8k|      if (f64b.i64 < 0)
  530|      0|        f64b.i64 = 0x8000000000000000 - f64b.i64;
  531|  15.8k|      if (llabs(f64a.i64 - f64b.i64) > ulps && fabs(a->data.f64[i] - b->data.f64[i]) > epsi)
  532|      0|        return -1;
  533|  15.8k|    }
  534|      2|  }
  535|    569|  return 0;
  536|    569|}
  537|       |
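The float branch above compares bit patterns rather than values: reinterpreting an IEEE-754 float as a signed integer (after flipping negatives onto the same scale) makes adjacent representable floats differ by exactly 1, so an integer difference bounds the distance in ULPs. A standalone sketch of the same trick, using memcpy instead of the data.i32 aliasing in the source:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Map float bits onto a monotonic integer scale (mirrors i32a/i32b above).
static int32_t float_to_ordered(float f)
{
  int32_t i;
  memcpy(&i, &f, sizeof(i));
  return i < 0 ? (int32_t)0x80000000 - i : i;
}

int main(void)
{
  const float a = 1.0f, b = 1.000015f;
  const int32_t d = abs(float_to_ordered(a) - float_to_ordered(b));
  // d is about 126 here, so a and b compare equal under the 128-ULP tolerance.
  printf("%d ULPs apart\n", d);
  return 0;
}
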
  538|       |static void _strcat(char** str, int* written, size_t* len, char* from, int from_size)
  539|  1.34k|{
  540|  1.34k|  if (*len - *written < from_size)
  541|      0|  {
  542|      0|    *len += from_size * 2;
  543|      0|    *str = (char*)ccrealloc(*str, *len);
  544|      0|  }
  545|  1.34k|  memcpy(*str + *written, from, from_size);
  546|  1.34k|  *written += from_size;
  547|  1.34k|}
  548|       |
  549|    648|#define _STRPRINTF(str, written, len, format, ...) \
  550|    648|do { \
  551|    648|  const int newly_written = snprintf((str) + (written), (len) - (written), format, ## __VA_ARGS__); \
  552|    648|  if ((len) - (written) < newly_written) \
  553|    648|  { \
  554|      0|    (len) += newly_written * 2; \
  555|      0|    (str) = (char*)ccrealloc((str), (len)); \
  556|      0|    (written) += snprintf((str) + (written), (len) - (written), format, ## __VA_ARGS__); \
  557|      0|  } else \
  558|    648|    (written) += newly_written; \
  559|    648|} while (0)
  560|       |
  561|       |static void _strv(char** str, int* written, size_t* len, const ccv_nnc_tensor_t* const a, int i)
  562|    648|{
  563|    648|  if (a->info.datatype == CCV_32F)
  564|      0|    _STRPRINTF(*str, *written, *len, "%10.5g", a->data.f32[i]);
  565|    648|  else if (a->info.datatype == CCV_64F)
  566|      0|    _STRPRINTF(*str, *written, *len, "%10.5g", a->data.f64[i]);
  567|    648|  else if (a->info.datatype == CCV_16F) {
  568|      0|    float v;
  569|      0|    ccv_half_precision_to_float((uint16_t*)(a->data.f16 + i), &v, 1);
  570|      0|    _STRPRINTF(*str, *written, *len, "%10.5g", v);
  571|    648|  } else if (a->info.datatype == CCV_32S)
  572|    648|    _STRPRINTF(*str, *written, *len, "%10d", a->data.i32[i]);
  573|      0|  else if (a->info.datatype == CCV_64S)
  574|      0|    _STRPRINTF(*str, *written, *len, "%12lld", (long long int)a->data.i64[i]);
  575|      0|  else if (a->info.datatype == CCV_8U)
  576|      0|    _STRPRINTF(*str, *written, *len, "%3d", (int)a->data.u8[i]);
  577|    648|}
  578|       |
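_strcat and _STRPRINTF implement the usual amortized append: if the chunk does not fit, grow the buffer (by twice the chunk size here) and retry. A self-contained sketch of the same pattern with plain realloc in place of ccrealloc (names are made up; error handling is omitted as in the original):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Same shape as _strcat above: append from_size bytes, growing as needed.
static void strcat_grow(char** str, int* written, size_t* len, const char* from, int from_size)
{
  if (*len - *written < from_size)
  {
    *len += from_size * 2;
    *str = (char*)realloc(*str, *len);
  }
  memcpy(*str + *written, from, from_size);
  *written += from_size;
}

int main(void)
{
  size_t len = 4;
  int written = 0;
  char* str = (char*)malloc(len);
  strcat_grow(&str, &written, &len, "hello, ", 7);
  strcat_grow(&str, &written, &len, "world", 6); // 6 bytes: includes the '\0'
  printf("%s\n", str);
  free(str);
  return 0;
}
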
  579|       |static void _strt(char** str, int* written, size_t* len, const ccv_nnc_tensor_t* const a, int nd, int spacer, const int* const dim, const int* const stride, int idx)
  580|     28|{
  581|     28|  assert(nd != 1);
  582|     28|  if (nd == 2)
  583|     17|  {
  584|       |    // Print columns and the rows.
  585|     17|    int i, j, k;
  586|     17|    if (dim[0] <= 8)
  587|      1|    {
  588|      5|      for (i = 0; i < dim[0]; i++)
  589|      4|      {
  590|      4|        if (i != 0)
  591|      3|        {
  592|      3|          _strcat(str, written, len, "  ", 2);
  593|      3|          for (k = 0; k < spacer; k++)
  594|      0|            _strcat(str, written, len, " ", 1);
  595|      3|        }
  596|      4|        _strcat(str, written, len, "[", 1);
  597|      4|        if (dim[1] <= 8)
  598|      4|        {
  599|     20|          for (j = 0; j < dim[1]; j++)
  600|     16|          {
  601|     16|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
  602|     16|            if (j < dim[1] - 1)
  603|     12|              _strcat(str, written, len, ", ", 2);
  604|     16|          }
  605|      4|          if (i < dim[0] - 1)
  606|      3|            _strcat(str, written, len, "],\n", 3);
  607|      4|        } else {
  608|      0|          for (j = 0; j < 3; j++)
  609|      0|          {
  610|      0|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
  611|      0|            _strcat(str, written, len, ", ", 2);
  612|      0|          }
  613|      0|          _strcat(str, written, len, " ..., ", 6);
  614|      0|          for (j = dim[1] - 3; j < dim[1]; j++)
  615|      0|          {
  616|      0|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
  617|      0|            if (j < dim[1] - 1)
  618|      0|              _strcat(str, written, len, ", ", 2);
  619|      0|          }
  620|      0|          if (i < dim[0] - 1)
  621|      0|            _strcat(str, written, len, "],\n", 3);
  622|      0|        }
  623|      4|      }
  624|      1|      _strcat(str, written, len, "]", 1);
  625|     16|    } else {
  626|     64|      for (i = 0; i < 3; i++)
  627|     48|      {
  628|     48|        if (i != 0)
  629|     32|        {
  630|     32|          _strcat(str, written, len, "  ", 2);
  631|    128|          for (k = 0; k < spacer; k++)
  632|     96|            _strcat(str, written, len, " ", 1);
  633|     32|        }
  634|     48|        _strcat(str, written, len, "[", 1);
  635|     48|        if (dim[1] <= 8)
  636|      0|        {
  637|      0|          for (j = 0; j < dim[1]; j++)
  638|      0|          {
  639|      0|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
  640|      0|            if (j < dim[1] - 1)
  641|      0|              _strcat(str, written, len, ", ", 2);
  642|      0|          }
  643|      0|          _strcat(str, written, len, "],\n", 3);
  644|     48|        } else {
  645|    192|          for (j = 0; j < 3; j++)
  646|    144|          {
  647|    144|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
  648|    144|            _strcat(str, written, len, ", ", 2);
  649|    144|          }
  650|     48|          _strcat(str, written, len, " ..., ", 6);
  651|    192|          for (j = dim[1] - 3; j < dim[1]; j++)
  652|    144|          {
  653|    144|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
  654|    144|            if (j < dim[1] - 1)
  655|     96|              _strcat(str, written, len, ", ", 2);
  656|    144|          }
  657|     48|          _strcat(str, written, len, "],\n", 3);
  658|     48|        }
  659|     48|      }
  660|     16|      _strcat(str, written, len, "  ", 2);
  661|     64|      for (k = 0; k < spacer; k++)
  662|     48|        _strcat(str, written, len, " ", 1);
  663|     16|      _strcat(str, written, len, "...,\n", 5);
  664|     64|      for (i = dim[0] - 3; i < dim[0]; i++)
  665|     48|      {
  666|     48|        _strcat(str, written, len, "  ", 2);
  667|    192|        for (k = 0; k < spacer; k++)
  668|    144|          _strcat(str, written, len, " ", 1);
  669|     48|        _strcat(str, written, len, "[", 1);
  670|     48|        if (dim[1] < 8)
  671|      0|        {
  672|      0|          for (j = 0; j < dim[1]; j++)
  673|      0|          {
  674|      0|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
  675|      0|            if (j < dim[1] - 1)
  676|      0|              _strcat(str, written, len, ", ", 2);
  677|      0|          }
  678|      0|          if (i < dim[0] - 1)
  679|      0|            _strcat(str, written, len, "],\n", 3);
  680|     48|        } else {
  681|    192|          for (j = 0; j < 3; j++)
  682|    144|          {
  683|    144|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
  684|    144|            _strcat(str, written, len, ", ", 2);
  685|    144|          }
  686|     48|          _strcat(str, written, len, " ..., ", 6);
  687|    192|          for (j = dim[1] - 3; j < dim[1]; j++)
  688|    144|          {
  689|    144|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
  690|    144|            if (j < dim[1] - 1)
  691|     96|              _strcat(str, written, len, ", ", 2);
  692|    144|          }
  693|     48|          if (i < dim[0] - 1)
  694|     32|            _strcat(str, written, len, "],\n", 3);
  695|     48|        }
  696|     48|      }
  697|     16|      _strcat(str, written, len, "]", 1);
  698|     16|    }
  699|     17|    return;
  700|     17|  }
  701|     11|  int i, j;
  702|     11|  if (dim[0] > 4)
  703|      2|  {
  704|      6|    for (i = 0; i < 2; i++)
  705|      4|    {
  706|      4|      _strcat(str, written, len, "[", 1);
  707|      4|      _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
  708|      4|      _strcat(str, written, len, "],\n  ", 5);
  709|      8|      for (j = 0; j < spacer; j++)
  710|      4|        _strcat(str, written, len, " ", 1);
  711|      4|    }
  712|      2|    _strcat(str, written, len, "...,\n", 5);
  713|      2|    _strcat(str, written, len, "  ", 2);
  714|      4|    for (j = 0; j < spacer; j++)
  715|      2|      _strcat(str, written, len, " ", 1);
  716|      6|    for (i = dim[0] - 2; i < dim[0]; i++)
  717|      4|    {
  718|      4|      _strcat(str, written, len, "[", 1);
  719|      4|      _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
  720|      4|      if (i < dim[0] - 1)
  721|      2|      {
  722|      2|        _strcat(str, written, len, "],\n  ", 5);
  723|      4|        for (j = 0; j < spacer; j++)
  724|      2|          _strcat(str, written, len, " ", 1);
  725|      2|      }
  726|      4|    }
  727|      2|    _strcat(str, written, len, "]", 1);
  728|      9|  } else {
  729|     27|    for (i = 0; i < dim[0]; i++)
  730|     18|    {
  731|     18|      _strcat(str, written, len, "[", 1);
  732|     18|      _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
  733|     18|      if (i < dim[0] - 1)
  734|      9|      {
  735|      9|        _strcat(str, written, len, "],\n", 3);
  736|      9|        _strcat(str, written, len, "  ", 2);
  737|     25|        for (j = 0; j < spacer; j++)
  738|     16|          _strcat(str, written, len, " ", 1);
  739|      9|      }
  740|     18|    }
  741|      9|    _strcat(str, written, len, "]", 1);
  742|      9|  }
  743|     11|}
  744|       |
  745|       |char* ccv_nnc_tensor_format_new(const ccv_nnc_tensor_t* const a)
  746|      4|{
  747|      4|  const int nd = ccv_nnc_tensor_nd(a->info.dim);
  748|      4|  int i;
  749|      4|  int rows = 8; // 8 rows for the first one, and then just first and last.
  750|      7|  for (i = 2; i < nd; i++)
  751|      3|    rows *= 5; // Maximum 3 rows beyond the first two.
  752|      4|  int columns = nd * 2 + 16 * 8;
  753|      4|  size_t len = sizeof(char) * columns * rows;
  754|       |  // Allocate the return string buffer.
  755|      4|  char* str = (char*)ccmalloc(len);
  756|      4|  int written = 0;
  757|      4|  int stride[CCV_NNC_MAX_DIM_ALLOC];
  758|      4|  if (CCV_IS_TENSOR_VIEW(a))
  759|      0|    memcpy(stride, ((ccv_nnc_tensor_view_t*)a)->stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
  760|      4|  else
  761|      4|    ccv_nnc_tensor_get_stride(a->info.dim, stride);
  762|      4|  _strcat(&str, &written, &len, "[\n  ", 4);
  763|      4|  if (nd == 1)
  764|      2|  {
  765|       |    // Special casing for vector.
  766|      2|    if (a->info.dim[0] <= 64)
  767|     13|      for (i = 0; i < a->info.dim[0]; i++)
  768|     12|      {
  769|     12|        _strv(&str, &written, &len, a, i * stride[0]);
  770|     12|        if (i < a->info.dim[0] - 1)
  771|     11|        {
  772|     11|          if ((i + 1) % 8 == 0)
  773|      1|            _strcat(&str, &written, &len, ",\n  ", 4);
  774|     10|          else
  775|     10|            _strcat(&str, &written, &len, ", ", 2);
  776|     11|        }
  777|     12|      }
  778|      1|    else {
  779|       |      // First 3 rows.
  780|     25|      for (i = 0; i < 24; i++)
  781|     24|      {
  782|     24|        _strv(&str, &written, &len, a, i * stride[0]);
  783|     24|        if ((i + 1) % 8 == 0)
  784|      3|          _strcat(&str, &written, &len, ",\n  ", 4);
  785|     21|        else
  786|     21|          _strcat(&str, &written, &len, ", ", 2);
  787|     24|      }
  788|      1|      _strcat(&str, &written, &len, "...,\n  ", 7);
  789|       |      // Last 3 rows (aligned to 8 items per row).
  790|      1|      int start = ((a->info.dim[0] + 7) / 8 - 3) * 8;
  791|     21|      for (i = start; i < a->info.dim[0]; i++)
  792|     20|      {
  793|     20|        _strv(&str, &written, &len, a, i * stride[0]);
  794|     20|        if (i < a->info.dim[0] - 1)
  795|     19|        {
  796|     19|          if ((i + 1) % 8 == 0)
  797|      2|            _strcat(&str, &written, &len, ",\n  ", 4);
  798|     17|          else
  799|     17|            _strcat(&str, &written, &len, ", ", 2);
  800|     19|        }
  801|     20|      }
  802|      1|    }
  803|      2|  } else {
  804|      2|    _strt(&str, &written, &len, a, nd, 0, a->info.dim, stride, 0);
  805|      2|  }
  806|      4|  _strcat(&str, &written, &len, "\n]", 3); // Including the terminating \0.
  807|      4|  str = (char*)ccrealloc(str, written); // Don't need the extra space.
  808|      4|  return str;
  809|      4|}
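
Closing the loop, a usage sketch for the formatter (a 3x3 CCV_32S tensor, chosen to match the %10d path the report shows exercised). The returned buffer comes from ccmalloc/ccrealloc, so it is released with ccfree, assuming that macro is visible through the same headers:

#include <stdio.h>
#include "ccv_nnc.h"

int main(void)
{
  ccv_nnc_tensor_param_t params = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32S,
    .dim = {3, 3},
  };
  ccv_nnc_tensor_t* t = ccv_nnc_tensor_new(0, params, 0);
  ccv_nnc_tensor_zero(t);
  char* str = ccv_nnc_tensor_format_new(t);
  puts(str); // "[\n  [ 0, 0, 0],\n ..." style output
  ccfree(str);
  ccv_nnc_tensor_free(t);
  return 0;
}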