Coverage Report

Created: 2022-08-03 23:52

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/ccv_nnc_tensor.c
Line | Count  | Source (jump to first uncovered line)
   1 |        | #include "ccv_nnc.h"
   2 |        | #include "ccv_nnc_easy.h"
   3 |        | #include "ccv_nnc_internal.h"
   4 |        | #ifdef HAVE_CUDA
   5 |        | #include "gpu/ccv_nnc_compat.h"
   6 |        | #endif
   7 |        |
   8 |        | // MARK - Level-1 API
   9 |        |
  10 |        | const int ccv_nnc_no_ofs[CCV_NNC_MAX_DIM_ALLOC] = {0};
  11 |        |
  12 |        | ccv_nnc_tensor_t* ccv_nnc_tensor_new(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
  13 |  49.7k | {
  14 |  49.7k |   ccv_nnc_tensor_t* tensor;
  15 |        |   // This specific form can be toll-free bridged to ccv_dense_matrix_t (on CPU, 3 dims (channels, rows, cols), with channels no larger than the max channels of ccv_dense_matrix_t).
  16 |  49.7k |   const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY &&
     |  42.8k |     params.format == CCV_TENSOR_FORMAT_NHWC &&
     |  39.0k |     params.dim[2] > 0 &&
     |  4.60k |     params.dim[2] <= CCV_MAX_CHANNEL &&
     |  4.60k |     params.dim[0] > 0 &&
     |  4.60k |     params.dim[1] > 0 &&
     |  4.60k |     params.dim[3] == 0);
  17 |  49.7k |   if (ptr)
  18 |  1.95k |   {
  19 |  1.95k |     tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
  20 |  1.95k |     tensor->alias_ref = 0;
  21 |  1.95k |     tensor->sig = 0;
  22 |  1.95k |     tensor->refcount = 1;
  23 |  1.95k |     tensor->info = params;
  24 |  1.95k |     if (tfb)
  25 |     54 |     {
  26 |     54 |       tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | params.datatype | params.dim[2];
  27 |        |       // This corresponds to mat->step
  28 |     54 |       tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (params.datatype | params.dim[2]));
  29 |     54 |     } else // This won't be recognized by ccv_dense_matrix_t
  30 |  1.89k |       tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | params.datatype;
  31 |  1.95k |     tensor->data.u8 = (uint8_t*)ptr;
  32 |  1.95k |     return tensor;
  33 |  1.95k |   }
  34 |  47.8k |   if (flags & CCV_TENSOR_CPU_MEMORY)
  35 |      0 |   {
  36 |      0 |     assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  37 |  47.8k |   } else if (flags & CCV_TENSOR_GPU_MEMORY) {
  38 |      0 |     assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
  39 |      0 |   }
  40 |  47.8k |   const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 15) & -16;
  41 |  47.8k |   const size_t size = ccv_nnc_tensor_data_size(params);
  42 |  47.8k | #ifdef HAVE_CUDA
  43 |  47.8k |   if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
  44 |  5.80k |   {
  45 |  5.80k |     tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
  46 |  5.80k |     assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
  47 |  5.80k |     tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
  48 |  42.0k |   } else {
  49 |  42.0k |     assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  50 |  41.4k |     ccmemalign((void **)&tensor, 16, tensor_hdr_size + size);
  51 |  41.4k |     tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
  52 |  41.4k |   }
  53 |        | #else
  54 |        |   assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  55 |        |   ccmemalign((void **)&tensor, 16, tensor_hdr_size + size);
  56 |        |   tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
  57 |        | #endif
  58 |  47.2k |   tensor->alias_ref = 0;
  59 |  47.2k |   tensor->data_size = size;
  60 |  47.2k |   tensor->sig = 0;
  61 |  47.2k |   tensor->refcount = 1;
  62 |  47.2k |   tensor->info = params;
  63 |  47.2k |   if (tfb)
  64 |  4.27k |   {
  65 |  4.27k |     tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype | params.dim[2];
  66 |        |     // This corresponds to mat->step
  67 |  4.27k |     tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (params.datatype | params.dim[2]));
  68 |  4.27k |   } else
  69 |  42.9k |     tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype;
  70 |  47.2k |   return tensor;
  71 |  47.8k | }
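
The function above has two allocation paths: with a caller-supplied ptr it allocates only the header and tags it CCV_NO_DATA_ALLOC; with ptr == 0 it carves the header and data out of one 16-byte-aligned block on CPU (or cumallocs the data on GPU). A minimal editor's usage sketch, not part of the report, using only identifiers visible in the listing; the 2x2x3 shape is hypothetical:

// Usage sketch (hypothetical shape, assumed rows/cols/channels reading of dim[]).
#include "ccv_nnc.h"

static void tensor_new_example(void)
{
  const ccv_nnc_tensor_param_t params = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32F,
    .dim = {2, 2, 3}, // dim[2] is the channel count per the tfb check; dim[3] == 0
  };
  // ptr == 0: header and data carved out of one 16-byte-aligned allocation.
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, params, 0);
  // ptr != 0: header only (CCV_NO_DATA_ALLOC); the caller keeps the buffer.
  float buf[2 * 2 * 3] = {0};
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(buf, params, 0);
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b); // frees the header only; buf is untouched
}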
  72 |        |
  73 |        | ccv_nnc_tensor_t* ccv_nnc_tensor_resize(ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params)
  74 |  1.27k | {
  75 |  1.27k |   assert(!CCV_IS_TENSOR_VIEW(tensor));
  76 |  1.27k |   assert(tensor->type & CCV_UNMANAGED);
  77 |  1.27k |   assert(tensor->data_size > 0);
  78 |  1.27k |   assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GET_MEMORY(tensor->info.type));
  79 |  1.27k |   assert(CCV_TENSOR_GET_DEVICE(params.type) == CCV_TENSOR_GET_DEVICE(tensor->info.type));
  80 |  1.27k |   const size_t size = ccv_nnc_tensor_data_size(params);
  81 |  1.27k |   const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY &&
     |     14 |     params.format == CCV_TENSOR_FORMAT_NHWC &&
     |     10 |     params.dim[2] > 0 &&
     |     10 |     params.dim[2] <= CCV_MAX_CHANNEL &&
     |     10 |     params.dim[0] > 0 &&
     |     10 |     params.dim[1] > 0 &&
     |     10 |     params.dim[3] == 0);
  82 |  1.27k |   tensor->info = params;
  83 |  1.27k | #ifdef HAVE_CUDA
  84 |  1.27k |   const int pinned_mem = (tensor->type & CCV_PINNED_MEM);
  85 |  1.27k | #endif
  86 |  1.27k |   if (tfb)
  87 |     10 |   {
  88 |     10 |     tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype | params.dim[2];
  89 |        |     // This corresponds to mat->step
  90 |     10 |     tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (params.datatype | params.dim[2]));
  91 |     10 |   } else
  92 |  1.26k |     tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype;
  93 |  1.27k |   if (size <= tensor->data_size) // Nothing to do.
  94 |  1.27k |   {
  95 |  1.27k | #ifdef HAVE_CUDA
  96 |  1.27k |     if (pinned_mem)
  97 |      3 |       tensor->type |= CCV_PINNED_MEM;
  98 |  1.27k | #endif
  99 |  1.27k |     return tensor;
 100 |  1.27k |   }
 101 |      3 |   ccv_nnc_tensor_t* new_tensor = tensor;
 102 |      3 |   const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 15) & -16;
 103 |      3 | #ifdef HAVE_CUDA
 104 |      3 |   if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
 105 |      1 |   {
 106 |      1 |     assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
 107 |      1 |     const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type);
 108 |      1 |     assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type));
 109 |      1 |     cufree(device_id, tensor->data.u8);
 110 |      1 |     new_tensor->data.u8 = (uint8_t*)cumalloc(device_id, size);
 111 |      2 |   } else {
 112 |      2 |     assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 113 |      2 |     assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
 114 |        |     // Pin the memory again afterwards.
 115 |      2 |     if (pinned_mem)
 116 |      1 |       cuunregister(new_tensor->data.u8);
 117 |      2 |     new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
 118 |      2 |     new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
 119 |      2 |   }
 120 |        | #else
 121 |        |   assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 122 |        |   new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
 123 |        |   new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
 124 |        | #endif
 125 |      3 |   new_tensor->data_size = size;
 126 |      3 | #ifdef HAVE_CUDA
 127 |      3 |   if (pinned_mem)
 128 |      1 |     ccv_nnc_tensor_pin_memory(new_tensor);
 129 |      3 | #endif
 130 |      3 |   return new_tensor;
 131 |      3 | }
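
Because the growth path goes through ccrealloc (or cufree/cumalloc on the GPU side), the header and data may move; only the returned pointer is valid afterwards. A hedged call-pattern sketch, reusing the hypothetical params from the earlier sketch:

  ccv_nnc_tensor_t* t = ccv_nnc_tensor_new(0, params, 0);
  ccv_nnc_tensor_param_t grown = t->info;
  grown.dim[0] *= 2; // any shape whose data size exceeds t->data_size triggers reallocation
  t = ccv_nnc_tensor_resize(t, grown); // shrinking returns t in place; growing may move it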
 132 |        |
 133 |        | ccv_nnc_tensor_t ccv_nnc_tensor(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
 134 |  79.5k | {
 135 |        |   // This specific form can be toll-free bridged to ccv_dense_matrix_t
 136 |  79.5k |   const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY &&
     |  70.4k |     params.format == CCV_TENSOR_FORMAT_NHWC &&
     |  70.2k |     params.dim[2] > 0 &&
     |    435 |     params.dim[2] <= CCV_MAX_CHANNEL &&
     |    435 |     params.dim[0] > 0 &&
     |    435 |     params.dim[1] > 0 &&
     |    435 |     params.dim[3] == 0);
 137 |  79.5k |   ccv_nnc_tensor_t tensor;
 138 |  79.5k |   tensor.alias_ref = 0;
 139 |  79.5k |   tensor.sig = 0;
 140 |  79.5k |   tensor.refcount = 1;
 141 |  79.5k |   tensor.info = params;
 142 |  79.5k |   if (flags & CCV_TENSOR_CPU_MEMORY)
 143 |      0 |   {
 144 |      0 |     assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 145 |  79.5k |   } else if (flags & CCV_TENSOR_GPU_MEMORY) {
 146 |      0 |     assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
 147 |      0 |   }
 148 |  79.5k |   if (tfb)
 149 |    134 |   {
 150 |    134 |     tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype | params.dim[2];
 151 |        |     // This corresponds to mat->step
 152 |    134 |     tensor.info.dim[4] = CCV_GET_STEP(params.dim[1], (params.datatype | params.dim[2]));
 153 |    134 |   } else // This won't be recognized by ccv_dense_matrix_t
 154 |  79.4k |     tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype;
 155 |  79.5k |   tensor.data.u8 = (uint8_t*)ptr;
 156 |  79.5k |   tensor.data_size = 0;
 157 |  79.5k |   return tensor;
 158 |  79.5k | }
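
Unlike ccv_nnc_tensor_new, this by-value variant never owns memory: data_size is 0 and CCV_NO_DATA_ALLOC is always set, so there is no matching free. A short sketch, again with the hypothetical params from above:

  float buf[2 * 2 * 3];
  const ccv_nnc_tensor_t t = ccv_nnc_tensor(buf, params, 0); // wraps buf on the stack
  // t.data.u8 aliases buf; nothing to free when t goes out of scope.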
 159 |        |
 160 |        | int ccv_nnc_tensor_pin_memory(ccv_nnc_tensor_t* const tensor)
 161 |  1.40k | {
 162 |  1.40k | #ifdef HAVE_CUDA
 163 |  1.40k |   assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
 164 |  1.40k |   if (!(tensor->type & CCV_PINNED_MEM) &&
     |    149 |     tensor->data_size)
 165 |    149 |   {
 166 |    149 |     const int success = curegister(tensor->data.u8, tensor->data_size);
 167 |    149 |     if (success)
 168 |    149 |       tensor->type |= CCV_PINNED_MEM;
 169 |    149 |     return success ? 0 :
     |      0 |       -1;
 170 |    149 |   }
 171 |  1.25k | #endif
 172 |  1.25k |   return 0;
 173 |  1.40k | }
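
Pinning page-locks a CPU tensor's data via curegister so CUDA transfers can run asynchronously; the CCV_PINNED_MEM bit makes repeated calls no-ops, and a build without HAVE_CUDA falls through to return 0. A hedged sketch:

  ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, params, 0);
  if (ccv_nnc_tensor_pin_memory(t) == 0)
  {
    // Pages are pinned (CCV_PINNED_MEM set); ccv_nnc_tensor_free will
    // cuunregister them before releasing the tensor.
  }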
 174 |        |
 175 |        | void ccv_nnc_tensor_free(ccv_nnc_tensor_t* const tensor)
 176 |  51.5k | {
 177 |  51.5k | #ifdef HAVE_CUDA
 178 |  51.5k |   if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY &&
 179 |  6.55k |     !(tensor->type & CCV_NO_DATA_ALLOC)) // If this is GPU memory and it is allocated, free it.
 180 |  5.79k |     cufree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8);
 181 |  51.5k |   if (tensor->type & CCV_PINNED_MEM)
 182 |    148 |     cuunregister(tensor->data.u8);
 183 |  51.5k | #endif
 184 |  51.5k |   ccfree(tensor);
 185 |  51.5k | }
 186 |        |
 187 |        | static inline void _ccv_nnc_tensor_view_set(ccv_nnc_tensor_view_t* const tv, const ccv_nnc_tensor_t* const tensor, const int dim[CCV_NNC_MAX_DIM_ALLOC], const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC])
 188 |    137 | {
 189 |    137 |   memcpy(tv->inc, inc, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
 190 |    137 |   memcpy(tv->info.dim, dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
 191 |    137 |   uint8_t* const p = tensor->data.u8;
 192 |    137 |   const off_t off = tv->off = ccv_nnc_tensor_view_offset(tv->info.datatype, inc, ofs);
 193 |    137 |   tv->contiguous = ccv_nnc_tensor_view_is_contiguous(dim, inc, ofs);
 194 |    137 |   tv->data.u8 = p + off;
 195 |    137 | }
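
A worked example of the offset computed above, assuming ccv_nnc_tensor_view_offset performs conventional row-major stride arithmetic over inc (an assumption; that implementation is not shown in this listing):

  // For a 4-byte (32-bit) element view with inc = {8, 8} and ofs = {2, 4}:
  //   off = (2 * 8 + 4) * 4 = 80 bytes
  // so tv->data.u8 lands 80 bytes past the parent's data pointer, matching
  // tv->data.u8 = p + off above.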
 196 |        |
 197 |        | ccv_nnc_tensor_view_t* ccv_nnc_tensor_view_new(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC])
 198 |     44 | {
 199 |     44 |   ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)ccmalloc(sizeof(ccv_nnc_tensor_view_t));
 200 |     44 |   tv->type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW;
 201 |     44 |   tv->alias_ref = (uintptr_t)tensor;
 202 |     44 |   tv->refcount = 1;
 203 |     44 |   tv->sig = 0;
 204 |     44 |   tv->data_size = 0;
 205 |     44 |   assert(params.type == tensor->info.type);
 206 |     44 |   assert(params.datatype == tensor->info.datatype);
 207 |     44 |   tv->info = params;
 208 |     44 |   _ccv_nnc_tensor_view_set(tv, tensor, params.dim, ofs, inc);
 209 |     44 |   return tv;
 210 |     44 | }
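
A usage sketch with hypothetical shapes: a 2x2 window at row 2, column 4 of an 8x8 parent. inc carries the parent's dimensions, so consecutive view rows stay 8 elements apart in the parent's memory:

  const int ofs[CCV_NNC_MAX_DIM_ALLOC] = {2, 4};
  const int inc[CCV_NNC_MAX_DIM_ALLOC] = {8, 8};
  ccv_nnc_tensor_param_t vparams = parent->info; // same type and datatype, per the asserts
  vparams.dim[0] = 2;
  vparams.dim[1] = 2;
  ccv_nnc_tensor_view_t* const tv = ccv_nnc_tensor_view_new(parent, vparams, ofs, inc);
  // ... use the view ...
  ccv_nnc_tensor_view_free(tv); // frees the view header only, never the parent's data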
 211 |        |
 212 |        | ccv_nnc_tensor_view_t ccv_nnc_tensor_view(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC])
 213 |     93 | {
 214 |     93 |   assert(!CCV_IS_TENSOR_VIEW(tensor));
 215 |     93 |   assert(params.type == tensor->info.type);
 216 |     93 |   assert(params.datatype == tensor->info.datatype);
 217 |     93 |   ccv_nnc_tensor_view_t tv = {
 218 |     93 |     .alias_ref = (uintptr_t)tensor,
 219 |     93 |     .type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW, // clean up the channel bits, and then add CCV_TENSOR_VIEW identifier
 220 |     93 |     .refcount = 1,
 221 |     93 |     .sig = 0,
 222 |     93 |     .info = params,
 223 |     93 |     .data_size = 0,
 224 |     93 |   };
 225 |     93 |   _ccv_nnc_tensor_view_set(&tv, tensor, params.dim, ofs, inc);
 226 |     93 |   return tv;
 227 |     93 | }
 228 |        |
 229 |        | void ccv_nnc_tensor_view_free(ccv_nnc_tensor_view_t* const tensor_view)
 230 |     44 | {
 231 |     44 |   ccfree(tensor_view);
 232 |     44 | }
 233 |        |
 234 |        | void ccv_nnc_tensor_zero(void* const tensor)
 235 |  12.5k | {
 236 |  12.5k |   ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)tensor;
 237 |  12.5k |   const size_t data_size = CCV_GET_DATA_TYPE_SIZE(tv->info.datatype);
 238 |  12.5k |   if (CCV_IS_TENSOR_CONTIGUOUS(tv))
 239 |  12.5k |   {
 240 |  12.5k |     memset(tv->data.u8, 0, data_size * ccv_nnc_tensor_count(tv->info));
 241 |  12.5k |     return;
 242 |  12.5k |   }
 243 |      2 |   const int nd = ccv_nnc_tensor_nd(tv->info.dim);
 244 |      2 |   assert(nd >= 1);
 245 |      2 |   const int* const tvinc = tv->inc;
 246 |        |   // Reset it to 0.
 247 |      2 |   int c, x, y;
 248 |      2 |   int count = 1;
 249 |      2 |   int mod[CCV_NNC_MAX_DIM_ALLOC - 3];
 250 |      2 |   size_t mod_inc[CCV_NNC_MAX_DIM_ALLOC - 2];
 251 |      2 |   const size_t top_mod_inc = nd > 2 ?
     |      1 |     data_size * tvinc[nd - 3] * tvinc[nd - 2] * tvinc[nd - 1] :
     |      1 |     data_size;
 252 |      2 |   if (nd > 2)
 253 |      1 |     mod_inc[nd - 3] = top_mod_inc;
 254 |      5 |   for (c = nd - 4; c >= 0;
     |      3 |     c--)
 255 |      3 |   {
 256 |        |     // Compute the mod.
 257 |      3 |     mod[c] = c == nd - 4 ?
     |      1 |       tv->info.dim[c] :
     |      2 |       mod[c + 1] * tv->info.dim[c];
 258 |      3 |     mod_inc[c] = mod_inc[c + 1] * tvinc[c];
 259 |      3 |     count *= tv->info.dim[c];
 260 |      3 |   }
 261 |      5 |   for (c = 0; c < nd - 3;
     |      3 |     c++)
 262 |      3 |     mod_inc[c] = mod_inc[c + 1] * (tvinc[c] - tv->info.dim[c]);
 263 |      2 |   uint8_t* tvd = tv->data.u8;
 264 |      2 |   const size_t tvinc_1 = data_size * tvinc[nd - 1];
 265 |      2 |   const size_t tvinc_21 = tvinc_1 * (nd >= 2 ? tvinc[nd - 2] :
     |      0 |     1);
 266 |      2 |   const size_t tvdim_1 = data_size * tv->info.dim[nd - 1];
 267 |      2 |   const int max_y = ccv_max(1, nd >= 3 ? tv->info.dim[nd - 3] : 1);
 268 |      2 |   const int max_x = ccv_max(1, nd >= 2 ? tv->info.dim[nd - 2] : 1);
 269 |  1.44k |   for (c = 0; c < count; c++)
 270 |  1.44k |   {
 271 |  4.32k |     for (y = 0; y < max_y;
     |  2.88k |       y++)
 272 |  2.88k |     {
 273 |  2.88k |       uint8_t* tvp = tvd + y * tvinc_21;
 274 |  11.5k |       for (x = 0; x < max_x;
     |  8.64k |         x++)
 275 |  8.64k |       {
 276 |  8.64k |         memset(tvp, 0, tvdim_1);
 277 |  8.64k |         tvp += tvinc_1;
 278 |  8.64k |       }
 279 |  2.88k |     }
 280 |  1.44k |     tvd += top_mod_inc;
 281 |  1.54k |     for (y = nd - 4; y >= 0;
     |    105 |       y--)
 282 |  1.54k |       if ((c + 1) % mod[y] != 0)
 283 |  1.43k |         break; // Cannot be mod; break out.
 284 |    105 |       else
 285 |    105 |         tvd += mod_inc[y];
 286 |  1.44k |   }
 287 |      2 | }
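
The non-contiguous path above memsets one innermost row at a time and uses mod/mod_inc to carry the stride skips up through the outer dimensions without recursion. The same effect, written as a simpler (and slower) recursive sketch to make that bookkeeping concrete; this helper is hypothetical, not part of the library:

  #include <stdint.h>
  #include <string.h>

  static void tensor_zero_rec(uint8_t* const data, const int* const dim, const int* const inc, const int nd, const size_t elem_size)
  {
    if (nd == 1)
    {
      memset(data, 0, elem_size * dim[0]); // one contiguous innermost row
      return;
    }
    size_t stride = elem_size; // bytes between consecutive indices on axis 0
    int i;
    for (i = 1; i < nd; i++)
      stride *= inc[i];
    for (i = 0; i < dim[0]; i++)
      tensor_zero_rec(data + i * stride, dim + 1, inc + 1, nd - 1, elem_size);
  }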
 288 |        |
 289 |        | int ccv_nnc_tensor_eq(const ccv_nnc_tensor_t* const a, const ccv_nnc_tensor_t* const b)
 290 |    523 | {
 291 |    523 |   assert(!CCV_IS_TENSOR_VIEW(a));
 292 |    523 |   assert(!CCV_IS_TENSOR_VIEW(b));
 293 |        |   // If a is a dense matrix, just use ccv_matrix_eq
 294 |    523 |   if (CCV_TENSOR_IS_DENSE_MATRIX(a->type))
 295 |     93 |     return ccv_matrix_eq((ccv_matrix_t*)a, (ccv_matrix_t*)b);
 296 |        |   // Otherwise, do our own thing.
 297 |    430 |   if (CCV_GET_DATA_TYPE(a->type) != CCV_GET_DATA_TYPE(b->type))
 298 |      0 |     return -1;
 299 |    430 |   int i, c = 1;
 300 |  1.18k |   for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC;
     |    756 |     i++)
 301 |  1.18k |   {
 302 |  1.18k |     if (!a->info.dim[i] &&
     |    430 |       !b->info.dim[i])
 303 |    430 |       break;
 304 |    756 |     if (a->info.dim[i] != b->info.dim[i])
 305 |      0 |       return -1;
 306 |    756 |     c *= a->info.dim[i];
 307 |    756 |   }
 308 |    430 |   if (CCV_GET_DATA_TYPE(a->type) == CCV_32S)
 309 |      3 |     return memcmp(a->data.ptr, b->data.ptr, sizeof(int) * c) == 0 ? 0 :
     |      0 |       -1;
 310 |        |   // Only 32F and 64F are supported at this point.
 311 |    427 |   assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F || CCV_GET_DATA_TYPE(a->type) == CCV_64F);
 312 |        |   // Read: http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
 313 |        |   // http://floating-point-gui.de/errors/comparison/
 314 |    427 |   if (CCV_GET_DATA_TYPE(a->type) == CCV_32F)
 315 |    425 |   {
 316 |    425 |     static const float epsi = FLT_EPSILON;
 317 |    425 |     static const int32_t ulps = 128; // so that 1 and 1.000015 will be treated as the same
 318 |  1.40M |     for (i = 0; i < c; i++)
 319 |  1.40M |     {
 320 |        |       // Although this is floating point, use the integer representation to compare.
 321 |  1.40M |       int32_t i32a = a->data.i32[i];
 322 |  1.40M |       if (i32a < 0)
 323 |  18.7k |         i32a = 0x80000000 - i32a;
 324 |  1.40M |       int32_t i32b = b->data.i32[i];
 325 |  1.40M |       if (i32b < 0)
 326 |  18.8k |         i32b = 0x80000000 - i32b;
 327 |  1.40M |       if (abs(i32a - i32b) > ulps &&
     |      6 |         fabsf(a->data.f32[i] - b->data.f32[i]) > epsi)
 328 |      0 |         return -1;
 329 |  1.40M |     }
 330 |    425 |   } else
     |      2 |     if (CCV_GET_DATA_TYPE(a->type) == CCV_64F) {
 331 |      2 |     typedef union {
 332 |      2 |       double f64;
 333 |      2 |       int64_t i64;
 334 |      2 |     } Float64;
 335 |      2 |     static const double epsi = DBL_EPSILON;
 336 |      2 |     static const int64_t ulps = 128; // so that 1 and 1.000015 will be treated as the same
 337 |  15.8k |     for (i = 0; i < c; i++)
 338 |  15.8k |     {
 339 |        |       // Although this is floating point, use the integer representation to compare.
 340 |  15.8k |       Float64 f64a, f64b;
 341 |  15.8k |       f64a.f64 = a->data.f64[i];
 342 |  15.8k |       f64b.f64 = b->data.f64[i];
 343 |  15.8k |       if (f64a.i64 < 0)
 344 |      0 |         f64a.i64 = 0x8000000000000000 - f64a.i64;
 345 |  15.8k |       if (f64b.i64 < 0)
 346 |      0 |         f64b.i64 = 0x8000000000000000 - f64b.i64;
 347 |  15.8k |       if (llabs(f64a.i64 - f64b.i64) > ulps &&
     |      0 |         fabs(a->data.f64[i] - b->data.f64[i]) > epsi)
 348 |      0 |         return -1;
 349 |  15.8k |     }
 350 |      2 |   }
 351 |    427 |   return 0;
 352 |    427 | }
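
The float comparison maps each bit pattern onto a monotonic integer line (the 0x80000000 - bits flip turns sign-magnitude order into two's-complement order), treats values within 128 ULPs as equal, and keeps an absolute-epsilon fallback for values straddling zero. The same idea in isolation, as an editor's standalone sketch with the same constants:

  #include <float.h>
  #include <math.h>
  #include <stdint.h>
  #include <stdlib.h>

  static int float_almost_eq(const float a, const float b)
  {
    union { float f; int32_t i; } ua = { a }, ub = { b }; // stands in for reading data.i32 directly
    // Two's-complement flip for negatives, exactly as in the loop above.
    const int32_t ia = ua.i < 0 ? (int32_t)(0x80000000 - ua.i) : ua.i;
    const int32_t ib = ub.i < 0 ? (int32_t)(0x80000000 - ub.i) : ub.i;
    // Equal if within 128 ULPs, or within FLT_EPSILON absolutely (near zero);
    // the 64-bit difference avoids overflow for very distant values.
    return llabs((int64_t)ia - ib) <= 128 || fabsf(a - b) <= FLT_EPSILON;
  }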