Coverage Report

Created: 2021-04-07 21:56

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/ccv_nnc_tensor.c
#include "ccv_nnc.h"
#include "ccv_nnc_easy.h"
#include "ccv_nnc_internal.h"
#ifdef HAVE_CUDA
#include "gpu/ccv_nnc_compat.h"
#endif

// MARK - Level-1 API

const int ccv_nnc_no_ofs[CCV_NNC_MAX_DIM_ALLOC] = {0};

ccv_nnc_tensor_t* ccv_nnc_tensor_new(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
{
  ccv_nnc_tensor_t* tensor;
  // This specific form can be toll-free bridged to ccv_dense_matrix_t (on CPU, 3 dims (channels, rows, cols), and channels no larger than the max channels of ccv_dense_matrix_t).
  const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY &&
    params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 &&
    params.dim[2] <= CCV_MAX_CHANNEL &&
    params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
  if (ptr)
  {
    tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
    tensor->alias_ref = 0;
    tensor->sig = 0;
    tensor->refcount = 1;
    tensor->info = params;
    if (tfb)
    {
      tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | params.datatype | params.dim[2];
      // This corresponds to mat->step
      tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (params.datatype | params.dim[2]));
    } else // This won't be recognized by ccv_dense_matrix_t
      tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | params.datatype;
    tensor->data.u8 = (uint8_t*)ptr;
    return tensor;
  }
  if (flags & CCV_TENSOR_CPU_MEMORY)
  {
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  } else if (flags & CCV_TENSOR_GPU_MEMORY) {
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
  }
  const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 15) & -16;
  const size_t size = ccv_nnc_tensor_data_size(params);
#ifdef HAVE_CUDA
  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
  {
    tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
    tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
  } else {
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
    ccmemalign((void **)&tensor, 16, tensor_hdr_size + size);
    tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
  }
#else
  assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  ccmemalign((void **)&tensor, 16, tensor_hdr_size + size);
  tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
#endif
  tensor->alias_ref = 0;
  tensor->data_size = size;
  tensor->sig = 0;
  tensor->refcount = 1;
  tensor->info = params;
  if (tfb)
  {
    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype | params.dim[2];
    // This corresponds to mat->step
    tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (params.datatype | params.dim[2]));
  } else
    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype;
  return tensor;
}
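
The (sizeof(ccv_nnc_tensor_t) + 15) & -16 idiom above rounds the header size up to the next multiple of 16, so when header and data share one ccmemalign'd block the data region stays 16-byte aligned. A minimal usage sketch, assuming only the param fields visible in this file (the shape and values are illustrative, not canonical usage):

#include "ccv_nnc.h"

int main(void)
{
  const ccv_nnc_tensor_param_t params = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32F,
    .dim = {2, 2, 3}, // 3-dim shape; dim[2] is the channel count the tfb check tests against CCV_MAX_CHANNEL
  };
  // ptr == 0: header and data come from a single 16-byte aligned allocation.
  ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_new(0, params, 0);
  tensor->data.f32[0] = 1.0f;
  ccv_nnc_tensor_free(tensor);
  return 0;
}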

ccv_nnc_tensor_t* ccv_nnc_tensor_resize(ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params)
{
  assert(!CCV_IS_TENSOR_VIEW(tensor));
  assert(tensor->type & CCV_UNMANAGED);
  assert(tensor->data_size > 0);
  assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GET_MEMORY(tensor->info.type));
  assert(CCV_TENSOR_GET_DEVICE(params.type) == CCV_TENSOR_GET_DEVICE(tensor->info.type));
  const size_t size = ccv_nnc_tensor_data_size(params);
  const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY &&
    params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 &&
    params.dim[2] <= CCV_MAX_CHANNEL &&
    params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
  tensor->info = params;
#ifdef HAVE_CUDA
  const int pinned_mem = (tensor->type & CCV_PINNED_MEM);
#endif
  if (tfb)
  {
    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype | params.dim[2];
    // This corresponds to mat->step
    tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (params.datatype | params.dim[2]));
  } else
    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype;
  if (size <= tensor->data_size) // Nothing to do.
  {
#ifdef HAVE_CUDA
    if (pinned_mem)
      tensor->type |= CCV_PINNED_MEM;
#endif
    return tensor;
  }
  ccv_nnc_tensor_t* new_tensor = tensor;
  const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 15) & -16;
#ifdef HAVE_CUDA
  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
  {
    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
    const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type);
    assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type));
    cufree(device_id, tensor->data.u8);
    new_tensor->data.u8 = (uint8_t*)cumalloc(device_id, size);
  } else {
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
    assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
    // Unregister first; the memory is pinned again after the realloc.
    if (pinned_mem)
      cuunregister(new_tensor->data.u8);
    new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
    new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
  }
#else
  assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
  new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
#endif
  new_tensor->data_size = size;
#ifdef HAVE_CUDA
  if (pinned_mem)
    ccv_nnc_tensor_pin_memory(new_tensor);
#endif
  return new_tensor;
}
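
The contract worth noting: a new shape that fits in the existing data_size is handled in place, while growth goes through ccrealloc and may relocate the header, so callers must adopt the returned pointer. A minimal sketch under that reading (grow_rows and the doubling are illustrative):

#include "ccv_nnc.h"

static ccv_nnc_tensor_t* grow_rows(ccv_nnc_tensor_t* const tensor)
{
  ccv_nnc_tensor_param_t params = tensor->info;
  params.dim[0] *= 2; // request more rows than the current buffer holds
  // May return a different pointer; never keep using the old one.
  return ccv_nnc_tensor_resize(tensor, params);
}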

ccv_nnc_tensor_t ccv_nnc_tensor(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
{
  // This specific form can be toll-free bridged to ccv_dense_matrix_t.
  const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY &&
    params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 &&
    params.dim[2] <= CCV_MAX_CHANNEL &&
    params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
  ccv_nnc_tensor_t tensor;
  tensor.alias_ref = 0;
  tensor.sig = 0;
  tensor.refcount = 1;
  tensor.info = params;
  if (flags & CCV_TENSOR_CPU_MEMORY)
  {
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  } else if (flags & CCV_TENSOR_GPU_MEMORY) {
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
  }
  if (tfb)
  {
    tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype | params.dim[2];
    // This corresponds to mat->step
    tensor.info.dim[4] = CCV_GET_STEP(params.dim[1], (params.datatype | params.dim[2]));
  } else // This won't be recognized by ccv_dense_matrix_t
    tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype;
  tensor.data.u8 = (uint8_t*)ptr;
  tensor.data_size = 0;
  return tensor;
}
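
Unlike ccv_nnc_tensor_new, this by-value variant allocates nothing: the header lives on the caller's stack, CCV_NO_DATA_ALLOC marks the data as borrowed, and data_size stays 0, so no ccv_nnc_tensor_free is needed. A minimal sketch, assuming a caller-owned buffer (sum3 and the 1-D shape are illustrative):

#include "ccv_nnc.h"

static float sum3(const float* const buf /* at least 3 floats */)
{
  const ccv_nnc_tensor_param_t params = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32F,
    .dim = {3}, // a 1-D tensor over the borrowed buffer
  };
  const ccv_nnc_tensor_t tensor = ccv_nnc_tensor(buf, params, 0);
  return tensor.data.f32[0] + tensor.data.f32[1] + tensor.data.f32[2];
}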

int ccv_nnc_tensor_pin_memory(ccv_nnc_tensor_t* const tensor)
{
#ifdef HAVE_CUDA
  assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
  if (!(tensor->type & CCV_PINNED_MEM) && tensor->data_size)
  {
    const int success = curegister(tensor->data.u8, tensor->data_size);
    if (success)
      tensor->type |= CCV_PINNED_MEM;
    return success ? 0 : -1;
  }
#endif
  return 0;
}
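
Pinning page-locks the host buffer (curegister) so CUDA can DMA directly to and from it, which speeds up repeated host/device copies; on a build without HAVE_CUDA the call is a no-op that returns 0. A minimal sketch of how a caller might use the return code (prepare_for_transfer is illustrative):

#include "ccv_nnc.h"

static int prepare_for_transfer(ccv_nnc_tensor_t* const tensor /* CPU tensor */)
{
  if (ccv_nnc_tensor_pin_memory(tensor) != 0)
    return -1; // curegister failed; transfers still work, just unpinned
  // ccv_nnc_tensor_free will cuunregister the pinned pages later.
  return 0;
}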

void ccv_nnc_tensor_free(ccv_nnc_tensor_t* const tensor)
{
#ifdef HAVE_CUDA
  if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY &&
    !(tensor->type & CCV_NO_DATA_ALLOC)) // If this is GPU memory and it is allocated, free.
    cufree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8);
  if (tensor->type & CCV_PINNED_MEM)
    cuunregister(tensor->data.u8);
#endif
  ccfree(tensor);
}

static inline void _ccv_nnc_tensor_view_set(ccv_nnc_tensor_view_t* const tv, const ccv_nnc_tensor_t* const tensor, const int dim[CCV_NNC_MAX_DIM_ALLOC], const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC])
{
  memcpy(tv->inc, inc, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
  memcpy(tv->info.dim, dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
  uint8_t* const p = tensor->data.u8;
  const off_t off = tv->off = ccv_nnc_tensor_view_offset(tv->info.datatype, tv->inc, ofs);
  tv->data.u8 = p + off;
}

ccv_nnc_tensor_view_t* ccv_nnc_tensor_view_new(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC])
{
  ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)ccmalloc(sizeof(ccv_nnc_tensor_view_t));
  tv->type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW;
  tv->alias_ref = (uintptr_t)tensor;
  tv->refcount = 1;
  tv->sig = 0;
  tv->data_size = 0;
  assert(params.type == tensor->info.type);
  assert(params.datatype == tensor->info.datatype);
  tv->info = params;
  _ccv_nnc_tensor_view_set(tv, tensor, params.dim, ofs, inc);
  return tv;
}
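
A view shares its parent's storage: dim is the window's shape, ofs the per-axis starting offset, and inc the per-axis increments (read here as the parent's extents) from which the byte offset and strides are derived. A minimal sketch, assuming a 4x4x1 NHWC 32F parent (center_window and the shapes are illustrative):

#include "ccv_nnc.h"

static ccv_nnc_tensor_view_t* center_window(ccv_nnc_tensor_t* const tensor /* 4x4x1, 32F */)
{
  ccv_nnc_tensor_param_t params = tensor->info;
  params.dim[0] = params.dim[1] = 2; // the 2x2 window's own shape
  const int ofs[CCV_NNC_MAX_DIM_ALLOC] = {1, 1, 0}; // start at row 1, col 1
  const int inc[CCV_NNC_MAX_DIM_ALLOC] = {4, 4, 1}; // parent extents
  // Free with ccv_nnc_tensor_view_free; the parent stays alive independently.
  return ccv_nnc_tensor_view_new(tensor, params, ofs, inc);
}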

ccv_nnc_tensor_view_t ccv_nnc_tensor_view(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC])
{
  assert(!CCV_IS_TENSOR_VIEW(tensor));
  assert(params.type == tensor->info.type);
  assert(params.datatype == tensor->info.datatype);
  ccv_nnc_tensor_view_t tv = {
    .alias_ref = (uintptr_t)tensor,
    .type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW, // clean up the channel bits, and then add CCV_TENSOR_VIEW identifier
    .refcount = 1,
    .sig = 0,
    .info = params,
    .data_size = 0,
  };
  _ccv_nnc_tensor_view_set(&tv, tensor, params.dim, ofs, inc);
  return tv;
}

void ccv_nnc_tensor_view_free(ccv_nnc_tensor_view_t* const tensor_view)
{
  ccfree(tensor_view);
}

void ccv_nnc_tensor_zero(void* const tensor)
{
  ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)tensor;
  const size_t data_size = CCV_GET_DATA_TYPE_SIZE(tv->info.datatype);
  if (!CCV_IS_TENSOR_VIEW(tv))
  {
    memset(tv->data.u8, 0, data_size * ccv_nnc_tensor_count(tv->info));
    return;
  }
  const int nd = ccv_nnc_tensor_nd(tv->info.dim);
  assert(nd >= 1);
  const int* const tvinc = tv->inc;
  // reset it to 0.
  int c, x, y;
  int count = 1;
  int mod[CCV_NNC_MAX_DIM_ALLOC - 3];
  size_t mod_inc[CCV_NNC_MAX_DIM_ALLOC - 2];
  const size_t top_mod_inc = nd > 2 ? data_size * tvinc[nd - 3] * tvinc[nd - 2] * tvinc[nd - 1] : data_size;
  if (nd > 2)
    mod_inc[nd - 3] = top_mod_inc;
  for (c = nd - 4; c >= 0; c--)
  {
    // Compute the mod.
    mod[c] = c == nd - 4 ? tv->info.dim[c] : mod[c + 1] * tv->info.dim[c];
    mod_inc[c] = mod_inc[c + 1] * tvinc[c];
    count *= tv->info.dim[c];
  }
  for (c = 0; c < nd - 3; c++)
    mod_inc[c] = mod_inc[c + 1] * (tvinc[c] - tv->info.dim[c]);
  uint8_t* tvd = tv->data.u8;
  const size_t tvinc_1 = data_size * tvinc[nd - 1];
  const size_t tvinc_21 = tvinc_1 * (nd >= 2 ? tvinc[nd - 2] : 1);
  const size_t tvdim_1 = data_size * tv->info.dim[nd - 1];
  const int max_y = ccv_max(1, nd >= 3 ? tv->info.dim[nd - 3] : 1);
  const int max_x = ccv_max(1, nd >= 2 ? tv->info.dim[nd - 2] : 1);
  for (c = 0; c < count; c++)
  {
    for (y = 0; y < max_y; y++)
    {
      uint8_t* tvp = tvd + y * tvinc_21;
      for (x = 0; x < max_x; x++)
      {
        memset(tvp, 0, tvdim_1);
        tvp += tvinc_1;
      }
    }
    tvd += top_mod_inc;
    for (y = nd - 4; y >= 0; y--)
      if ((c + 1) % mod[y] != 0)
        break; // cannot be mod, break out.
      else
        tvd += mod_inc[y];
  }
}
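
For a contiguous tensor the whole buffer is cleared with one memset; for a view the loops clear one innermost run of tvdim_1 bytes at a time and step by the parent's increments. A minimal sketch with the arithmetic worked out, assuming a 4x4x1 NHWC 32F parent (zero_center and the shapes are illustrative): for the 2x2x1 window below, nd = 3, so each memset clears tvdim_1 = 4 bytes (one 32F channel run), columns advance by tvinc_1 = 4 bytes, and rows advance by tvinc_21 = 4 * 4 = 16 bytes; only the 4 windowed elements are zeroed and the parent's other 12 keep their values.

#include "ccv_nnc.h"

static void zero_center(ccv_nnc_tensor_t* const tensor /* 4x4x1, 32F */)
{
  ccv_nnc_tensor_param_t params = tensor->info;
  params.dim[0] = params.dim[1] = 2;
  const int ofs[CCV_NNC_MAX_DIM_ALLOC] = {1, 1, 0};
  const int inc[CCV_NNC_MAX_DIM_ALLOC] = {4, 4, 1};
  ccv_nnc_tensor_view_t view = ccv_nnc_tensor_view(tensor, params, ofs, inc);
  ccv_nnc_tensor_zero(&view); // stack view: nothing to free afterwards
}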

int ccv_nnc_tensor_eq(const ccv_nnc_tensor_t* const a, const ccv_nnc_tensor_t* const b)
{
  assert(!CCV_IS_TENSOR_VIEW(a));
  assert(!CCV_IS_TENSOR_VIEW(b));
  // If a is a dense matrix, just use ccv_matrix_eq.
  if (CCV_TENSOR_IS_DENSE_MATRIX(a->type))
    return ccv_matrix_eq((ccv_matrix_t*)a, (ccv_matrix_t*)b);
  // Otherwise, do our own thing.
  if (CCV_GET_DATA_TYPE(a->type) != CCV_GET_DATA_TYPE(b->type))
    return -1;
  // Only 32F is supported at this point.
  assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F);
  int i, c = 1;
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; i++)
  {
    if (!a->info.dim[i] && !b->info.dim[i])
      break;
    if (a->info.dim[i] != b->info.dim[i])
      return -1;
    c *= a->info.dim[i];
  }
  // Read: http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
  // and http://floating-point-gui.de/errors/comparison/
  static const float epsi = FLT_EPSILON;
  static const int32_t ulps = 128; // So that 1 and 1.000015 will be treated as the same.
  for (i = 0; i < c; i++)
  {
    // Although this is floating point, integers are used as the way to compare.
    int32_t i32a = a->data.i32[i];
    if (i32a < 0)
      i32a = 0x80000000 - i32a;
    int32_t i32b = b->data.i32[i];
    if (i32b < 0)
      i32b = 0x80000000 - i32b;
    if (abs(i32a - i32b) > ulps && fabsf(a->data.f32[i] - b->data.f32[i]) > epsi)
      return -1;
  }
  return 0;
}
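
The comparison relies on a property of IEEE-754: reinterpreting a float's bits as a signed integer gives a number line where adjacent representable floats differ by exactly 1, and the 0x80000000 - i remap keeps that line monotonic for negative values, so an integer difference counts units in the last place (ULPs). A minimal standalone sketch of the same trick (nearly_equal is illustrative; memcpy stands in for the union the tensor code uses):

#include <float.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

static int nearly_equal(const float a, const float b)
{
  int32_t ia, ib;
  memcpy(&ia, &a, sizeof(ia)); // well-defined bit reinterpretation
  memcpy(&ib, &b, sizeof(ib));
  if (ia < 0)
    ia = (int32_t)(0x80000000u - (uint32_t)ia); // make negatives monotonic
  if (ib < 0)
    ib = (int32_t)(0x80000000u - (uint32_t)ib);
  // Same tolerance as ccv_nnc_tensor_eq: within 128 ULPs, or within
  // FLT_EPSILON absolutely, counts as equal (1.0f matches 1.000015f).
  return abs(ia - ib) <= 128 || fabsf(a - b) <= FLT_EPSILON;
}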