Coverage Report

Created: 2017-11-12 13:27

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/ccv_nnc_tensor.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv_nnc.h"
2
#include "ccv_nnc_easy.h"
3
#include "ccv_nnc_internal.h"
4
#ifdef HAVE_CUDA
5
#include "gpu/ccv_nnc_compat.h"
6
#endif
7
8
// Shared all-zero offset array, usable as the default "no offset" argument
// wherever an ofs[CCV_NNC_MAX_DIM_ALLOC] parameter is expected.
const int ccv_nnc_no_ofs[CCV_NNC_MAX_DIM_ALLOC] = {0};
9
10
ccv_nnc_tensor_t* ccv_nnc_tensor_new(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
11
199
{
12
199
  ccv_nnc_tensor_t* tensor;
13
199
  // this specific form can be toll-free bridging to ccv_dense_matrix_t (On CPU, and 3 dims (channels, rows, cols), and channels is smaller than max channels of ccv_dense_matrix_t).
14
199
  int tfb = (
CCV_TENSOR_GET_MEMORY199
(params.type) == CCV_TENSOR_CPU_MEMORY &&
params.format == CCV_TENSOR_FORMAT_NHWC194
&&
params.dim[2] > 0190
&&
params.dim[2] <= 119
CCV_MAX_CHANNEL119
&&
params.dim[0] > 0119
&&
params.dim[1] > 0119
&&
params.dim[3] == 0119
);
15
199
  if (ptr)
16
3
  {
17
3
    tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
18
3
    tensor->alias_ref = 0;
19
3
    tensor->sig = 0;
20
3
    tensor->refcount = 1;
21
3
    tensor->info = params;
22
3
    if (tfb)
23
0
    {
24
0
      tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | params.datatype | params.dim[2];
25
0
      // This corresponding to mat->step
26
0
      tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (params.datatype | params.dim[2]));
27
0
    } else // This won't be recognized by ccv_dense_matrix_t
28
3
      tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | params.datatype;
29
3
    tensor->data.u8 = (uint8_t*)ptr;
30
3
    return tensor;
31
3
  }
32
196
  
if (196
flags & CCV_TENSOR_CPU_MEMORY196
)
33
0
  {
34
0
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
35
196
  } else 
if (196
flags & CCV_TENSOR_GPU_MEMORY196
)
{0
36
0
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
37
0
  }
38
196
  const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 15) & -16;
39
196
  const size_t size = ccv_nnc_tensor_data_size(params);
40
196
#ifdef HAVE_CUDA
41
196
  if (
CCV_TENSOR_GET_MEMORY196
(params.type) == CCV_TENSOR_GPU_MEMORY196
)
42
5
  {
43
5
    tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
44
5
    tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
45
191
  } else {
46
191
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
47
191
    ccmemalign((void **)&tensor, 16, tensor_hdr_size + size);
48
191
    tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
49
191
  }
50
196
#else
51
  assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
52
  ccmemalign((void **)&tensor, 16, tensor_hdr_size + size);
53
  tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
54
#endif
55
196
  tensor->alias_ref = 0;
56
196
  tensor->sig = 0;
57
196
  tensor->refcount = 1;
58
196
  tensor->info = params;
59
196
  if (tfb)
60
85
  {
61
85
    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype | params.dim[2];
62
85
    // This corresponding to mat->step
63
85
    tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (params.datatype | params.dim[2]));
64
85
  } else
65
111
    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype;
66
196
  return tensor;
67
199
}
68
69
ccv_nnc_tensor_t ccv_nnc_tensor(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
70
223
{
71
223
  // this specific form can be toll-free bridging to ccv_dense_matrix_t
72
223
  int tfb = (
CCV_TENSOR_GET_MEMORY223
(params.type) == CCV_TENSOR_CPU_MEMORY &&
params.format == CCV_TENSOR_FORMAT_NHWC223
&&
params.dim[2] > 0223
&&
params.dim[2] <= 81
CCV_MAX_CHANNEL81
&&
params.dim[0] > 081
&&
params.dim[1] > 081
&&
params.dim[3] == 081
);
73
223
  ccv_nnc_tensor_t tensor;
74
223
  tensor.alias_ref = 0;
75
223
  tensor.sig = 0;
76
223
  tensor.refcount = 1;
77
223
  tensor.info = params;
78
223
  if (flags & CCV_TENSOR_CPU_MEMORY)
79
0
  {
80
0
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
81
223
  } else 
if (223
flags & CCV_TENSOR_GPU_MEMORY223
)
{0
82
0
    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
83
0
  }
84
223
  if (tfb)
85
62
  {
86
62
    tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype | params.dim[2];
87
62
    // This corresponding to mat->step
88
62
    tensor.info.dim[4] = CCV_GET_STEP(params.dim[1], (params.datatype | params.dim[2]));
89
62
  } else // This won't be recognized by ccv_dense_matrix_t
90
161
    tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | params.datatype;
91
223
  tensor.data.u8 = (uint8_t*)ptr;
92
223
  return tensor;
93
223
}
94
95
void ccv_nnc_tensor_free(ccv_nnc_tensor_t* const tensor)
{
	// Release a tensor created by ccv_nnc_tensor_new. A GPU tensor's payload
	// was allocated separately on the device (via cumalloc), so it is freed
	// first; the header — which for CPU tensors also holds the inline
	// payload — is released last.
#ifdef HAVE_CUDA
	if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY)
		cufree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8);
#endif
	ccfree(tensor);
}
103
104
static inline void _ccv_nnc_tensor_view_set(ccv_nnc_tensor_view_t* const tv, const ccv_nnc_tensor_t* const tensor, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int dim[CCV_NNC_MAX_DIM_ALLOC])
{
	// Fill in the view's geometry: the requested dims become the view's own
	// extent, while the parent's dims become the view's per-dimension
	// increments (used elsewhere as the stride of the underlying storage).
	memcpy(tv->info.dim, dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
	memcpy(tv->inc, tensor->info.dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
	// Compute the byte offset of ofs within the parent, cache it on the view,
	// and point the view's data at that position in the parent's buffer.
	const off_t off = ccv_nnc_tensor_view_offset(tv, ofs);
	tv->off = off;
	uint8_t* const base = tensor->data.u8;
	tv->data.u8 = base + off;
}
112
113
ccv_nnc_tensor_view_t* ccv_nnc_tensor_view_new(const ccv_nnc_tensor_t* const tensor, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int dim[CCV_NNC_MAX_DIM_ALLOC])
114
0
{
115
0
  ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)ccmalloc(sizeof(ccv_nnc_tensor_view_t));
116
0
  tv->type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW;
117
0
  tv->alias_ref = (uintptr_t)tensor;
118
0
  tv->refcount = 1;
119
0
  tv->sig = 0;
120
0
  tv->info = tensor->info;
121
0
  _ccv_nnc_tensor_view_set(tv, tensor, ofs, dim);
122
0
  return tv;
123
0
}
124
125
ccv_nnc_tensor_view_t ccv_nnc_tensor_view(const ccv_nnc_tensor_t* const tensor, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int dim[CCV_NNC_MAX_DIM_ALLOC])
126
43
{
127
43
  assert(!CCV_IS_TENSOR_VIEW(tensor));
128
43
  ccv_nnc_tensor_view_t tv = {
129
43
    .alias_ref = (uintptr_t)tensor,
130
43
    .type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW, // clean up the channel bits, and then add CCV_TENSOR_VIEW identifier
131
43
    .refcount = 1,
132
43
    .sig = 0,
133
43
    .info = tensor->info,
134
43
  };
135
43
  _ccv_nnc_tensor_view_set(&tv, tensor, ofs, dim);
136
43
  return tv;
137
43
}
138
139
void ccv_nnc_tensor_view_free(ccv_nnc_tensor_view_t* const tensor_view)
{
	// A view never owns its data (that belongs to the aliased tensor), so
	// releasing the view header is all there is to do.
	ccfree(tensor_view);
}
143
144
// Zero out a tensor's payload. For a plain (non-view) tensor this is a single
// memset over the whole buffer; for a view it walks the strided sub-volume,
// zeroing one innermost row at a time.
// NOTE(review): the view path indexes mod_inc[nd - 3] and dim[nd - 1..3]
// directly — it appears to assume nd >= 3; confirm callers guarantee that.
void ccv_nnc_tensor_zero(void* const tensor)
{
	ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)tensor;
	const size_t data_size = CCV_GET_DATA_TYPE_SIZE(tv->info.datatype);
	if (!CCV_IS_TENSOR_VIEW(tv))
	{
		// Contiguous storage: one memset covers everything.
		memset(tv->data.u8, 0, data_size * ccv_nnc_tensor_count(tv->info));
		return;
	}
	const int nd = ccv_nnc_tensor_nd(tv->info.dim);
	const int* tvinc = tv->inc;
	// reset it to 0.
	int c, x, y;
	int count = 1;
	// mod[c]: product of dims from c down to nd-4, used to detect when the
	// iteration wraps at level c. mod_inc[c]: byte span of one full step at
	// level c in the parent's (inc-strided) layout.
	int mod[CCV_NNC_MAX_DIM_ALLOC - 3];
	size_t mod_inc[CCV_NNC_MAX_DIM_ALLOC - 2];
	mod_inc[nd - 3] = data_size * tvinc[nd - 3] * tvinc[nd - 2] * tvinc[nd - 1];
	for (c = nd - 4; c >= 0; c--)
	{
		// Compute the mod.
		mod[c] = c == nd - 4 ? tv->info.dim[c] : mod[c + 1] * tv->info.dim[c];
		mod_inc[c] = mod_inc[c + 1] * tvinc[c];
		count *= tv->info.dim[c];
	}
	// Convert mod_inc[c] from a full-level span into the skip needed to jump
	// over the part of the parent not covered by the view at level c.
	for (c = 0; c < nd - 3; c++)
		mod_inc[c] = mod_inc[c + 1] * (tvinc[c] - tv->info.dim[c]);
	uint8_t* tvd = tv->data.u8;
	// Byte strides of the parent for the last three dims, and the byte length
	// of one innermost row of the view (the unit actually memset).
	const size_t tvinc_21 = data_size * tvinc[nd - 2] * tvinc[nd - 1];
	const size_t tvinc_1 = data_size * tvinc[nd - 1];
	const size_t tvdim_1 = data_size * tv->info.dim[nd - 1];
	for (c = 0; c < count; c++)
	{
		// Zero one (dim[nd-3] x dim[nd-2] x dim[nd-1]) sub-block.
		for (y = 0; y < ccv_max(1, tv->info.dim[nd - 3]); y++)
		{
			uint8_t* tvp = tvd + y * tvinc_21;
			for (x = 0; x < ccv_max(1, tv->info.dim[nd - 2]); x++)
			{
				memset(tvp, 0, tvdim_1);
				tvp += tvinc_1;
			}
		}
		// Advance past this sub-block, then add the extra skip for every
		// outer dimension that just wrapped around.
		tvd += mod_inc[nd - 3];
		for (y = nd - 4; y >= 0; y--)
			if ((c + 1) % mod[y] != 0)
				break; // cannot be mod, break out.
			else
				tvd += mod_inc[y];
	}
}
193
194
int ccv_nnc_tensor_eq(const ccv_nnc_tensor_t* const a, const ccv_nnc_tensor_t* const b)
195
23
{
196
23
  assert(!CCV_IS_TENSOR_VIEW(a));
197
23
  assert(!CCV_IS_TENSOR_VIEW(b));
198
23
  // If a is a dense matrix, just use ccv_matrix_eq
199
23
  if (CCV_TENSOR_IS_DENSE_MATRIX(a->type))
200
10
    return ccv_matrix_eq((ccv_matrix_t*)a, (ccv_matrix_t*)b);
201
23
  // Otherwise, do our own thing.
202
13
  
if (13
CCV_GET_DATA_TYPE13
(a->type) != 13
CCV_GET_DATA_TYPE13
(b->type))
203
0
    return -1;
204
13
  // Only support 32F at this point.
205
13
  assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F);
206
13
  int i, c = 1;
207
42
  for (i = 0; 
i < 42
CCV_NNC_MAX_DIM_ALLOC42
;
i++29
)
208
42
  {
209
42
    if (
!a->info.dim[i] && 42
!b->info.dim[i]13
)
210
13
      break;
211
29
    
if (29
a->info.dim[i] != b->info.dim[i]29
)
212
0
      return -1;
213
29
    c *= a->info.dim[i];
214
29
  }
215
13
  // Read: http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
216
13
  // http://floating-point-gui.de/errors/comparison/
217
13
  static const float epsi = FLT_EPSILON;
218
13
  static const int32_t ulps = 128; // so that for 1 and 1.000015 will be treated as the same.
219
720k
  for (i = 0; 
i < c720k
;
i++720k
)
220
720k
  {
221
720k
    // Although this is float point, I use integer as a way to compare.
222
720k
    int32_t i32a = a->data.i32[i];
223
720k
    if (i32a < 0)
224
58
      i32a = 0x80000000 - i32a;
225
720k
    int32_t i32b = b->data.i32[i];
226
720k
    if (i32b < 0)
227
58
      i32b = 0x80000000 - i32b;
228
720k
    if (
abs(i32a - i32b) > ulps && 720k
fabsf(a->data.f32[i] - b->data.f32[i]) > epsi0
)
229
0
      return -1;
230
720k
  }
231
13
  return 0;
232
13
}