Coverage Report

Created: 2017-11-12 13:27

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/util/ccv_nnc_util_cpu_ref.c
Line|Count|Source
   1|     |#include <ccv.h>
   2|     |#include <ccv_internal.h>
   3|     |#include <nnc/ccv_nnc.h>
   4|     |#include <nnc/ccv_nnc_easy.h>
   5|     |#include <nnc/ccv_nnc_internal.h>
   6|     |#ifdef USE_OPENMP
   7|     |#include <omp.h>
   8|     |#endif
   9|     |#ifdef USE_DISPATCH
  10|     |#include <dispatch/dispatch.h>
  11|     |#endif
  12|     |#include "../_ccv_nnc_cpu_ref.h"
  13|     |
  14|     |void _ccv_nnc_tensor_transfer_cpu_ref(const ccv_nnc_tensor_view_t* const a, ccv_nnc_tensor_view_t* const b)
  15|    9|{
  16|    9|  // Assuming this is float 32.
  17|    9|  int dim[CCV_NNC_MAX_DIM + 2];
  18|    9|  int ainc[CCV_NNC_MAX_DIM + 2];
  19|    9|  int binc[CCV_NNC_MAX_DIM + 2];
  20|    9|  assert(a->info.dim[CCV_NNC_MAX_DIM + 2] == 0);
  21|    9|  assert(b->info.dim[CCV_NNC_MAX_DIM + 2] == 0);
  22|    9|  if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b))
  23|    5|  {
  24|    5|    // Super optimal case, just do memcpy.
  25|    5|    memcpy(b->data.f32, a->data.f32, ccv_nnc_tensor_count(a->info) * sizeof(float));
  26|    5|    return;
  27|    5|  }
  28|    4|  ccv_nnc_tensor_view_get_dim(a, dim);
  29|    4|  ccv_nnc_tensor_view_check_dim(b, dim);
  30|    4|  ccv_nnc_tensor_view_get_inc(a, ainc);
  31|    4|  ccv_nnc_tensor_view_get_inc(b, binc);
  32|    4|  assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
  33|    4|  int i[CCV_NNC_MAX_DIM + 2];
  34|    4|  float* ap = a->data.f32;
  35|    4|  float* bp = b->data.f32;
  36|    4|  if (ainc[3] == dim[3] && binc[3] == dim[3])
  37|    0|  {
  38|    0|    // Special casing if the ainc[3] is the same as dim[3] (do memcpy for the last two dim)
  39|    0|    for (i[0] = 0; i[0] < dim[0]; i[0]++)
  40|    0|    {
  41|    0|      for (i[1] = 0; i[1] < dim[1]; i[1]++)
  42|    0|      {
  43|    0|        memcpy(bp, ap, dim[2] * dim[3] * sizeof(float));
  44|    0|        ap += ainc[2] * ainc[3];
  45|    0|        bp += binc[2] * binc[3];
  46|    0|      }
  47|    0|      ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
  48|    0|      bp += (binc[1] - dim[1]) * binc[2] * binc[3];
  49|    0|    }
  50|    0|    return;
  51|    0|  }
  52|    4|  // Non-optimal case, need to do skip copy.
  53|    8|  for (i[0] = 0; i[0] < dim[0]; i[0]++)
  54|    4|  {
  55|    9|    for (i[1] = 0; i[1] < dim[1]; i[1]++)
  56|    5|    {
  57|   14|      for (i[2] = 0; i[2] < dim[2]; i[2]++)
  58|    9|      {
  59|    9|        memcpy(bp, ap, dim[3] * sizeof(float));
  60|    9|        ap += ainc[3];
  61|    9|        bp += binc[3];
  62|    9|      }
  63|    5|      ap += (ainc[2] - dim[2]) * ainc[3];
  64|    5|      bp += (binc[2] - dim[2]) * binc[3];
  65|    5|    }
  66|    4|    ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
  67|    4|    bp += (binc[1] - dim[1]) * binc[2] * binc[3];
  68|    4|  }
  69|    4|}
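
The fall-through path above is a plain strided ("skip") copy: the innermost memcpy moves one contiguous row of dim[3] floats, and the increment arrays carry the pointers past the elements a tensor view skips. A minimal standalone sketch of the same technique, using hypothetical names and dense 4-D increments rather than the library's tensor view types:

#include <string.h>

/* Sketch only: copy a dim[0] x dim[1] x dim[2] x dim[3] block between two
 * buffers whose per-level increments are sinc[]/dinc[] (inc[k] >= dim[k]),
 * mirroring the skip-copy loop in _ccv_nnc_tensor_transfer_cpu_ref. */
static void strided_copy_4d(float* dst, const float* src, const int dim[4], const int sinc[4], const int dinc[4])
{
  int i0, i1, i2;
  for (i0 = 0; i0 < dim[0]; i0++)
  {
    for (i1 = 0; i1 < dim[1]; i1++)
    {
      for (i2 = 0; i2 < dim[2]; i2++)
      {
        memcpy(dst, src, dim[3] * sizeof(float)); /* one contiguous row */
        src += sinc[3]; /* row pitch of the source view */
        dst += dinc[3];
      }
      src += (sinc[2] - dim[2]) * sinc[3]; /* skip rows not covered by the view */
      dst += (dinc[2] - dim[2]) * dinc[3];
    }
    src += (sinc[1] - dim[1]) * sinc[2] * sinc[3]; /* skip planes not covered by the view */
    dst += (dinc[1] - dim[1]) * dinc[2] * dinc[3];
  }
}
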
  70|     |
  71|     |void _ccv_nnc_tensor_set_cpu_ref(ccv_nnc_tensor_view_t* const a, const float b)
  72|    0|{
  73|    0|  // Assuming this is float 32.
  74|    0|  int dim[CCV_NNC_MAX_DIM + 2];
  75|    0|  int ainc[CCV_NNC_MAX_DIM + 2];
  76|    0|  assert(a->info.dim[CCV_NNC_MAX_DIM + 2] == 0);
  77|    0|  int x;
  78|    0|  if (!CCV_IS_TENSOR_VIEW(a))
  79|    0|  {
  80|    0|    // Super optimal case, just do one for-loop for sum.
  81|    0|    const int tensor_count = ccv_nnc_tensor_count(a->info);
  82|    0|    for (x = 0; x < tensor_count; x++)
  83|    0|      a->data.f32[x] = b;
  84|    0|    return;
  85|    0|  }
  86|    0|  assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
  87|    0|  ccv_nnc_tensor_view_get_dim(a, dim);
  88|    0|  ccv_nnc_tensor_view_get_inc(a, ainc);
  89|    0|  int i[CCV_NNC_MAX_DIM + 2];
  90|    0|  float* ap = a->data.f32;
  91|    0|  const int count = dim[2] * dim[3];
  92|    0|  if (ainc[3] == dim[3])
  93|    0|  {
  94|    0|    // Special casing if the ainc[3] is the same as dim[3]
  95|    0|    for (i[0] = 0; i[0] < dim[0]; i[0]++)
  96|    0|    {
  97|    0|      for (i[1] = 0; i[1] < dim[1]; i[1]++)
  98|    0|      {
  99|    0|        for (x = 0; x < count; x++)
 100|    0|          ap[x] = b;
 101|    0|        ap += ainc[2] * ainc[3];
 102|    0|      }
 103|    0|      ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
 104|    0|    }
 105|    0|    return;
 106|    0|  }
 107|    0|  // Non-optimal case, need to do skip copy.
 108|    0|  for (i[0] = 0; i[0] < dim[0]; i[0]++)
 109|    0|  {
 110|    0|    for (i[1] = 0; i[1] < dim[1]; i[1]++)
 111|    0|    {
 112|    0|      for (i[2] = 0; i[2] < dim[2]; i[2]++)
 113|    0|      {
 114|    0|        for (x = 0; x < dim[3]; x++)
 115|    0|          ap[x] = b;
 116|    0|        ap += ainc[3];
 117|    0|      }
 118|    0|      ap += (ainc[2] - dim[2]) * ainc[3];
 119|    0|    }
 120|    0|    ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
 121|    0|  }
 122|    0|}
 123|     |
 124|     |static int _ccv_nnc_data_transfer(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const ccv_nnc_stream_context_t* const stream_context)
 125|    1|{
 126|    1|  assert(output_size == input_size);
 127|    1|  int i;
 128|    2|  for (i = 0; i < input_size; i++)
 129|    1|  {
 130|    1|    const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[i];
 131|    1|    ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[i];
 132|    1|    if (a != b) // Only do transfer if these are two different tensors.
 133|    1|      _ccv_nnc_tensor_transfer_cpu_ref(a, b);
 134|    1|  }
 135|    1|  return CCV_NNC_EXEC_SUCCESS;
 136|    1|}
 137|     |
 138|     |REGISTER_COMMAND_BACKEND(CCV_NNC_DATA_TRANSFER_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 139|    1|{
 140|    1|  registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN;
 141|    1|  registry->tensor_datatypes = CCV_32F;
 142|    1|  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 143|    1|  registry->algorithms = 1;
 144|    1|  registry->exec = _ccv_nnc_data_transfer;
 145|    1|}
 146|     |
 147|     |REGISTER_COMMAND_BACKEND(CCV_NNC_DATA_TRANSFER_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 148|    1|{
 149|    1|  registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN;
 150|    1|  registry->tensor_datatypes = CCV_32F;
 151|    1|  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 152|    1|  registry->algorithms = 1;
 153|    1|  registry->exec = _ccv_nnc_data_transfer;
 154|    1|}
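
With both directions registered against the CPU reference backend, the transfer can be driven through the public command API. A hedged usage sketch, assuming the usual nnc convenience macros (CMD_DATA_TRANSFER_FORWARD(), TENSOR_LIST(), ccv_nnc_no_hint) and that ccv_nnc_init() has already run; exact macro spellings are assumptions and may differ between library versions:

#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>

static void copy_with_data_transfer(void)
{
  // Assumed tensor parameters: CPU memory, NHWC, 32-bit float, dim = {N, H, W, C}.
  ccv_nnc_tensor_param_t params = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32F,
    .dim = {2, 4, 4, 3},
  };
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, params, 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, params, 0);
  // Dispatches to _ccv_nnc_data_transfer, which calls
  // _ccv_nnc_tensor_transfer_cpu_ref for each input/output pair.
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
}
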
 155|     |
 156|     |static int _ccv_nnc_set(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const ccv_nnc_stream_context_t* const stream_context)
 157|    6|{
 158|    6|  int i;
 159|    6|  if (cmd.info.blas.a[0] == 0)
 160|   12|    for (i = 0; i < output_size; i++)
 161|    6|      ccv_nnc_tensor_zero(outputs[i]);
 162|    6|  else
 163|    0|    for (i = 0; i < output_size; i++)
 164|    0|      _ccv_nnc_tensor_set_cpu_ref((ccv_nnc_tensor_view_t*)outputs[i], cmd.info.blas.a[0]);
 165|    6|  return CCV_NNC_EXEC_SUCCESS;
 166|    6|}
 167|     |
 168|     |REGISTER_COMMAND_BACKEND(CCV_NNC_SET_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 169|    1|{
 170|    1|  registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN;
 171|    1|  registry->tensor_datatypes = CCV_32F;
 172|    1|  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 173|    1|  registry->algorithms = 1;
 174|    1|  registry->exec = _ccv_nnc_set;
 175|    1|}
 176|     |
 177|     |REGISTER_COMMAND_BACKEND(CCV_NNC_SET_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 178|    1|{
 179|    1|  registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN;
 180|    1|  registry->tensor_datatypes = CCV_32F;
 181|    1|  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 182|    1|  registry->algorithms = 1;
 183|    1|  registry->exec = _ccv_nnc_set;
 184|    1|}
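
The SET command branches on the scalar in cmd.info.blas.a[0]: zero routes to ccv_nnc_tensor_zero(), anything else to _ccv_nnc_tensor_set_cpu_ref (which the counts above show is currently unexercised). A hedged sketch of driving it, reusing the tensor b from the previous sketch and assuming a CMD_SET_FORWARD(value) convenience macro that fills cmd.info.blas.a[0]; the macro spelling is an assumption, not taken from this file:

// SET has no inputs, so pass a null/empty input list.
ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(b), 0);   // ccv_nnc_tensor_zero() branch
ccv_nnc_cmd_exec(CMD_SET_FORWARD(1.5), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(b), 0); // _ccv_nnc_tensor_set_cpu_ref branch
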
 185|     |
 186|     |static void _ccv_nnc_tensor_nhwc_nchw(const ccv_nnc_tensor_view_t* a, ccv_nnc_tensor_view_t* b)
 187|    2|{
 188|    2|  // Assuming this is float 32.
 189|    2|  int ainc[CCV_NNC_MAX_DIM + 2];
 190|    2|  int binc[CCV_NNC_MAX_DIM + 2];
 191|    2|  int k;
 192|    2|  // In case it is Toll-free bridged matrix object (NHWC format is possible).
 193|    2|  assert(a->info.dim[CCV_NNC_MAX_DIM + 2] == 0 || a->info.dim[CCV_NNC_MAX_DIM + 1] == 0);
 194|    2|  assert(b->info.dim[CCV_NNC_MAX_DIM + 2] == 0);
 195|    2|  const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
 196|    2|  const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
 197|    2|  const int a_offset = CCV_NNC_MAX_DIM + 2 - a_nd;
 198|    2|  assert(a_offset == 0 || a_offset == 1);
 199|    2|  const int b_offset = CCV_NNC_MAX_DIM + 2 - b_nd;
 200|    2|  assert(b_offset == 0 || b_offset == 1);
 201|    2|  ccv_nnc_tensor_view_get_inc(a, ainc);
 202|    2|  ccv_nnc_tensor_view_get_inc(b, binc);
 203|    2|  // Comparing N
 204|    2|  assert((a_offset == 0 ? a->info.dim[0] : 1) == (b_offset == 0 ? b->info.dim[0] : 1));
 205|    2|  const int n = (a_offset == 0 ? a->info.dim[0] : 1);
 206|    2|  // Comparing C
 207|    2|  assert(a->info.dim[a_nd - 1] == b->info.dim[1 - b_offset]);
 208|    2|  const int c = a->info.dim[a_nd - 1];
 209|    2|  // Comparing HW
 210|    2|  int hw[CCV_NNC_MAX_DIM];
 211|    6|  for (k = 0; k < CCV_NNC_MAX_DIM; k++)
 212|    4|  {
 213|    4|    assert(a->info.dim[k + 1 - a_offset] == b->info.dim[k + 2 - b_offset]);
 214|    4|    hw[k] = a->info.dim[k + 1 - a_offset];
 215|    4|  }
 216|    2|  assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
 217|    2|  int i[CCV_NNC_MAX_DIM + 2];
 218|    2|  float* ap = a->data.f32;
 219|    2|  float* bp = b->data.f32;
 220|    2|  // Non-optimal case, need to do skip copy.
 221|    4|  for (i[0] = 0; i[0] < n; i[0]++)
 222|    2|  {
 223|    6|    for (i[3] = 0; i[3] < c; i[3]++)
 224|    4|    {
 225|    4|      float* apu = ap + i[3];
 226|   20|      for (i[1] = 0; i[1] < hw[0]; i[1]++)
 227|   16|      {
 228|   64|        for (i[2] = 0; i[2] < hw[1]; i[2]++)
 229|   48|          bp[i[2]] = apu[i[2] * ainc[3]];
 230|   16|        apu += ainc[2] * ainc[3];
 231|   16|        bp += binc[3];
 232|   16|      }
 233|    4|      bp += (binc[2] - hw[0]) * binc[3];
 234|    4|    }
 235|    2|    ap += ainc[1] * ainc[2] * ainc[3];
 236|    2|    bp += (binc[1] - c) * binc[2] * binc[3];
 237|    2|  }
 238|    2|}
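
The routine above walks the NHWC source a channel at a time and scatters into the channel-major destination through its increments. For reference, the same permutation written against plain dense arrays (a minimal sketch with hypothetical names, no tensor views or padding):

/* Sketch only: dense NHWC -> NCHW permutation, b[n][c][h][w] = a[n][h][w][c].
 * This is the index relation the strided loops above implement for tensor views. */
static void nhwc_to_nchw(float* b, const float* a, int n, int h, int w, int c)
{
  int in, ih, iw, ic;
  for (in = 0; in < n; in++)
    for (ih = 0; ih < h; ih++)
      for (iw = 0; iw < w; iw++)
        for (ic = 0; ic < c; ic++)
          b[((in * c + ic) * h + ih) * w + iw] = a[((in * h + ih) * w + iw) * c + ic];
}
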
 239|     |
 240|     |static void _ccv_nnc_tensor_nchw_nhwc(const ccv_nnc_tensor_view_t* a, ccv_nnc_tensor_view_t* b)
 241|    2|{
 242|    2|  // Assuming this is float 32.
 243|    2|  int ainc[CCV_NNC_MAX_DIM + 2];
 244|    2|  int binc[CCV_NNC_MAX_DIM + 2];
 245|    2|  int k;
 246|    2|  // In case it is Toll-free bridged matrix object (NHWC format is possible).
 247|    2|  assert(a->info.dim[CCV_NNC_MAX_DIM + 2] == 0);
 248|    2|  assert(b->info.dim[CCV_NNC_MAX_DIM + 2] == 0 || b->info.dim[CCV_NNC_MAX_DIM + 1] == 0);
 249|    2|  const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
 250|    2|  const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
 251|    2|  const int a_offset = CCV_NNC_MAX_DIM + 2 - a_nd;
 252|    2|  assert(a_offset == 0 || a_offset == 1);
 253|    2|  const int b_offset = CCV_NNC_MAX_DIM + 2 - b_nd;
 254|    2|  assert(b_offset == 0 || b_offset == 1);
 255|    2|  ccv_nnc_tensor_view_get_inc(a, ainc);
 256|    2|  ccv_nnc_tensor_view_get_inc(b, binc);
 257|    2|  // Comparing N
 258|    2|  assert((a_offset == 0 ? a->info.dim[0] : 1) == (b_offset == 0 ? b->info.dim[0] : 1));
 259|    2|  const int n = (a_offset == 0 ? a->info.dim[0] : 1);
 260|    2|  // Comparing C
 261|    2|  assert(a->info.dim[1 - a_offset] == b->info.dim[b_nd - 1]);
 262|    2|  const int c = a->info.dim[1 - a_offset];
 263|    2|  // Comparing HW
 264|    2|  int hw[CCV_NNC_MAX_DIM];
 265|    6|  for (k = 0; k < CCV_NNC_MAX_DIM; k++)
 266|    4|  {
 267|    4|    assert(a->info.dim[k + 2 - a_offset] == b->info.dim[k + 1 - b_offset]);
 268|    4|    hw[k] = a->info.dim[k + 2 - a_offset];
 269|    4|  }
 270|    2|  assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
 271|    2|  int i[CCV_NNC_MAX_DIM + 2];
 272|    2|  float* ap = a->data.f32;
 273|    2|  float* bp = b->data.f32;
 274|    2|  // Non-optimal case, need to do skip copy.
 275|    4|  for (i[0] = 0; i[0] < n; i[0]++)
 276|    2|  {
 277|    6|    for (i[3] = 0; i[3] < c; i[3]++)
 278|    4|    {
 279|    4|      float* bpu = bp + i[3];
 280|   20|      for (i[1] = 0; i[1] < hw[0]; i[1]++)
 281|   16|      {
 282|   64|        for (i[2] = 0; i[2] < hw[1]; i[2]++)
 283|   48|          bpu[i[2] * binc[3]] = ap[i[2]];
 284|   16|        ap += ainc[3];
 285|   16|        bpu += binc[2] * binc[3];
 286|   16|      }
 287|    4|      ap += (ainc[2] - hw[0]) * ainc[3];
 288|    4|    }
 289|    2|    ap += (ainc[1] - c) * ainc[2] * ainc[3];
 290|    2|    bp += binc[1] * binc[2] * binc[3];
 291|    2|  }
 292|    2|}
 293|     |
 294|     |static int _ccv_nnc_format_transform(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const ccv_nnc_stream_context_t* const stream_context)
 295|    4|{
 296|    4|  assert(output_size == input_size);
 297|    4|  int i;
 298|    8|  for (i = 0; i < input_size; i++)
 299|    4|  {
 300|    4|    const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[i];
 301|    4|    ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[i];
 302|    4|    assert(a != b); // Cannot do inplace transform.
 303|    4|    if (a->info.format == b->info.format) {
 304|    0|      // If it is the same, just do a normal data transfer.
 305|    0|      _ccv_nnc_tensor_transfer_cpu_ref(a, b);
 306|    4|    } else if (a->info.format == CCV_TENSOR_FORMAT_NHWC && b->info.format == CCV_TENSOR_FORMAT_NCHW) {
 307|    2|      _ccv_nnc_tensor_nhwc_nchw(a, b);
 308|    2|    } else if (a->info.format == CCV_TENSOR_FORMAT_NHWC && b->info.format == CCV_TENSOR_FORMAT_CHWN) {
 309|    2|    } else if (a->info.format == CCV_TENSOR_FORMAT_NCHW && b->info.format == CCV_TENSOR_FORMAT_NHWC) {
 310|    2|      _ccv_nnc_tensor_nchw_nhwc(a, b);
 311|    0|    } else if (a->info.format == CCV_TENSOR_FORMAT_NCHW && b->info.format == CCV_TENSOR_FORMAT_CHWN) {
 312|    0|    } else if (a->info.format == CCV_TENSOR_FORMAT_CHWN && b->info.format == CCV_TENSOR_FORMAT_NHWC) {
 313|    0|    } else if (a->info.format == CCV_TENSOR_FORMAT_CHWN && b->info.format == CCV_TENSOR_FORMAT_NCHW) {
 314|    0|    }
 315|    4|  }
 316|    4|  return CCV_NNC_EXEC_SUCCESS;
 317|    4|}
 318|     |
 319|     |REGISTER_COMMAND_BACKEND(CCV_NNC_FORMAT_TRANSFORM_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 320|    1|{
 321|    1|  registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN;
 322|    1|  registry->tensor_datatypes = CCV_32F;
 323|    1|  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 324|    1|  registry->algorithms = 1;
 325|    1|  registry->exec = _ccv_nnc_format_transform;
 326|    1|}
 327|     |
 328|     |REGISTER_COMMAND_BACKEND(CCV_NNC_FORMAT_TRANSFORM_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 329|    1|{
 330|    1|  registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN;
 331|    1|  registry->tensor_datatypes = CCV_32F;
 332|    1|  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 333|    1|  registry->algorithms = 1;
 334|    1|  registry->exec = _ccv_nnc_format_transform;
 335|    1|}
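
FORMAT_TRANSFORM picks the conversion routine from the input and output format fields; the CHWN combinations are still empty stubs, which matches their zero counts. A hedged usage sketch, assuming a CMD_FORMAT_TRANSFORM_FORWARD() convenience macro and that ccv_nnc_init() has already run; because a is NHWC and b is NCHW, _ccv_nnc_format_transform would call _ccv_nnc_tensor_nhwc_nchw(a, b) here:

#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>

static void convert_nhwc_to_nchw(void)
{
  // Assumed parameters: same data, two layouts of a 4x4x3 image.
  ccv_nnc_tensor_param_t nhwc = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32F,
    .dim = {1, 4, 4, 3}, // N, H, W, C
  };
  ccv_nnc_tensor_param_t nchw = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NCHW,
    .datatype = CCV_32F,
    .dim = {1, 3, 4, 4}, // N, C, H, W
  };
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, nhwc, 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, nchw, 0);
  ccv_nnc_cmd_exec(CMD_FORMAT_TRANSFORM_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
}
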