Coverage Report

Created: 2017-11-12 13:27

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/blas/ccv_nnc_axpy_cpu_ref.c
 Line|  Count|Source
    1|       |#include <ccv.h>
    2|       |#include <ccv_internal.h>
    3|       |#include <nnc/ccv_nnc.h>
    4|       |#include <nnc/ccv_nnc_easy.h>
    5|       |#include <nnc/ccv_nnc_internal.h>
    6|       |#ifdef USE_OPENMP
    7|       |#include <omp.h>
    8|       |#endif
    9|       |#ifdef USE_DISPATCH
   10|       |#include <dispatch/dispatch.h>
   11|       |#endif
   12|       |
   13|       |// Shared methods.
   14|       |#include "../_ccv_nnc_cpu_ref.h"
   15|       |
   16|       |static int _ccv_nnc_axpy_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const ccv_nnc_stream_context_t* const stream_context)
   17|      5|{
   18|      5|  if (input_size == 1 || inputs[1] == 0)
   19|      3|  {
   20|      3|    // It cannot be set otherwise we have trouble.
   21|      3|    assert(cmd.info.blas.a[1] == 0);
   22|      3|    if (cmd.info.blas.a[0] == 1)
   23|      2|    {
   24|      2|      _ccv_nnc_tensor_transfer_cpu_ref((ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)outputs[0]);
   25|      2|      return CCV_NNC_EXEC_SUCCESS;
   26|      1|    } else if (cmd.info.blas.a[0] == 0) {
   27|      0|      ccv_nnc_tensor_zero(outputs[0]);
   28|      0|      return CCV_NNC_EXEC_SUCCESS;
   29|      0|    }
   30|      3|    // Assuming this is float 32.
   31|      1|    int dim[CCV_NNC_MAX_DIM + 2];
   32|      1|    int ainc[CCV_NNC_MAX_DIM + 2];
   33|      1|    int binc[CCV_NNC_MAX_DIM + 2];
   34|      1|    ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
   35|      1|    ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
   36|      1|    assert(a->info.dim[CCV_NNC_MAX_DIM + 2] == 0);
   37|      1|    assert(b->info.dim[CCV_NNC_MAX_DIM + 2] == 0);
   38|      1|    const float p = cmd.info.blas.a[0];
   39|      1|    ccv_nnc_tensor_view_get_dim(a, dim);
   40|      1|    ccv_nnc_tensor_view_check_dim(b, dim);
   41|      1|    int x;
   42|      1|    if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b))
   43|      1|    {
   44|      1|      // Super optimal case, just do one for-loop for sum.
   45|      1|      const int tensor_count = ccv_nnc_tensor_count(a->info);
   46|      2|      for (x = 0; x < tensor_count; x++)
   47|      1|        b->data.f32[x] = p * a->data.f32[x];
   48|      1|      return CCV_NNC_EXEC_SUCCESS;
   49|      1|    }
   50|      0|    assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
   51|      0|    ccv_nnc_tensor_view_get_inc(a, ainc);
   52|      0|    ccv_nnc_tensor_view_get_inc(b, binc);
   53|      0|    int i[CCV_NNC_MAX_DIM + 2];
   54|      0|    float* ap = a->data.f32;
   55|      0|    float* bp = b->data.f32;
   56|      0|    const int count = dim[2] * dim[3];
   57|      0|    if (ainc[3] == dim[3] && binc[3] == dim[3])
   58|      0|    {
   59|      0|      // Special casing if the ainc[3] is the same as dim[3]
   60|      0|      for (i[0] = 0; i[0] < dim[0]; i[0]++)
   61|      0|      {
   62|      0|        for (i[1] = 0; i[1] < dim[1]; i[1]++)
   63|      0|        {
   64|      0|          for (x = 0; x < count; x++)
   65|      0|            bp[x] = p * ap[x];
   66|      0|          ap += ainc[2] * ainc[3];
   67|      0|          bp += binc[2] * binc[3];
   68|      0|        }
   69|      0|        ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
   70|      0|        bp += (binc[1] - dim[1]) * binc[2] * binc[3];
   71|      0|      }
   72|      0|      return CCV_NNC_EXEC_SUCCESS;
   73|      0|    }
   74|      0|    // Non-optimal case, need to do skip copy.
   75|      0|    for (i[0] = 0; i[0] < dim[0]; i[0]++)
   76|      0|    {
   77|      0|      for (i[1] = 0; i[1] < dim[1]; i[1]++)
   78|      0|      {
   79|      0|        for (i[2] = 0; i[2] < dim[2]; i[2]++)
   80|      0|        {
   81|      0|          for (x = 0; x < dim[3]; x++)
   82|      0|            bp[x] = p * ap[x];
   83|      0|          ap += ainc[3];
   84|      0|          bp += binc[3];
   85|      0|        }
   86|      0|        ap += (ainc[2] - dim[2]) * ainc[3];
   87|      0|        bp += (binc[2] - dim[2]) * binc[3];
   88|      0|      }
   89|      0|      ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
   90|      0|      bp += (binc[1] - dim[1]) * binc[2] * binc[3];
   91|      0|    }
   92|      0|    return CCV_NNC_EXEC_SUCCESS;
   93|      0|  }
   94|      2|  if (cmd.info.blas.a[0] == 1 && cmd.info.blas.a[1] == 1)
   95|      1|  {
   96|      1|    ccv_nnc_cmd_t forw_cmd = cmd;
   97|      1|    forw_cmd.cmd = CCV_NNC_EWSUM_FORWARD;
   98|      1|    return _ccv_nnc_ewsum_forw_cpu_ref(cmd, hint, flags, inputs, input_size, outputs, output_size, stream_context);
   99|      1|  } else if (cmd.info.blas.a[0] == 1 && cmd.info.blas.a[1] == 0) {
  100|      0|    _ccv_nnc_tensor_transfer_cpu_ref((const ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)outputs[0]);
  101|      0|    return CCV_NNC_EXEC_SUCCESS;
  102|      1|  } else if (cmd.info.blas.a[0] == 0 && cmd.info.blas.a[1] == 1) {
  103|      0|    _ccv_nnc_tensor_transfer_cpu_ref((const ccv_nnc_tensor_view_t*)inputs[1], (ccv_nnc_tensor_view_t*)outputs[0]);
  104|      0|    return CCV_NNC_EXEC_SUCCESS;
  105|      1|  } else if (cmd.info.blas.a[0] == 0 && cmd.info.blas.a[1] == 0) {
  106|      0|    ccv_nnc_tensor_zero(outputs[0]);
  107|      0|    return CCV_NNC_EXEC_SUCCESS;
  108|      0|  }
  109|      2|  // Assuming this is float 32.
  110|      1|  int dim[CCV_NNC_MAX_DIM + 2];
  111|      1|  int ainc[CCV_NNC_MAX_DIM + 2];
  112|      1|  int binc[CCV_NNC_MAX_DIM + 2];
  113|      1|  int cinc[CCV_NNC_MAX_DIM + 2];
  114|      1|  ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
  115|      1|  ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1];
  116|      1|  ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0];
  117|      1|  assert(a->info.dim[CCV_NNC_MAX_DIM + 2] == 0);
  118|      1|  assert(b->info.dim[CCV_NNC_MAX_DIM + 2] == 0);
  119|      1|  assert(c->info.dim[CCV_NNC_MAX_DIM + 2] == 0);
  120|      1|  const float p = cmd.info.blas.a[0];
  121|      1|  const float q = cmd.info.blas.a[1];
  122|      1|  ccv_nnc_tensor_view_get_dim(a, dim);
  123|      1|  ccv_nnc_tensor_view_check_dim(b, dim);
  124|      1|  ccv_nnc_tensor_view_check_dim(c, dim);
  125|      1|  int x;
  126|      1|  if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c))
  127|      1|  {
  128|      1|    // Super optimal case, just do one for-loop for sum.
  129|      1|    const int tensor_count = ccv_nnc_tensor_count(a->info);
  130|      2|    for (x = 0; x < tensor_count; x++)
  131|      1|      c->data.f32[x] = p * a->data.f32[x] + q * b->data.f32[x];
  132|      1|    return CCV_NNC_EXEC_SUCCESS;
  133|      1|  }
  134|      0|  assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
  135|      0|  ccv_nnc_tensor_view_get_inc(a, ainc);
  136|      0|  ccv_nnc_tensor_view_get_inc(b, binc);
  137|      0|  ccv_nnc_tensor_view_get_inc(c, cinc);
  138|      0|  int i[CCV_NNC_MAX_DIM + 2];
  139|      0|  float* ap = a->data.f32;
  140|      0|  float* bp = b->data.f32;
  141|      0|  float* cp = c->data.f32;
  142|      0|  const int count = dim[2] * dim[3];
  143|      0|  if (ainc[3] == dim[3] && binc[3] == dim[3] && cinc[3] == dim[3])
  144|      0|  {
  145|      0|    // Special casing if the ainc[3] is the same as dim[3]
  146|      0|    for (i[0] = 0; i[0] < dim[0]; i[0]++)
  147|      0|    {
  148|      0|      for (i[1] = 0; i[1] < dim[1]; i[1]++)
  149|      0|      {
  150|      0|        for (x = 0; x < count; x++)
  151|      0|          cp[x] = p * ap[x] + q * bp[x];
  152|      0|        ap += ainc[2] * ainc[3];
  153|      0|        bp += binc[2] * binc[3];
  154|      0|        cp += cinc[2] * cinc[3];
  155|      0|      }
  156|      0|      ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
  157|      0|      bp += (binc[1] - dim[1]) * binc[2] * binc[3];
  158|      0|      cp += (cinc[1] - dim[1]) * cinc[2] * cinc[3];
  159|      0|    }
  160|      0|    return CCV_NNC_EXEC_SUCCESS;
  161|      0|  }
  162|      0|  // Non-optimal case, need to do skip copy.
  163|      0|  for (i[0] = 0; i[0] < dim[0]; i[0]++)
  164|      0|  {
  165|      0|    for (i[1] = 0; i[1] < dim[1]; i[1]++)
  166|      0|    {
  167|      0|      for (i[2] = 0; i[2] < dim[2]; i[2]++)
  168|      0|      {
  169|      0|        for (x = 0; x < dim[3]; x++)
  170|      0|          cp[x] = p * ap[x] + q * bp[x];
  171|      0|        ap += ainc[3];
  172|      0|        bp += binc[3];
  173|      0|        cp += cinc[3];
  174|      0|      }
  175|      0|      ap += (ainc[2] - dim[2]) * ainc[3];
  176|      0|      bp += (binc[2] - dim[2]) * binc[3];
  177|      0|      cp += (cinc[2] - dim[2]) * cinc[3];
  178|      0|    }
  179|      0|    ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
  180|      0|    bp += (binc[1] - dim[1]) * binc[2] * binc[3];
  181|      0|    cp += (cinc[1] - dim[1]) * cinc[2] * cinc[3];
  182|      0|  }
  183|      0|  return CCV_NNC_EXEC_SUCCESS;
  184|      0|}
  185|       |
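Note on the forward kernel above: with one input it computes b = p*a, and with two inputs c = p*a + q*b, where p = cmd.info.blas.a[0] and q = cmd.info.blas.a[1]. The test run only exercises the contiguous fast path (lines 46-47 and 130-131); the zero-count strided paths handle tensor views whose increments (ainc/binc/cinc) are wider than the visible dimensions, so the pointers must skip the padding between rows. The standalone sketch below illustrates both patterns; the function and variable names are illustrative, not taken from the library, and the strided case is reduced to 2-D for brevity.

    #include <stdio.h>

    /* Contiguous fast path: one flat loop over every element, as on lines 130-131. */
    static void axpy_contiguous(float p, const float* a, float q, const float* b, float* c, int n)
    {
      int x;
      for (x = 0; x < n; x++)
        c[x] = p * a[x] + q * b[x];
    }

    /* Strided "skip copy", reduced to 2-D: each row holds dim1 values, but
       consecutive rows sit inc1 elements apart in memory (inc1 >= dim1).
       The 4-D ainc/binc/cinc bookkeeping above generalizes this pointer bump. */
    static void axpy_strided_2d(float p, const float* a, int ainc1, float q, const float* b, int binc1, float* c, int cinc1, int dim0, int dim1)
    {
      int y, x;
      for (y = 0; y < dim0; y++)
      {
        for (x = 0; x < dim1; x++)
          c[x] = p * a[x] + q * b[x];
        a += ainc1; /* jump past the padding at the end of the row */
        b += binc1;
        c += cinc1;
      }
    }

    int main(void)
    {
      float a[4] = {1, 2, 3, 4}, b[4] = {10, 20, 30, 40}, c[4];
      axpy_contiguous(2, a, 0.5f, b, c, 4);
      printf("%g %g %g %g\n", c[0], c[1], c[2], c[3]); /* 7 14 21 28 */
      /* A 2x2 view carved out of 2x3 row-major buffers (inc1 = 3, -1 marks padding). */
      float av[6] = {1, 2, -1, 3, 4, -1}, bv[6] = {5, 6, -1, 7, 8, -1}, cv[6] = {0};
      axpy_strided_2d(1, av, 3, 1, bv, 3, cv, 3, 2, 2);
      printf("%g %g %g %g\n", cv[0], cv[1], cv[3], cv[4]); /* 6 8 10 12 */
      return 0;
    }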
  186|       |static int _ccv_nnc_axpy_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const ccv_nnc_stream_context_t* const stream_context)
  187|      2|{
  188|      2|  if (inputs[0] == 0)
  189|      0|  {
  190|      0|    if (outputs[0])
  191|      0|      _ccv_nnc_tensor_set_cpu_ref((ccv_nnc_tensor_view_t*)outputs[0], cmd.info.blas.a[0]);
  192|      0|    if (output_size > 1 && outputs[1])
  193|      0|      _ccv_nnc_tensor_set_cpu_ref((ccv_nnc_tensor_view_t*)outputs[1], cmd.info.blas.a[1]);
  194|      2|  } else {
  195|      2|    ccv_nnc_cmd_t forw_cmd = cmd;
  196|      2|    forw_cmd.cmd = CCV_NNC_AXPY_FORWARD;
  197|      2|    memset(forw_cmd.info.blas.a, 0, sizeof(forw_cmd.info.blas.a));
  198|      2|    if (outputs[0])
  199|      2|    {
  200|      2|      forw_cmd.info.blas.a[0] = cmd.info.blas.a[0];
  201|      2|      _ccv_nnc_axpy_forw(forw_cmd, hint, flags, inputs, 1, outputs, 1, stream_context);
  202|      2|    }
  203|      2|    if (output_size > 1 && outputs[1])
  204|      1|    {
  205|      1|      forw_cmd.info.blas.a[0] = cmd.info.blas.a[1];
  206|      1|      _ccv_nnc_axpy_forw(forw_cmd, hint, flags, inputs, 1, outputs + 1, 1, stream_context);
  207|      1|    }
  208|      2|  }
  209|      2|  return CCV_NNC_EXEC_SUCCESS;
  210|      2|}
  211|       |
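Note on the backward kernel above: since the forward map is c = p*a + q*b, an incoming gradient g = dL/dc gives dL/da = p*g and dL/db = q*g, elementwise. That is why the function reruns the forward kernel on g with a single coefficient per output. When no gradient tensor is passed (inputs[0] == 0), g is implicitly all ones and the outputs are filled with the constants p and q, which is the untested branch on lines 189-193. A minimal sketch with illustrative names:

    #include <stdio.h>

    /* Backward of c = p*a + q*b: scale the incoming gradient g by each coefficient. */
    static void axpy_back(float p, float q, const float* g, float* da, float* db, int n)
    {
      int x;
      for (x = 0; x < n; x++)
      {
        da[x] = p * g[x];
        db[x] = q * g[x];
      }
    }

    int main(void)
    {
      float g[3] = {1, -2, 3}, da[3], db[3];
      axpy_back(2, 0.5f, g, da, db, 3);
      printf("da: %g %g %g\n", da[0], da[1], da[2]); /* 2 -4 6 */
      printf("db: %g %g %g\n", db[0], db[1], db[2]); /* 0.5 -1 1.5 */
      return 0;
    }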
  212|       |REGISTER_COMMAND_BACKEND(CCV_NNC_AXPY_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
  213|      1|{
  214|      1|  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
  215|      1|  registry->tensor_datatypes = CCV_32F;
  216|      1|  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  217|      1|  registry->algorithms = 1;
  218|      1|  registry->exec = _ccv_nnc_axpy_forw;
  219|      1|}
  220|       |
  221|       |REGISTER_COMMAND_BACKEND(CCV_NNC_AXPY_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
  222|      1|{
  223|      1|  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
  224|      1|  registry->tensor_datatypes = CCV_32F;
  225|      1|  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  226|      1|  registry->algorithms = 1;
  227|      1|  registry->exec = _ccv_nnc_axpy_back;
  228|      1|}
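The two REGISTER_COMMAND_BACKEND blocks declare what this reference backend supports: NHWC, NCHW, and CHWN layouts, 32-bit float only, CPU memory, one algorithm, and the functions registered as exec. Callers reach these through the generic command interface rather than directly. The sketch below assumes the public ccv_nnc_cmd/ccv_nnc_cmd_exec entry points and the TENSOR_LIST macro from nnc/ccv_nnc_easy.h; tensor allocation is omitted and the parameter construction is illustrative, since the convenience macros have varied across ccv versions.

    #include <nnc/ccv_nnc.h>
    #include <nnc/ccv_nnc_easy.h>

    /* Sketch: run c = 2*a + 0.5*b through the dispatcher. The coefficients land in
       cmd.info.blas.a, which is exactly what _ccv_nnc_axpy_forw reads above. */
    int axpy_example(ccv_nnc_tensor_t* const a, ccv_nnc_tensor_t* const b, ccv_nnc_tensor_t* const c)
    {
      ccv_nnc_cmd_param_t params = {
        .blas = {
          .a = {2, 0.5},
        },
      };
      const ccv_nnc_cmd_t cmd = ccv_nnc_cmd(CCV_NNC_AXPY_FORWARD, 0, params, 0);
      return ccv_nnc_cmd_exec(cmd, ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
    }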