Coverage Report

Created: 2019-07-03 22:50

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/blas/ccv_nnc_gemm_cpu_ref.c
Line
Count
Source
1
#include <ccv.h>
2
#include <ccv_internal.h>
3
#include <nnc/ccv_nnc.h>
4
#include <nnc/ccv_nnc_easy.h>
5
#include <nnc/ccv_nnc_internal.h>
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
static int _ccv_nnc_gemm_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
14
2.13k
{
15
2.13k
  assert(input_size >= 2);
16
2.13k
  const ccv_nnc_tensor_view_t* a = (const ccv_nnc_tensor_view_t*)inputs[0];
17
2.13k
  const ccv_nnc_tensor_view_t* w = (const ccv_nnc_tensor_view_t*)inputs[1];
18
2.13k
  const ccv_nnc_tensor_view_t* bias = input_size > 2 ? (const ccv_nnc_tensor_view_t*)inputs[2] : 0;
19
2.13k
  // Copy most of the parameters, but reshape the dimension of a to a vector.
20
2.13k
  assert(a->info.dim[2] == 0); // It is a 2-d array.
21
2.13k
  assert(output_size == 1);
22
2.13k
  ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
23
2.13k
  assert(b->info.dim[2] == 0); // It is a 2-d array.
24
2.13k
  assert(w->info.dim[2] == 0); // It is a 2-d array
25
2.13k
  assert(!bias || bias->info.dim[1] == 0); // It is a 1-d array
26
2.13k
  const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
27
2.13k
  assert(a_nd == 1 || a_nd == 2);
28
2.13k
  const int* adim = (a_nd == 1) ? a->info.dim : a->info.dim + 1;
29
2.13k
  const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
30
2.13k
  assert(b_nd == 1 || b_nd == 2);
31
2.13k
  const int* bdim = (b_nd == 1) ? b->info.dim : b->info.dim + 1;
32
2.13k
  const int batch_size = a_nd == 1 ? 1 : ccv_max(1, a->info.dim[0]);
33
2.13k
  assert(batch_size == ((b_nd == 1) ? 1 : ccv_max(1, b->info.dim[0])));
34
2.13k
  assert(!bias || bdim[0] == bias->info.dim[0]);
35
2.13k
  assert(bdim[0] == w->info.dim[0]);
36
2.13k
  assert(adim[0] == w->info.dim[1]);
37
2.13k
  const int a_batch_inc = CCV_IS_TENSOR_VIEW(a) ? (a_nd == 1 ? a->inc[0] : a->inc[1]) : adim[0];
38
2.13k
  const int b_batch_inc = CCV_IS_TENSOR_VIEW(b) ? (b_nd == 1 ? b->inc[0] : b->inc[1]) : bdim[0];
39
2.13k
  const int* winc = CCV_IS_TENSOR_VIEW(w) ? w->inc : w->info.dim;
40
2.13k
  int i;
41
6.30k
  for (i = 0; i < batch_size; i++)
42
4.16k
  {
43
4.16k
    const float* const ap = a->data.f32 + i * a_batch_inc;
44
4.16k
    float* const bp = b->data.f32 + i * b_batch_inc;
45
4.16k
    parallel_for(j, bdim[0]) {
46
18.4E
      float v = bias ? bias->data.f32[j] : 0;
47
0
      const float* const wp = w->data.f32 + j * winc[1];
48
0
      int k;
49
30.8M
      for (k = 0; k < adim[0]; k++)
50
30.8M
        v += wp[k] * ap[k];
51
0
      bp[j] = v;
52
4.16k
    } parallel_endfor
53
4.16k
  }
54
2.13k
  return CCV_NNC_EXEC_SUCCESS;
55
2.13k
}
56
57
static int _ccv_nnc_gemm_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
58
2.04k
{
59
2.04k
  // inputs: gradient, forw prop input, [w]
60
2.04k
  // outputs: [output gradient], weight updates, bias updates
61
2.04k
  assert(input_size >= 2 && output_size >= 2);
62
2.04k
  const ccv_nnc_tensor_view_t* g = (const ccv_nnc_tensor_view_t*)inputs[0];
63
2.04k
  assert(g->info.dim[2] == 0); // It is a 2-d array.
64
2.04k
  const ccv_nnc_tensor_view_t* a = (const ccv_nnc_tensor_view_t*)inputs[1];
65
2.04k
  assert(a->info.dim[2] == 0); // It is a 2-d array.
66
2.04k
  ccv_nnc_tensor_view_t* dw = (ccv_nnc_tensor_view_t*)outputs[1];
67
2.04k
  assert(dw->info.dim[2] == 0); // It is a 2-d array.
68
2.04k
  ccv_nnc_tensor_view_t* bias = output_size > 2 ? (ccv_nnc_tensor_view_t*)outputs[2] : 0;
69
2.04k
  assert(!bias || bias->info.dim[1] == 0); // It is a 1-d array.
70
2.04k
  const int* dwinc = CCV_IS_TENSOR_VIEW(dw) ? dw->inc : dw->info.dim;
71
2.04k
  if (!(flags & CCV_NNC_ACCUMULATE_OUTPUT)) // reset the gradients to 0
72
2.04k
  {
73
2.04k
    memset(dw->data.u8, 0, sizeof(float) * dwinc[1] * dw->info.dim[0]);
74
2.04k
    if (bias)
75
40
      memset(bias->data.u8, 0, sizeof(float) * bias->info.dim[0]);
76
2.04k
  }
77
2.04k
  const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
78
2.04k
  assert(a_nd == 1 || a_nd == 2);
79
2.04k
  const int* adim = (a_nd == 1) ? a->info.dim : a->info.dim + 1;
80
2.04k
  const int g_nd = ccv_nnc_tensor_nd(g->info.dim);
81
2.04k
  assert(g_nd == 1 || g_nd == 2);
82
2.04k
  const int* gdim = (g_nd == 1) ? g->info.dim : g->info.dim + 1;
83
2.04k
  const int batch_size = a_nd == 1 ? 1 : ccv_max(1, a->info.dim[0]);
84
2.04k
  assert(batch_size == ((g_nd == 1) ? 1 : ccv_max(1, g->info.dim[0])));
85
2.04k
  assert(!bias || bias->info.dim[0] == gdim[0]);
86
2.04k
  int i, j;
87
2.04k
  float* gp = g->data.f32;
88
2.04k
  const int g_batch_inc = CCV_IS_TENSOR_VIEW(g) ? ((g_nd == 1) ? g->inc[0] : g->inc[1]) : gdim[0];
89
2.04k
  if (bias)
90
40
  {
91
40
    float* bp = bias->data.f32;
92
99
    for (i = 0; i < batch_size; i++)
93
59
    {
94
3.76k
      for (j = 0; j < gdim[0]; j++)
95
3.70k
        bp[j] += gp[j];
96
59
      gp += g_batch_inc;
97
59
    }
98
40
  }
99
2.04k
  assert(gdim[0] == dw->info.dim[0]);
100
2.04k
  assert(adim[0] == dw->info.dim[1]);
101
2.04k
  const int a_batch_inc = CCV_IS_TENSOR_VIEW(a) ? ((a_nd == 1) ? a->inc[0] : a->inc[1]) : adim[0];
102
6.13k
  for (i = 0; i < batch_size; i++)
103
4.08k
  {
104
4.08k
    const float* const gp = g->data.f32 + i * g_batch_inc;
105
4.08k
    const float* const ap = a->data.f32 + i * a_batch_inc;
106
4.08k
    parallel_for(j, gdim[0]) {
107
0
      float* const dwp = dw->data.f32 + j * dwinc[1];
108
0
      const float v = gp[j];
109
0
      int k;
110
88.1k
      for (k = 0; k < adim[0]; k++)
111
88.1k
        dwp[k] += ap[k] * v;
112
4.08k
    } parallel_endfor
113
4.08k
  }
114
2.04k
  ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[0];
115
2.04k
  if (h)
116
2.04k
  {
117
2.04k
    const ccv_nnc_tensor_view_t* w = (const ccv_nnc_tensor_view_t*)inputs[2];
118
2.04k
    assert(h->info.dim[2] == 0); // It is a 2-d array.
119
2.04k
    const int h_nd = ccv_nnc_tensor_nd(h->info.dim);
120
2.04k
    assert(h_nd == 1 || h_nd == 2);
121
2.04k
    const int* hdim = (h_nd == 1) ? h->info.dim : h->info.dim + 1;
122
2.04k
    assert(hdim[0] == adim[0]);
123
2.04k
    assert(batch_size == ((h_nd == 1) ? 1 : ccv_max(1, h->info.dim[0])));
124
2.04k
    const int h_batch_inc = CCV_IS_TENSOR_VIEW(h) ? ((h_nd == 1) ? h->inc[0] : h->inc[1]) : hdim[0];
125
2.04k
    const int* winc = CCV_IS_TENSOR_VIEW(w) ? w->inc : w->info.dim;
126
6.13k
    for (i = 0; i < batch_size; i++)
127
4.08k
    {
128
4.08k
      const float* const gp = g->data.f32 + i * g_batch_inc;
129
4.08k
      float* const hp = h->data.f32 + i * h_batch_inc;
130
4.08k
      parallel_for(j, hdim[0]) {
131
0
        const float* const wp = w->data.f32 + j;
132
0
        float v = 0;
133
0
        int k;
134
93.2k
        for (k = 0; k < gdim[0]; k++)
135
93.2k
          v += wp[k * winc[1]] * gp[k];
136
0
        hp[j] = v;
137
4.08k
      } parallel_endfor
138
4.08k
    }
139
2.04k
  }
140
2.04k
  return CCV_NNC_EXEC_SUCCESS;
141
2.04k
}
142
143
REGISTER_COMMAND_BACKEND(CCV_NNC_GEMM_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
144
1
{
145
1
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
146
1
  registry->tensor_datatypes = CCV_32F;
147
1
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
148
1
  registry->algorithms = 1;
149
1
  registry->exec = _ccv_nnc_gemm_forw;
150
1
}
151
152
REGISTER_COMMAND_BACKEND(CCV_NNC_GEMM_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
153
1
{
154
1
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
155
1
  registry->tensor_datatypes = CCV_32F;
156
1
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
157
1
  registry->algorithms = 1;
158
1
  registry->exec = _ccv_nnc_gemm_back;
159
1
}
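Note on the kernels covered above: _ccv_nnc_gemm_forw is the plain reference GEMM. For each sample in the batch it computes b[j] = bias[j] + sum_k w[j][k] * a[k], parallelizing over the output dimension; _ccv_nnc_gemm_back accumulates dbias[j] += g[j] and dw[j][k] += g[j] * a[k], and, when an input gradient is requested, h[k] = sum_j w[j][k] * g[j]. The following is a minimal standalone sketch of the forward math on plain float arrays; it is a hypothetical helper, not part of the file above, and it ignores the tensor-view strides (a_batch_inc, b_batch_inc, winc) that the real kernel handles.

/* Sketch of the reference forward GEMM on contiguous arrays (assumed layout:
 * a is batch_size x in_dim, w is out_dim x in_dim, b is batch_size x out_dim). */
static void gemm_forw_sketch(const float* a, const float* w, const float* bias,
  float* b, int batch_size, int in_dim, int out_dim)
{
  int i, j, k;
  for (i = 0; i < batch_size; i++)
  {
    const float* const ap = a + i * in_dim;  /* sample i */
    float* const bp = b + i * out_dim;       /* its output row */
    for (j = 0; j < out_dim; j++)
    {
      float v = bias ? bias[j] : 0;          /* optional bias, as in the kernel */
      const float* const wp = w + j * in_dim; /* row j of the weight matrix */
      for (k = 0; k < in_dim; k++)
        v += wp[k] * ap[k];                  /* dot product of weight row and input */
      bp[j] = v;
    }
  }
}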