Coverage Report

Created: 2017-11-12 13:27

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/blas/ccv_nnc_gemm_cpu_ref.c
Line
Count
Source (jump to first uncovered line)
1
#include <ccv.h>
2
#include <ccv_internal.h>
3
#include <nnc/ccv_nnc.h>
4
#include <nnc/ccv_nnc_easy.h>
5
#include <nnc/ccv_nnc_internal.h>
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
// Forward GEMM (CPU reference): for each batch row i, computes
//   b[i][j] = bias[j] + sum_k w[j][k] * a[i][k]
// i.e. b = a * W^T + bias, with a either a vector (1-d) or a batch of rows (2-d).
// inputs: a (activations), w (weight matrix), bias (1-d).
// outputs: b (result).
// Tensor views are honored through the inc[] stride arrays.
static int _ccv_nnc_gemm_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const ccv_nnc_stream_context_t* const stream_context)
{
	assert(input_size == 3);
	const ccv_nnc_tensor_view_t* a = (const ccv_nnc_tensor_view_t*)inputs[0];
	const ccv_nnc_tensor_view_t* w = (const ccv_nnc_tensor_view_t*)inputs[1];
	const ccv_nnc_tensor_view_t* bias = (const ccv_nnc_tensor_view_t*)inputs[2];
	// Copy the most of parameters, but reshape the dimension of a to a vector.
	assert(a->info.dim[2] == 0); // It is a 2-d array.
	assert(output_size == 1);
	ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
	assert(b->info.dim[2] == 0); // It is a 2-d array.
	assert(w->info.dim[2] == 0); // It is a 2-d array
	assert(bias->info.dim[1] == 0); // It is a 1-d array
	const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
	assert(a_nd == 1 || a_nd == 2);
	// For 2-d tensors, dim[0] is the batch dimension; skip it to get the row shape.
	const int* adim = (a_nd == 1) ? a->info.dim : a->info.dim + 1;
	const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
	assert(b_nd == 1 || b_nd == 2);
	const int* bdim = (b_nd == 1) ? b->info.dim : b->info.dim + 1;
	const int batch_size = a_nd == 1 ? 1 : ccv_max(1, a->info.dim[0]);
	// NOTE: parenthesize the conditional — without the extra parentheses this
	// parsed as (batch_size == (b_nd == 1)) ? ... and the assert was vacuous.
	assert(batch_size == ((b_nd == 1) ? 1 : ccv_max(1, b->info.dim[0])));
	assert(bdim[0] == bias->info.dim[0]);
	assert(bdim[0] == w->info.dim[0]);
	assert(adim[0] == w->info.dim[1]);
	// Row strides: tensor views carry explicit inc[], plain tensors are dense.
	const int* ainc = CCV_IS_TENSOR_VIEW(a) ? (a_nd == 1 ? a->inc : a->inc + 1) : adim;
	const int* binc = CCV_IS_TENSOR_VIEW(b) ? (b_nd == 1 ? b->inc : b->inc + 1) : bdim;
	const int* winc = CCV_IS_TENSOR_VIEW(w) ? w->inc : w->info.dim;
	int i;
	for (i = 0; i < batch_size; i++)
	{
		const float* const ap = a->data.f32 + i * ainc[0];
		float* const bp = b->data.f32 + i * binc[0];
		// Each output element j is an independent dot product; parallelize over j.
		parallel_for(j, bdim[0]) {
			float v = bias->data.f32[j];
			const float* const wp = w->data.f32 + j * winc[1];
			int k;
			for (k = 0; k < adim[0]; k++)
				v += wp[k] * ap[k];
			bp[j] = v;
		} parallel_endfor
	}
	return CCV_NNC_EXEC_SUCCESS;
}
56
57
// Backward GEMM (CPU reference). Given the incoming gradient g and the forward
// input a, accumulates:
//   bias gradient:   dbias[j] += sum_i g[i][j]
//   weight gradient: dw[j][k] += sum_i g[i][j] * a[i][k]
//   input gradient:  h[i][k]   = sum_j w[j][k] * g[i][j]   (only if h requested)
// inputs: gradient, forw prop input, [w]
// outputs: [output gradient], weight updates, bias updates
// NOTE(review): when h (outputs[0]) is non-null, inputs[2] (w) is dereferenced;
// this presumes callers that want h always pass input_size == 3 — verify at call sites.
static int _ccv_nnc_gemm_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const ccv_nnc_stream_context_t* const stream_context)
{
	assert((input_size == 2 && output_size == 3) || (input_size == 3 && output_size == 3));
	const ccv_nnc_tensor_view_t* g = (const ccv_nnc_tensor_view_t*)inputs[0];
	assert(g->info.dim[2] == 0); // It is a 2-d array.
	const ccv_nnc_tensor_view_t* a = (const ccv_nnc_tensor_view_t*)inputs[1];
	assert(a->info.dim[2] == 0); // It is a 2-d array.
	ccv_nnc_tensor_view_t* dw = (ccv_nnc_tensor_view_t*)outputs[1];
	assert(dw->info.dim[2] == 0); // It is a 2-d array.
	ccv_nnc_tensor_view_t* bias = (ccv_nnc_tensor_view_t*)outputs[2];
	assert(bias->info.dim[1] == 0); // It is a 1-d array.
	const int* dwinc = CCV_IS_TENSOR_VIEW(dw) ? dw->inc : dw->info.dim;
	if (!(flags & CCV_NNC_ACCUMULATE_OUTPUT)) // reset the gradients to 0
	{
		memset(dw->data.u8, 0, sizeof(float) * dwinc[1] * dw->info.dim[0]);
		memset(bias->data.u8, 0, sizeof(float) * bias->info.dim[0]);
	}
	const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
	assert(a_nd == 1 || a_nd == 2);
	// For 2-d tensors, dim[0] is the batch dimension; skip it to get the row shape.
	const int* adim = (a_nd == 1) ? a->info.dim : a->info.dim + 1;
	const int g_nd = ccv_nnc_tensor_nd(g->info.dim);
	assert(g_nd == 1 || g_nd == 2);
	const int* gdim = (g_nd == 1) ? g->info.dim : g->info.dim + 1;
	const int batch_size = a_nd == 1 ? 1 : ccv_max(1, a->info.dim[0]);
	// NOTE: parenthesize the conditional — without the extra parentheses this
	// parsed as (batch_size == (g_nd == 1)) ? ... and the assert was vacuous.
	assert(batch_size == ((g_nd == 1) ? 1 : ccv_max(1, g->info.dim[0])));
	assert(bias->info.dim[0] == gdim[0]);
	int i, j;
	float* gp = g->data.f32;
	float* bp = bias->data.f32;
	const int* ginc = CCV_IS_TENSOR_VIEW(g) ? ((g_nd == 1) ? g->inc : g->inc + 1) : gdim;
	// Bias gradient: sum the incoming gradient over the batch dimension.
	for (i = 0; i < batch_size; i++)
	{
		for (j = 0; j < gdim[0]; j++)
			bp[j] += gp[j];
		gp += ginc[0];
	}
	assert(gdim[0] == dw->info.dim[0]);
	assert(adim[0] == dw->info.dim[1]);
	const int* ainc = CCV_IS_TENSOR_VIEW(a) ? ((a_nd == 1) ? a->inc : a->inc + 1) : adim;
	// Weight gradient: rank-1 updates dw[j][.] += g[i][j] * a[i][.] per batch row.
	for (i = 0; i < batch_size; i++)
	{
		const float* const gp = g->data.f32 + i * ginc[0];
		const float* const ap = a->data.f32 + i * ainc[0];
		parallel_for(j, gdim[0]) {
			float* const dwp = dw->data.f32 + j * dwinc[1];
			const float v = gp[j];
			int k;
			for (k = 0; k < adim[0]; k++)
				dwp[k] += ap[k] * v;
		} parallel_endfor
	}
	ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[0];
	if (h)
	{
		const ccv_nnc_tensor_view_t* w = (const ccv_nnc_tensor_view_t*)inputs[2];
		assert(h->info.dim[2] == 0); // It is a 2-d array.
		const int h_nd = ccv_nnc_tensor_nd(h->info.dim);
		assert(h_nd == 1 || h_nd == 2);
		const int* hdim = (h_nd == 1) ? h->info.dim : h->info.dim + 1;
		assert(hdim[0] == adim[0]);
		// NOTE: parenthesized for the same precedence reason as the g check above.
		assert(batch_size == ((h_nd == 1) ? 1 : ccv_max(1, h->info.dim[0])));
		const int* hinc = CCV_IS_TENSOR_VIEW(h) ? ((h_nd == 1) ? h->inc : h->inc + 1) : hdim;
		const int* winc = CCV_IS_TENSOR_VIEW(w) ? w->inc : w->info.dim;
		// Input gradient: h[i][j] = sum_k w[k][j] * g[i][k] (w walked column-wise).
		for (i = 0; i < batch_size; i++)
		{
			const float* const gp = g->data.f32 + i * ginc[0];
			float* const hp = h->data.f32 + i * hinc[0];
			parallel_for(j, hdim[0]) {
				const float* const wp = w->data.f32 + j;
				float v = 0;
				int k;
				for (k = 0; k < gdim[0]; k++)
					v += wp[k * winc[1]] * gp[k];
				hp[j] = v;
			} parallel_endfor
		}
	}
	return CCV_NNC_EXEC_SUCCESS;
}
138
139
// Register the CPU reference implementation of the GEMM forward pass.
REGISTER_COMMAND_BACKEND(CCV_NNC_GEMM_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	// Supported tensor properties for this backend.
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
	registry->tensor_datatypes = CCV_32F;
	// Single algorithm, dispatched through the reference kernel.
	registry->algorithms = 1;
	registry->exec = _ccv_nnc_gemm_forw;
}
147
148
// Register the CPU reference implementation of the GEMM backward pass.
REGISTER_COMMAND_BACKEND(CCV_NNC_GEMM_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	// Supported tensor properties for this backend.
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
	registry->tensor_datatypes = CCV_32F;
	// Single algorithm, dispatched through the reference kernel.
	registry->algorithms = 1;
	registry->exec = _ccv_nnc_gemm_back;
}