Coverage Report

Created: 2021-04-12 03:25

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/blas/ccv_nnc_gemm_cpu_opt.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
7
#include "_ccv_nnc_gemm_cpu_opt.h"
8
9
FIND_FILE(cpu_opt/_ccv_nnc_gemm_cpu_opt.c, cpu_sys/_ccv_nnc_gemm_cpu_sys.c)
10
11
enum {
12
  CCV_NNC_CMD_OPT_GEMM_ALGO_DIRECT, // Direct multiplication
13
  CCV_NNC_CMD_OPT_GEMM_ALGO_SYSTEM, // Use system GEMM
14
  CCV_NNC_CMD_OPT_GEMM_ALGO_COUNT
15
};
16
17
static int _ccv_nnc_gemm_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
18
7.42k
{
19
7.42k
  assert(input_size >= 2);
20
7.42k
  const ccv_nnc_tensor_view_t* w = (const ccv_nnc_tensor_view_t*)inputs[1];
21
7.42k
  const ccv_nnc_tensor_view_t* bias = input_size > 2 ? (const ccv_nnc_tensor_view_t*)inputs[2] : 0; [branch region counts: true 6.35k, false 1.07k]
22
7.42k
  const ccv_nnc_tensor_view_t* a = (const ccv_nnc_tensor_view_t*)inputs[0];
23
7.42k
  ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
24
7.42k
  // Cannot compute if w is not transposed and dimensions are batched.
25
7.42k
  // Copy the most of parameters, but reshape the dimension of a to a vector.
26
7.42k
  assert(output_size == 1);
27
7.42k
  switch (cmd.algorithm)
28
7.42k
  {
29
162
    case CCV_NNC_CMD_OPT_GEMM_ALGO_DIRECT:
30
162
      // Cannot handle this with DIRECT.
31
162
      if (ccv_nnc_tensor_nd(a->info.dim) > 2 || ccv_nnc_tensor_nd(b->info.dim) > 2 ||
32
162
        ccv_nnc_tensor_nd(w->info.dim) > 2 ||
33
162
        (bias && ccv_nnc_tensor_nd(bias->info.dim) > 1) || [region count: 135]
34
162
        cmd.info.blas.transpose_a[0] != cmd.info.blas.transpose_a[1] ||
35
162
        cmd.info.blas.transpose_b[0] == cmd.info.blas.transpose_b[1])
36
3
        break;
37
159
      return _ccv_nnc_gemm_forw_cpu_opt(a, w, bias, b);
38
7.26k
    case CCV_NNC_CMD_OPT_GEMM_ALGO_SYSTEM:
39
7.26k
      return _ccv_nnc_gemm_forw_cpu_sys(cmd.info.blas.transpose_a, cmd.info.blas.transpose_b, a, w, bias, b);
40
159
    case -1:
41
0
      // Pass-through
42
0
      break;
43
3
  }
44
3
#if (defined HAVE_CBLAS || defined HAVE_ACCELERATE_FRAMEWORK)
45
3
  return _ccv_nnc_gemm_forw_cpu_sys(cmd.info.blas.transpose_a, cmd.info.blas.transpose_b, a, w, bias, b);
46
3
#endif
47
3
  
if (ccv_nnc_tensor_nd(a->info.dim) > 2 || ccv_nnc_tensor_nd(b->info.dim) > 2 || [region counts: 0, 0 — uncovered]
48
0
    ccv_nnc_tensor_nd(w->info.dim) > 2 ||
49
0
    (bias && ccv_nnc_tensor_nd(bias->info.dim) > 1) ||
50
0
    cmd.info.blas.transpose_a[0] != cmd.info.blas.transpose_a[1] ||
51
0
    cmd.info.blas.transpose_b[0] == cmd.info.blas.transpose_b[1])
52
0
    return CCV_NNC_EXEC_INVALID;
53
0
  assert(w->info.dim[2] == 0); // It is a 2-d array
54
0
  assert(!bias || bias->info.dim[1] == 0); // It is a 1-d array
55
0
  const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
56
0
  assert(a_nd == 1 || a_nd == 2);
57
0
  const int* adim = (a_nd == 1) ? a->info.dim : a->info.dim + 1;
58
0
  const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
59
0
  assert(b_nd == 1 || b_nd == 2);
60
0
  const int* bdim = (b_nd == 1) ? b->info.dim : b->info.dim + 1;
61
0
  const int batch_size = a_nd == 1 ? 1 : ccv_max(1, a->info.dim[0]);
62
0
  assert(batch_size == (b_nd == 1) ? 1 : ccv_max(1, b->info.dim[0]));
63
0
  assert(!bias || bdim[0] == bias->info.dim[0]);
64
0
  assert(bdim[0] == w->info.dim[0]);
65
0
  assert(adim[0] == w->info.dim[1]);
66
0
  return _ccv_nnc_gemm_forw_cpu_opt(a, w, bias, b);
67
0
}
68
69
static int _ccv_nnc_gemm_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
70
7.34k
{
71
7.34k
  // inputs: gradient, forw prop input, [w]
72
7.34k
  // outputs: [output gradient], weight updates, bias updates
73
7.34k
  assert(input_size >= 2 && output_size >= 2);
74
7.34k
  const ccv_nnc_tensor_view_t* g = (const ccv_nnc_tensor_view_t*)inputs[0];
75
7.34k
  const ccv_nnc_tensor_view_t* a = (const ccv_nnc_tensor_view_t*)inputs[1];
76
7.34k
  ccv_nnc_tensor_view_t* dw = (ccv_nnc_tensor_view_t*)outputs[1];
77
7.34k
  ccv_nnc_tensor_view_t* bias = output_size > 2 ? (ccv_nnc_tensor_view_t*)outputs[2] : 0; [branch region counts: true 6.32k, false 1.02k]
78
7.34k
  const ccv_nnc_tensor_view_t* w = (input_size > 2) ? (const ccv_nnc_tensor_view_t*)inputs[2] : 0; [false-branch region count: 0]
79
7.34k
  ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[0];
80
7.34k
  // Cannot compute if w is not transposed and dimensions are batched.
81
7.34k
  switch (cmd.algorithm)
82
7.34k
  {
83
75
    case CCV_NNC_CMD_OPT_GEMM_ALGO_DIRECT:
84
75
      // Cannot handle this with DIRECT.
85
75
      if (ccv_nnc_tensor_nd(a->info.dim) > 2 || ccv_nnc_tensor_nd(g->info.dim) > 2 ||
86
75
        ccv_nnc_tensor_nd(dw->info.dim) > 2 ||
87
75
        (bias && ccv_nnc_tensor_nd(bias->info.dim) > 1) || [region count: 66]
88
75
        cmd.info.blas.transpose_a[0] != cmd.info.blas.transpose_a[1] ||
89
75
        cmd.info.blas.transpose_b[0] == cmd.info.blas.transpose_b[1])
90
3
        break;
91
72
      return _ccv_nnc_gemm_back_cpu_opt(g, a, w, dw, bias, h, flags);
92
7.26k
    case CCV_NNC_CMD_OPT_GEMM_ALGO_SYSTEM:
93
7.26k
      return _ccv_nnc_gemm_back_cpu_sys(cmd.info.blas.transpose_a, cmd.info.blas.transpose_b, g, a, w, dw, bias, h, flags);
94
72
    case -1:
95
0
      // Pass-through
96
0
      break;
97
3
  }
98
3
#if (defined HAVE_CBLAS || defined HAVE_ACCELERATE_FRAMEWORK)
99
3
  return _ccv_nnc_gemm_back_cpu_sys(cmd.info.blas.transpose_a, cmd.info.blas.transpose_b, g, a, w, dw, bias, h, flags);
100
#else
101
  if (ccv_nnc_tensor_nd(a->info.dim) > 2 || ccv_nnc_tensor_nd(g->info.dim) > 2 ||
102
    ccv_nnc_tensor_nd(dw->info.dim) > 2 ||
103
    (bias && ccv_nnc_tensor_nd(bias->info.dim) > 1) ||
104
    cmd.info.blas.transpose_a[0] != cmd.info.blas.transpose_a[1] ||
105
    cmd.info.blas.transpose_b[0] == cmd.info.blas.transpose_b[1])
106
    return CCV_NNC_EXEC_INVALID;
107
  assert(dw->info.dim[2] == 0); // It is a 2-d array.
108
  assert(!bias || bias->info.dim[1] == 0); // It is a 1-d array.
109
  const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
110
  assert(a_nd == 1 || a_nd == 2);
111
  const int* adim = (a_nd == 1) ? a->info.dim : a->info.dim + 1;
112
  const int g_nd = ccv_nnc_tensor_nd(g->info.dim);
113
  assert(g_nd == 1 || g_nd == 2);
114
  const int* gdim = (g_nd == 1) ? g->info.dim : g->info.dim + 1;
115
  const int batch_size = a_nd == 1 ? 1 : ccv_max(1, a->info.dim[0]);
116
  assert(batch_size == (g_nd == 1) ? 1 : ccv_max(1, g->info.dim[0]));
117
  assert(!bias || bias->info.dim[0] == gdim[0]);
118
  assert(gdim[0] == dw->info.dim[0]);
119
  assert(adim[0] == dw->info.dim[1]);
120
  if (h)
121
  {
122
    assert(h->info.dim[2] == 0); // It is a 2-d array.
123
    const int h_nd = ccv_nnc_tensor_nd(h->info.dim);
124
    assert(h_nd == 1 || h_nd == 2);
125
    const int* hdim = (h_nd == 1) ? h->info.dim : h->info.dim + 1;
126
    assert(hdim[0] == adim[0]);
127
  }
128
  if (w)
129
  {
130
    assert(w->info.dim[2] == 0); // It is a 2-d array.
131
    assert(w->info.dim[0] == dw->info.dim[0]);
132
    assert(w->info.dim[1] == dw->info.dim[1]);
133
  }
134
  return _ccv_nnc_gemm_back_cpu_opt(g, a, w, dw, bias, h, flags);
135
#endif
136
3
}
137
138
REGISTER_COMMAND_BACKEND(CCV_NNC_GEMM_FORWARD, CCV_NNC_BACKEND_CPU_OPT)(ccv_nnc_cmd_backend_registry_t* const registry)
139
1
{
140
1
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
141
1
  registry->tensor_datatypes = CCV_32F;
142
1
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
143
1
  registry->algorithms = CCV_NNC_CMD_OPT_GEMM_ALGO_COUNT;
144
1
  registry->exec = _ccv_nnc_gemm_forw;
145
1
}
146
147
REGISTER_COMMAND_BACKEND(CCV_NNC_GEMM_BACKWARD, CCV_NNC_BACKEND_CPU_OPT)(ccv_nnc_cmd_backend_registry_t* const registry)
148
1
{
149
1
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
150
1
  registry->tensor_datatypes = CCV_32F;
151
1
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
152
1
  registry->algorithms = CCV_NNC_CMD_OPT_GEMM_ALGO_COUNT;
153
1
  registry->exec = _ccv_nnc_gemm_back;
154
1
}