Coverage Report

Created: 2025-02-24 17:43

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/blas/ccv_nnc_gemm_cpu_opt.c
Line| Count|Source
   1|      |#include "ccv.h"
   2|      |#include "ccv_internal.h"
   3|      |#include "nnc/ccv_nnc.h"
   4|      |#include "nnc/ccv_nnc_easy.h"
   5|      |#include "nnc/ccv_nnc_internal.h"
   6|      |
   7|      |#include "_ccv_nnc_gemm_cpu_opt.h"
   8|      |
   9|      |FIND_FILE(cpu_opt/_ccv_nnc_gemm_cpu_opt.c, cpu_sys/_ccv_nnc_gemm_cpu_sys.c)
  10|      |
  11|      |enum {
  12|      |  CCV_NNC_CMD_OPT_GEMM_ALGO_DIRECT, // Direct multiplication
  13|      |  CCV_NNC_CMD_OPT_GEMM_ALGO_SYSTEM, // Use system GEMM
  14|      |  CCV_NNC_CMD_OPT_GEMM_ALGO_COUNT
  15|      |};
  16|      |
  17|      |static int _ccv_nnc_gemm_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  18| 2.56k|{
  19| 2.56k|  assert(input_size >= 2);
  20| 2.56k|  const ccv_nnc_tensor_view_t* w = (const ccv_nnc_tensor_view_t*)inputs[1];
  21| 2.56k|  const ccv_nnc_tensor_view_t* bias = input_size > 2 ? (const ccv_nnc_tensor_view_t*)inputs[2] : 0;
  22| 2.56k|  const ccv_nnc_tensor_view_t* a = (const ccv_nnc_tensor_view_t*)inputs[0];
  23| 2.56k|  ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
  24|      |  // Cannot compute if w is not transposed and dimensions are batched.
  25|      |  // Copy the most of parameters, but reshape the dimension of a to a vector.
  26| 2.56k|  assert(output_size == 1);
  27| 2.56k|  switch (cmd.algorithm)
  28| 2.56k|  {
  29|   289|    case CCV_NNC_CMD_OPT_GEMM_ALGO_DIRECT:
  30|      |      // Cannot handle this with DIRECT.
  31|   289|      if (ccv_nnc_tensor_nd(a->info.dim) > 2 || ccv_nnc_tensor_nd(b->info.dim) > 2 ||
  32|   289|        ccv_nnc_tensor_nd(w->info.dim) > 2 ||
  33|   289|        (bias && ccv_nnc_tensor_nd(bias->info.dim) > 1) ||
  34|   289|        cmd.info.blas.transpose_a[0] != cmd.info.blas.transpose_a[1] ||
  35|   289|        cmd.info.blas.transpose_b[0] == cmd.info.blas.transpose_b[1])
  36|     3|        break;
  37|   286|      return _ccv_nnc_gemm_forw_cpu_opt(a, w, bias, b);
  38| 2.28k|    case CCV_NNC_CMD_OPT_GEMM_ALGO_SYSTEM:
  39| 2.28k|      return _ccv_nnc_gemm_forw_cpu_sys(cmd.info.blas.transpose_a, cmd.info.blas.transpose_b, a, w, bias, b);
  40|     0|    case -1:
  41|      |      // Pass-through
  42|     0|      break;
  43| 2.56k|  }
  44|     3|#if (defined HAVE_CBLAS || defined HAVE_ACCELERATE_FRAMEWORK)
  45|     3|  return _ccv_nnc_gemm_forw_cpu_sys(cmd.info.blas.transpose_a, cmd.info.blas.transpose_b, a, w, bias, b);
  46|     0|#endif
  47|     0|  if (ccv_nnc_tensor_nd(a->info.dim) > 2 || ccv_nnc_tensor_nd(b->info.dim) > 2 ||
  48|     0|    ccv_nnc_tensor_nd(w->info.dim) > 2 ||
  49|     0|    (bias && ccv_nnc_tensor_nd(bias->info.dim) > 1) ||
  50|     0|    cmd.info.blas.transpose_a[0] != cmd.info.blas.transpose_a[1] ||
  51|     0|    cmd.info.blas.transpose_b[0] == cmd.info.blas.transpose_b[1])
  52|     0|    return CCV_NNC_EXEC_INVALID;
  53|     0|  assert(w->info.dim[2] == 0); // It is a 2-d array
  54|     0|  assert(!bias || bias->info.dim[1] == 0); // It is a 1-d array
  55|     0|  const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
  56|     0|  assert(a_nd == 1 || a_nd == 2);
  57|     0|  const int* adim = (a_nd == 1) ? a->info.dim : a->info.dim + 1;
  58|     0|  const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
  59|     0|  assert(b_nd == 1 || b_nd == 2);
  60|     0|  const int* bdim = (b_nd == 1) ? b->info.dim : b->info.dim + 1;
  61|     0|  const int batch_size = a_nd == 1 ? 1 : ccv_max(1, a->info.dim[0]);
  62|     0|  assert(batch_size == (b_nd == 1) ? 1 : ccv_max(1, b->info.dim[0]));
  63|     0|  assert(!bias || bdim[0] == bias->info.dim[0]);
  64|     0|  assert(bdim[0] == w->info.dim[0]);
  65|     0|  assert(adim[0] == w->info.dim[1]);
  66|     0|  return _ccv_nnc_gemm_forw_cpu_opt(a, w, bias, b);
  67|     0|}
  68|      |
  69|      |static int _ccv_nnc_gemm_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  70| 4.90k|{
  71|      |  // inputs: gradient, forw prop input, [w]
  72|      |  // outputs: [output gradient], weight updates, bias updates
  73| 4.90k|  assert(input_size >= 2 && output_size >= 2);
  74| 4.90k|  const ccv_nnc_tensor_view_t* g = (const ccv_nnc_tensor_view_t*)inputs[0];
  75| 4.90k|  const ccv_nnc_tensor_view_t* a = (const ccv_nnc_tensor_view_t*)inputs[1];
  76| 4.90k|  ccv_nnc_tensor_view_t* dw = (ccv_nnc_tensor_view_t*)outputs[1];
  77| 4.90k|  ccv_nnc_tensor_view_t* bias = output_size > 2 ? (ccv_nnc_tensor_view_t*)outputs[2] : 0;
  78| 4.90k|  const ccv_nnc_tensor_view_t* w = (input_size > 2) ? (const ccv_nnc_tensor_view_t*)inputs[2] : 0;
  79| 4.90k|  ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[0];
  80|      |  // Cannot compute if w is not transposed and dimensions are batched.
  81| 4.90k|  switch (cmd.algorithm)
  82| 4.90k|  {
  83|    99|    case CCV_NNC_CMD_OPT_GEMM_ALGO_DIRECT:
  84|      |      // Cannot handle this with DIRECT.
  85|    99|      if ((!a || ccv_nnc_tensor_nd(a->info.dim) > 2) || ccv_nnc_tensor_nd(g->info.dim) > 2 ||
  86|    99|        (!dw || ccv_nnc_tensor_nd(dw->info.dim) > 2) ||
  87|    99|        (bias && ccv_nnc_tensor_nd(bias->info.dim) > 1) ||
  88|    99|        cmd.info.blas.transpose_a[0] != cmd.info.blas.transpose_a[1] ||
  89|    99|        cmd.info.blas.transpose_b[0] == cmd.info.blas.transpose_b[1])
  90|     6|        break;
  91|    93|      return _ccv_nnc_gemm_back_cpu_opt(g, a, w, dw, bias, h, flags);
  92| 4.80k|    case CCV_NNC_CMD_OPT_GEMM_ALGO_SYSTEM:
  93| 4.80k|      return _ccv_nnc_gemm_back_cpu_sys(cmd.info.blas.transpose_a, cmd.info.blas.transpose_b, g, a, w, dw, bias, h, flags);
  94|     0|    case -1:
  95|      |      // Pass-through
  96|     0|      break;
  97| 4.90k|  }
  98|     6|#if (defined HAVE_CBLAS || defined HAVE_ACCELERATE_FRAMEWORK)
  99|     6|  return _ccv_nnc_gemm_back_cpu_sys(cmd.info.blas.transpose_a, cmd.info.blas.transpose_b, g, a, w, dw, bias, h, flags);
 100|      |#else
 101|      |  if (ccv_nnc_tensor_nd(a->info.dim) > 2 || ccv_nnc_tensor_nd(g->info.dim) > 2 ||
 102|      |    ccv_nnc_tensor_nd(dw->info.dim) > 2 ||
 103|      |    (bias && ccv_nnc_tensor_nd(bias->info.dim) > 1) ||
 104|      |    cmd.info.blas.transpose_a[0] != cmd.info.blas.transpose_a[1] ||
 105|      |    cmd.info.blas.transpose_b[0] == cmd.info.blas.transpose_b[1])
 106|      |    return CCV_NNC_EXEC_INVALID;
 107|      |  assert(dw->info.dim[2] == 0); // It is a 2-d array.
 108|      |  assert(!bias || bias->info.dim[1] == 0); // It is a 1-d array.
 109|      |  const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
 110|      |  assert(a_nd == 1 || a_nd == 2);
 111|      |  const int* adim = (a_nd == 1) ? a->info.dim : a->info.dim + 1;
 112|      |  const int g_nd = ccv_nnc_tensor_nd(g->info.dim);
 113|      |  assert(g_nd == 1 || g_nd == 2);
 114|      |  const int* gdim = (g_nd == 1) ? g->info.dim : g->info.dim + 1;
 115|      |  const int batch_size = a_nd == 1 ? 1 : ccv_max(1, a->info.dim[0]);
 116|      |  assert(batch_size == (g_nd == 1) ? 1 : ccv_max(1, g->info.dim[0]));
 117|      |  assert(!bias || bias->info.dim[0] == gdim[0]);
 118|      |  assert(gdim[0] == dw->info.dim[0]);
 119|      |  assert(adim[0] == dw->info.dim[1]);
 120|      |  if (h)
 121|      |  {
 122|      |    assert(h->info.dim[2] == 0); // It is a 2-d array.
 123|      |    const int h_nd = ccv_nnc_tensor_nd(h->info.dim);
 124|      |    assert(h_nd == 1 || h_nd == 2);
 125|      |    const int* hdim = (h_nd == 1) ? h->info.dim : h->info.dim + 1;
 126|      |    assert(hdim[0] == adim[0]);
 127|      |  }
 128|      |  if (w)
 129|      |  {
 130|      |    assert(w->info.dim[2] == 0); // It is a 2-d array.
 131|      |    assert(w->info.dim[0] == dw->info.dim[0]);
 132|      |    assert(w->info.dim[1] == dw->info.dim[1]);
 133|      |  }
 134|      |  return _ccv_nnc_gemm_back_cpu_opt(g, a, w, dw, bias, h, flags);
 135|      |#endif
 136| 4.90k|}
 137|      |
 138|      |REGISTER_COMMAND_BACKEND(CCV_NNC_GEMM_FORWARD, CCV_NNC_BACKEND_CPU_OPT)(ccv_nnc_cmd_backend_registry_t* const registry)
 139|     1|{
 140|     1|  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
 141|     1|  registry->tensor_datatypes = CCV_32F;
 142|     1|  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 143|     1|  registry->algorithms = CCV_NNC_CMD_OPT_GEMM_ALGO_COUNT;
 144|     1|  registry->exec = _ccv_nnc_gemm_forw;
 145|     1|}
 146|      |
 147|      |REGISTER_COMMAND_BACKEND(CCV_NNC_GEMM_BACKWARD, CCV_NNC_BACKEND_CPU_OPT)(ccv_nnc_cmd_backend_registry_t* const registry)
 148|     1|{
 149|     1|  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
 150|     1|  registry->tensor_datatypes = CCV_32F;
 151|     1|  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 152|     1|  registry->algorithms = CCV_NNC_CMD_OPT_GEMM_ALGO_COUNT;
 153|     1|  registry->exec = _ccv_nnc_gemm_back;
 154|     1|}
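
Note on the uncovered regions: source lines 47-66 carry explicit 0 counts because the forward function has no #else, so that fallback is compiled in but unreachable once the #if branch returns unconditionally; lines 101-134 show no count at all because the backward function's #else block was preprocessed away on this runner. The 3 and 6 hits on the HAVE_CBLAS lines (44-45, 98-99) confirm this build linked against a system BLAS.

The dispatch pattern the counts trace out — honor the requested cmd.algorithm, break out of DIRECT when its preconditions fail rather than erroring, and fall back to the system GEMM — is easy to see in isolation. Below is a minimal standalone sketch of that pattern; every name in it is a hypothetical stand-in, not the ccv API.

#include <stdio.h>

/* Hypothetical stand-ins for the two kernels this file dispatches between. */
enum { ALGO_DIRECT, ALGO_SYSTEM, ALGO_COUNT };

/* DIRECT only handles un-batched (<= 2-D) operands, analogous to the
 * dimensionality checks at source lines 31-35 above. */
static int direct_ok(int a_nd, int w_nd)
{
  return a_nd <= 2 && w_nd <= 2;
}

static int gemm_direct(void) { puts("direct kernel"); return 0; }
static int gemm_system(void) { puts("system BLAS");   return 0; }

/* Mirrors the switch in _ccv_nnc_gemm_forw: honor the requested algorithm,
 * break (rather than fail) when its preconditions do not hold, and fall
 * through to the system path as the default. */
static int gemm_dispatch(int algorithm, int a_nd, int w_nd)
{
  switch (algorithm)
  {
    case ALGO_DIRECT:
      if (!direct_ok(a_nd, w_nd))
        break; /* cannot handle this with DIRECT */
      return gemm_direct();
    case ALGO_SYSTEM:
      return gemm_system();
    case -1: /* pass-through: no specific algorithm requested */
      break;
  }
  return gemm_system(); /* default path, like the HAVE_CBLAS branch */
}

int main(void)
{
  gemm_dispatch(ALGO_DIRECT, 2, 2); /* meets DIRECT's constraints */
  gemm_dispatch(ALGO_DIRECT, 3, 2); /* batched input: falls back */
  return 0;
}

Compiled with any C compiler (e.g. cc sketch.c && ./a.out), the second call prints "system BLAS" — the same shape as the 3 executions that fell through the DIRECT case at line 36 to the system return at line 45 in the report above.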