Coverage Report

Created: 2021-04-12 03:25

Source file: /home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/blas/ccv_nnc_blas.c
#include "ccv.h"
#include "ccv_internal.h"
#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_internal.h"
#include "nnc/ccv_nnc_easy.h"

static int _ccv_nnc_same_pos_inplace(const int input_idx, const int input_size, const int output_idx, const int output_size)
{
  // For cudnnOpTensor: "If the input tensor B is the same tensor as the destination tensor C, then the input tensor A also must be the same tensor as the destination tensor C."
  return input_idx == output_idx;
}
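/* This predicate is installed below as registry->allow_inplace for the ADD,
 * MUL and SCALAR_MUL commands: an output may alias an input only when the two
 * sit at the same position (for example, output 0 may reuse input 0's buffer,
 * but not input 1's), which keeps the cuDNN constraint quoted above satisfied. */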

static int _ccv_nnc_gemm_forw_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
  if (input_size == 3 && (input_bitmasks[0] & 7u) == ((1u << 0) | (1u << 1) | (1u << 2)) && output_bitmasks[0] == 1u)
    return 1;
  // No bias is OK.
  if (input_size == 2 && (input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1)) && output_bitmasks[0] == 1u)
    return 1;
  return 0;
}
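/* Illustration of the bitmask convention as used above: bit i of
 * input_bitmasks[0] / output_bitmasks[0] records whether the i-th input /
 * output tensor is supplied. A biased GEMM b = a * w + bias therefore arrives
 * with input_bitmasks[0] == 0x7 (a, w, bias) and output_bitmasks[0] == 0x1 (b),
 * which the first branch accepts; without the bias, input_bitmasks[0] == 0x3
 * and the second branch accepts it. Every other combination is rejected. */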

static int _ccv_nnc_gemm_back_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
  // Output the propagated error, gradient w.r.t. w and bias.
  if ((input_bitmasks[0] & 7u) == ((1u << 0) | (1u << 1) | (1u << 2) | (0 << 3)) && output_bitmasks[0] == ((1u << 0) | (1u << 1) | (1u << 2)))
    return 1;
  // No bias.
  if ((input_bitmasks[0] & 7u) == ((1u << 0) | (1u << 1) | (1u << 2) | (0 << 3)) && output_bitmasks[0] == ((1u << 0) | (1u << 1) | (0u << 2)))
    return 1;
  // Don't propagate error, only gradient w.r.t. w and bias.
  if ((input_bitmasks[0] & 7u) == ((1u << 0) | (1u << 1) | (0 << 2) | (0 << 3)) && output_bitmasks[0] == ((0 << 0) | (1u << 1) | (1u << 2)))
    return 1; // Never hit in this coverage run.
  // No bias.
  if ((input_bitmasks[0] & 7u) == ((1u << 0) | (1u << 1) | (0 << 2) | (0 << 3)) && output_bitmasks[0] == ((0 << 0) | (1u << 1) | (0u << 2)))
    return 1; // Never hit in this coverage run.
  return 0;
}
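/* Reading the branches above: for the backward pass the inputs appear to be
 * ordered {g, a, w} (g being the incoming gradient) and the outputs
 * {da, dw, dbias}. Propagating the error back into a (output bit 0) requires
 * all of g, a and w; when only dw / dbias are requested, w itself is not
 * needed, which is why the last two branches accept input_bitmasks[0] == 0x3. */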

static void _ccv_nnc_gemm_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
{
  assert(output_size == 1);
  int a_batch_size, a_rows, a_cols, a_batch_inc, a_rows_inc, a_cols_inc;
  int w_batch_size, w_rows, w_cols, w_batch_inc, w_rows_inc, w_cols_inc;
  const int a_nd = ccv_nnc_tensor_nd(inputs[0].dim);
  ccv_nnc_tensor_get_matrix_params(inputs[0], inputs[0].dim, cmd.blas.transpose_a, &a_batch_size, &a_rows, &a_cols, &a_batch_inc, &a_rows_inc, &a_cols_inc);
  ccv_nnc_tensor_get_matrix_params(inputs[1], inputs[1].dim, cmd.blas.transpose_b, &w_batch_size, &w_rows, &w_cols, &w_batch_inc, &w_rows_inc, &w_cols_inc);
  outputs[0].type = inputs[0].type;
  outputs[0].format = inputs[0].format;
  outputs[0].datatype = inputs[0].datatype;
  int b_rows = a_rows, b_cols = w_cols;
  if (a_nd == 1) {
    outputs[0].dim[0] = b_cols;
  } else if (a_nd == 2) {
    outputs[0].dim[0] = b_rows;
    outputs[0].dim[1] = b_cols;
  } else {
    assert(a_nd == 3);
    outputs[0].dim[0] = a_batch_size;
    outputs[0].dim[1] = b_rows;
    outputs[0].dim[2] = b_cols;
  }
}
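/* Worked example (illustrative shapes, assuming ccv_nnc_tensor_get_matrix_params
 * reads a 3-D tensor as batch x rows x cols with no transpose set): for
 * a = {8, 64, 128} and w = {8, 128, 256} this yields a_rows = 64 and
 * w_cols = 256, so the output is auto-sized to {8, 64, 256}; a 2-D
 * a = {64, 128} with w = {128, 256} gives {64, 256}, and a 1-D a keeps only
 * the column dimension, {256}. */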

REGISTER_COMMAND(CCV_NNC_GEMM_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
  FIND_BACKEND(ccv_nnc_gemm_cpu_ref.c, ccv_nnc_gemm_cpu_opt.c, gpu/ccv_nnc_gemm_gpu_cublas.cu)
{
  registry->bitmask = _ccv_nnc_gemm_forw_bitmask;
  registry->tensor_auto = _ccv_nnc_gemm_tensor_auto_forw;
}

REGISTER_COMMAND(CCV_NNC_GEMM_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
  FIND_BACKEND(ccv_nnc_gemm_cpu_ref.c, ccv_nnc_gemm_cpu_opt.c, gpu/ccv_nnc_gemm_gpu_cublas.cu)
{
  registry->bitmask = _ccv_nnc_gemm_back_bitmask;
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs;
}
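/* REGISTER_COMMAND wires a command into the nnc registry: bitmask is consulted
 * to decide whether a given combination of supplied inputs and outputs is
 * legal, tensor_auto fills in output tensor parameters the caller left
 * unspecified, and FIND_BACKEND lists the source files that provide the CPU
 * and GPU (cuBLAS) implementations. */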

//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_GEMM_FORWARD)
#define CMD_GEMM_FORWARD(...) ccv_nnc_cmd(CCV_NNC_GEMM_FORWARD, 0, CMD_GEMM(__VA_ARGS__), 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_GEMM_BACKWARD)
#define CMD_GEMM_BACKWARD(...) ccv_nnc_cmd(CCV_NNC_GEMM_BACKWARD, 0, CMD_GEMM(__VA_ARGS__), 0)
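/* CMD_GEMM_FORWARD / CMD_GEMM_BACKWARD are convenience constructors that build
 * a ccv_nnc_cmd_t for the registered commands; CMD_GEMM(__VA_ARGS__), defined
 * elsewhere in the included headers, expands the variadic arguments into the
 * command's blas parameters. */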

static int _ccv_nnc_add_forw_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
  if ((input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1)) && output_bitmasks[0] == 1u)
    return 1;
  return 0;
}

static int _ccv_nnc_add_back_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
  // w.r.t. both x and y
  if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == ((1u << 0) | (1u << 1)))
    return 1;
  // w.r.t. x
  if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == ((1u << 0) | (0u << 1)))
    return 1;
  // w.r.t. y
  if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == ((0u << 0) | (1u << 1)))
    return 1; // Never hit in this coverage run.
  return 0;
}
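/* All three branches above only require the incoming gradient (input bit 0);
 * the output bitmask then selects whether the gradient w.r.t. x, w.r.t. y,
 * or both should be produced. */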

static void _ccv_nnc_broadcast_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
{
  assert(input_size >= 2);
  assert(output_size == 1);
  const int a_nd = ccv_nnc_tensor_nd(inputs[0].dim);
  const int b_nd = ccv_nnc_tensor_nd(inputs[1].dim);
  outputs[0] = inputs[0];
  const int c_nd = ccv_max(a_nd, b_nd);
  int i;
  for (i = a_nd - 1; i >= 0; i--)
    outputs[0].dim[i + c_nd - a_nd] = inputs[0].dim[i];
  for (i = b_nd - 1; i >= 0; i--)
    outputs[0].dim[i + c_nd - b_nd] = ccv_max(outputs[0].dim[i + c_nd - b_nd], inputs[1].dim[i]);
}
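/* Broadcast shape inference example (illustrative dims): with
 * inputs[0].dim = {4, 1, 8} and inputs[1].dim = {3, 8}, the shorter shape is
 * right-aligned against the longer one and each output dimension takes the
 * maximum, giving outputs[0].dim = {4, 3, 8}. */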

REGISTER_COMMAND(CCV_NNC_ADD_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
  FIND_BACKEND(ccv_nnc_add_cpu_ref.c, gpu/ccv_nnc_add_gpu_cudnn.cu)
{
  registry->bitmask = _ccv_nnc_add_forw_bitmask;
  registry->tensor_auto = _ccv_nnc_broadcast_tensor_auto_forw;
  registry->allow_inplace = _ccv_nnc_same_pos_inplace;
}

REGISTER_COMMAND(CCV_NNC_ADD_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
  FIND_BACKEND(ccv_nnc_add_cpu_ref.c, gpu/ccv_nnc_add_gpu_cudnn.cu)
{
  registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES;
  registry->bitmask = _ccv_nnc_add_back_bitmask;
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs;
}
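/* CCV_NNC_CMD_ATTR_NULL_IS_ONES on the backward registration signals, as the
 * flag name suggests, that a null tensor in the input list may be treated as
 * if it were filled with ones, the usual convention for an omitted incoming
 * gradient. */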

//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_ADD_FORWARD)
#define CMD_ADD_FORWARD(_p, _q) ccv_nnc_cmd(CCV_NNC_ADD_FORWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_p, _q}}}, 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_ADD_BACKWARD)
#define CMD_ADD_BACKWARD(_p, _q) ccv_nnc_cmd(CCV_NNC_ADD_BACKWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_p, _q}}}, 0)
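/* The _p and _q arguments are stored into .blas.a, so CMD_ADD_FORWARD(_p, _q)
 * scales its two inputs before summing them, presumably c = _p * a + _q * b;
 * CMD_ADD_FORWARD(1, 1) would then be a plain (broadcasting) element-wise add. */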

static int _ccv_nnc_mul_forw_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
  if ((input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1)) && output_bitmasks[0] == 1u)
    return 1;
  return 0;
}

static int _ccv_nnc_mul_back_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
  // w.r.t. both x and y
  if ((input_bitmasks[0] & 7u) == 7u && output_bitmasks[0] == ((1u << 0) | (1u << 1)))
    return 1;
  // w.r.t. x
  if ((input_bitmasks[0] & 5u) == 5u && output_bitmasks[0] == ((1u << 0) | (0u << 1)))
    return 1; // Never hit in this coverage run.
  // w.r.t. y
  if ((input_bitmasks[0] & 3u) == 3u && output_bitmasks[0] == ((0u << 0) | (1u << 1)))
    return 1;
  return 0;
}
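/* These masks follow the product rule, assuming the backward inputs are
 * ordered {g, x, y}: the gradient w.r.t. x needs g and y (bits 0 and 2, the
 * & 5u check), the gradient w.r.t. y needs g and x (bits 0 and 1, the & 3u
 * check), and asking for both requires all three inputs (& 7u). */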

REGISTER_COMMAND(CCV_NNC_MUL_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
  FIND_BACKEND(ccv_nnc_mul_cpu_ref.c, gpu/ccv_nnc_mul_gpu_cudnn.cu)
{
  registry->bitmask = _ccv_nnc_mul_forw_bitmask;
  registry->tensor_auto = _ccv_nnc_broadcast_tensor_auto_forw;
  registry->allow_inplace = _ccv_nnc_same_pos_inplace;
}

REGISTER_COMMAND(CCV_NNC_MUL_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
  FIND_BACKEND(ccv_nnc_mul_cpu_ref.c, gpu/ccv_nnc_mul_gpu_cudnn.cu)
{
  registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES;
  registry->bitmask = _ccv_nnc_mul_back_bitmask;
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs;
}

//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_MUL_FORWARD)
#define CMD_MUL_FORWARD(_p) ccv_nnc_cmd(CCV_NNC_MUL_FORWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_p,}}}, 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_MUL_BACKWARD)
#define CMD_MUL_BACKWARD(_p) ccv_nnc_cmd(CCV_NNC_MUL_BACKWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_p,}}}, 0)

static int _ccv_nnc_scalar_mul_forw_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
  if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == 1u)
    return 1;
  return 0;
}

static int _ccv_nnc_scalar_mul_back_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
  // w.r.t. x
  if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == 1u)
    return 1;
  return 0;
}

REGISTER_COMMAND(CCV_NNC_SCALAR_MUL_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
  FIND_BACKEND(ccv_nnc_mul_cpu_ref.c, gpu/ccv_nnc_mul_gpu_cudnn.cu)
{
  registry->bitmask = _ccv_nnc_scalar_mul_forw_bitmask;
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
  registry->allow_inplace = _ccv_nnc_same_pos_inplace;
}

REGISTER_COMMAND(CCV_NNC_SCALAR_MUL_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
  FIND_BACKEND(ccv_nnc_mul_cpu_ref.c, gpu/ccv_nnc_mul_gpu_cudnn.cu)
{
  registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES;
  registry->bitmask = _ccv_nnc_scalar_mul_back_bitmask;
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs;
}

//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_SCALAR_MUL_FORWARD)
#define CMD_SCALAR_MUL_FORWARD(_a) ccv_nnc_cmd(CCV_NNC_SCALAR_MUL_FORWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_a,}}}, 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_SCALAR_MUL_BACKWARD)
#define CMD_SCALAR_MUL_BACKWARD(_a) ccv_nnc_cmd(CCV_NNC_SCALAR_MUL_BACKWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_a,}}}, 0)
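/* CMD_SCALAR_MUL_FORWARD(_a) stores the scalar in .blas.a[0]; the forward
 * command computes an element-wise y = _a * x and the backward command scales
 * the incoming gradient by the same factor. Note that SCALAR_MUL reuses the
 * MUL backends (ccv_nnc_mul_cpu_ref.c, gpu/ccv_nnc_mul_gpu_cudnn.cu) rather
 * than shipping dedicated kernels. */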