Coverage Report

Created: 2019-07-03 22:50

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/blas/ccv_nnc_blas.c
 Line |  Count | Source
    1 |        | #include <ccv.h>
    2 |        | #include <nnc/ccv_nnc.h>
    3 |        | #include <nnc/ccv_nnc_internal.h>
    4 |        | 
    5 |        | static int _ccv_nnc_arbitary_inplace(const int input_idx, const int input_size, const int output_idx, const int output_size)
    6 |     34 | {
    7 |     34 |   return 1;
    8 |     34 | }
    9 |        | 
   10 |        | static int _ccv_nnc_gemm_forw_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
   11 |    156 | {
   12 |    156 |   if (input_size == 3 && (input_bitmasks[0] & 7u) == ((1u << 0) | (1u << 1) | (1u << 2)) && output_bitmasks[0] == 1u)
   13 |     39 |     return 1;
   14 |    117 |   // No bias is OK.
   15 |    117 |   if (input_size == 2 && (input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1)) && output_bitmasks[0] == 1u)
   16 |      0 |     return 1;
   17 |    117 |   return 0;
   18 |    117 | }
   19 |        | 
   20 |        | static int _ccv_nnc_gemm_back_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
   21 |  12.2k | {
   22 |  12.2k |   // Output the propagated error, gradient w.r.t. w and bias.
   23 |  12.2k |   if ((input_bitmasks[0] & 7u) == ((1u << 0) | (1u << 1) | (1u << 2) | (0 << 3)) && output_bitmasks[0] == ((1u << 0) | (1u << 1) | (1u << 2)))
   24 |  1.04k |     return 1;
   25 |  11.2k |   // No bias.
   26 |  11.2k |   if ((input_bitmasks[0] & 7u) == ((1u << 0) | (1u << 1) | (1u << 2) | (0 << 3)) && output_bitmasks[0] == ((1u << 0) | (1u << 1) | (0u << 2)))
   27 |  3.03k |     return 1;
   28 |  8.17k |   // Don't propagate error, only gradient w.r.t. w and bias.
   29 |  8.17k |   if ((input_bitmasks[0] & 7u) == ((1u << 0) | (1u << 1) | (0 << 2) | (0 << 3)) && output_bitmasks[0] == ((0 << 0) | (1u << 1) | (1u << 2)))
   30 |      0 |     return 1;
   31 |  8.17k |   // No bias.
   32 |  8.17k |   if ((input_bitmasks[0] & 7u) == ((1u << 0) | (1u << 1) | (0 << 2) | (0 << 3)) && output_bitmasks[0] == ((0 << 0) | (1u << 1) | (0u << 2)))
   33 |      0 |     return 1;
   34 |  8.17k |   return 0;
   35 |  8.17k | }
   36 |        | 
   37 |        | static void _ccv_nnc_gemm_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
   38 |  2.14k | {
   39 |  2.14k |   assert(output_size == 1);
   40 |  2.14k |   outputs[0].type = inputs[0].type;
   41 |  2.14k |   outputs[0].format = inputs[0].format;
   42 |  2.14k |   outputs[0].datatype = inputs[0].datatype;
   43 |  2.14k |   outputs[0].dim[0] = inputs[0].dim[0]; // batch size.
   44 |  2.14k |   outputs[0].dim[1] = inputs[1].dim[0]; // from the weight matrix.
   45 |  2.14k |   assert(inputs[1].dim[0] == cmd.blas.count);
   46 |  2.14k | }
   47 |        | 
   48 |        | REGISTER_COMMAND(CCV_NNC_GEMM_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
   49 |        |   FIND_BACKEND(ccv_nnc_gemm_cpu_ref.c, ccv_nnc_gemm_cpu_opt.c, gpu/ccv_nnc_gemm_gpu_cublas.cu)
   50 |      1 | {
   51 |      1 |   registry->bitmask = _ccv_nnc_gemm_forw_bitmask;
   52 |      1 |   registry->tensor_auto = _ccv_nnc_gemm_tensor_auto_forw;
   53 |      1 | }
   54 |        | 
   55 |        | REGISTER_COMMAND(CCV_NNC_GEMM_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
   56 |        |   FIND_BACKEND(ccv_nnc_gemm_cpu_ref.c, ccv_nnc_gemm_cpu_opt.c, gpu/ccv_nnc_gemm_gpu_cublas.cu)
   57 |      1 | {
   58 |      1 |   registry->bitmask = _ccv_nnc_gemm_back_bitmask;
   59 |      1 |   registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs;
   60 |      1 | }
   61 |        | 
   62 |        | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_GEMM_FORWARD)
   63 |        | #define CMD_GEMM_FORWARD(_count) ccv_nnc_cmd(CCV_NNC_GEMM_FORWARD, 0, CMD_GEMM(_count), 0)
   64 |        | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_GEMM_BACKWARD)
   65 |        | #define CMD_GEMM_BACKWARD(_count) ccv_nnc_cmd(CCV_NNC_GEMM_BACKWARD, 0, CMD_GEMM(_count), 0)
   66 |        | 
   67 |        | static int _ccv_nnc_add_forw_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
   68 |     12 | {
   69 |     12 |   if ((input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1)) && output_bitmasks[0] == 1u)
   70 |      4 |     return 1;
   71 |      8 |   return 0;
   72 |      8 | }
   73 |        | 
   74 |        | static int _ccv_nnc_add_back_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
   75 |     73 | {
   76 |     73 |   // w.r.t. both x and y
   77 |     73 |   if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == ((1u << 0) | (1u << 1)))
   78 |     33 |     return 1;
   79 |     40 |   // w.r.t. x
   80 |     40 |   if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == ((1u << 0) | (0u << 1)))
   81 |     20 |     return 1;
   82 |     20 |   // w.r.t. y
   83 |     20 |   if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == ((0u << 0) | (1u << 1)))
   84 |      0 |     return 1;
   85 |     20 |   return 0;
   86 |     20 | }
   87 |        | 
   88 |        | REGISTER_COMMAND(CCV_NNC_ADD_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
   89 |        |   FIND_BACKEND(ccv_nnc_add_cpu_ref.c, gpu/ccv_nnc_add_gpu_cudnn.cu)
   90 |      1 | {
   91 |      1 |   registry->bitmask = _ccv_nnc_add_forw_bitmask;
   92 |      1 |   registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
   93 |      1 |   registry->allow_inplace = _ccv_nnc_arbitary_inplace;
   94 |      1 | }
   95 |        | 
   96 |        | REGISTER_COMMAND(CCV_NNC_ADD_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
   97 |        |   FIND_BACKEND(ccv_nnc_add_cpu_ref.c, gpu/ccv_nnc_add_gpu_cudnn.cu)
   98 |      1 | {
   99 |      1 |   registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES;
  100 |      1 |   registry->bitmask = _ccv_nnc_add_back_bitmask;
  101 |      1 |   registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs;
  102 |      1 | }
  103 |        | 
  104 |        | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_ADD_FORWARD)
  105 |        | #define CMD_ADD_FORWARD(...) ccv_nnc_cmd(CCV_NNC_ADD_FORWARD, 0, CMD_BLAS(__VA_ARGS__), 0)
  106 |        | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_ADD_BACKWARD)
  107 |        | #define CMD_ADD_BACKWARD(...) ccv_nnc_cmd(CCV_NNC_ADD_BACKWARD, 0, CMD_BLAS(__VA_ARGS__), 0)
  108 |        | 
  109 |        | static int _ccv_nnc_mul_forw_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
  110 |      0 | {
  111 |      0 |   if ((input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1)) && output_bitmasks[0] == 1u)
  112 |      0 |     return 1;
  113 |      0 |   return 0;
  114 |      0 | }
  115 |        | 
  116 |        | static int _ccv_nnc_mul_back_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
  117 |     12 | {
  118 |     12 |   // w.r.t. both x and y
  119 |     12 |   if ((input_bitmasks[0] & 7u) == 7u && output_bitmasks[0] == ((1u << 0) | (1u << 1)))
  120 |      4 |     return 1;
  121 |      8 |   // w.r.t. x
  122 |      8 |   if ((input_bitmasks[0] & 5u) == 5u && output_bitmasks[0] == ((1u << 0) | (0u << 1)))
  123 |      0 |     return 1;
  124 |      8 |   // w.r.t. y
  125 |      8 |   if ((input_bitmasks[0] & 3u) == 3u && output_bitmasks[0] == ((0u << 0) | (1u << 1)))
  126 |      0 |     return 1;
  127 |      8 |   return 0;
  128 |      8 | }
  129 |        | 
  130 |        | REGISTER_COMMAND(CCV_NNC_MUL_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
  131 |        |   FIND_BACKEND(ccv_nnc_mul_cpu_ref.c)
  132 |      1 | {
  133 |      1 |   registry->bitmask = _ccv_nnc_mul_forw_bitmask;
  134 |      1 |   registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
  135 |      1 |   registry->allow_inplace = _ccv_nnc_arbitary_inplace;
  136 |      1 | }
  137 |        | 
  138 |        | REGISTER_COMMAND(CCV_NNC_MUL_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
  139 |        |   FIND_BACKEND(ccv_nnc_mul_cpu_ref.c)
  140 |      1 | {
  141 |      1 |   registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES;
  142 |      1 |   registry->bitmask = _ccv_nnc_mul_back_bitmask;
  143 |      1 |   registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs;
  144 |      1 | }
  145 |        | 
  146 |        | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_MUL_FORWARD)
  147 |        | #define CMD_MUL_FORWARD(...) ccv_nnc_cmd(CCV_NNC_MUL_FORWARD, 0, CMD_BLAS(__VA_ARGS__), 0)
  148 |        | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_MUL_BACKWARD)
  149 |        | #define CMD_MUL_BACKWARD(...) ccv_nnc_cmd(CCV_NNC_MUL_BACKWARD, 0, CMD_BLAS(__VA_ARGS__), 0)
  150 |        | 
  151 |        | static int _ccv_nnc_scalar_mul_forw_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
  152 |      0 | {
  153 |      0 |   if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == 1u)
  154 |      0 |     return 1;
  155 |      0 |   return 0;
  156 |      0 | }
  157 |        | 
  158 |        | static int _ccv_nnc_scalar_mul_back_bitmask(const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
  159 |     10 | {
  160 |     10 |   // w.r.t. x
  161 |     10 |   if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == 1u)
  162 |      6 |     return 1;
  163 |      4 |   return 0;
  164 |      4 | }
  165 |        | 
  166 |        | REGISTER_COMMAND(CCV_NNC_SCALAR_MUL_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
  167 |        |   FIND_BACKEND(ccv_nnc_mul_cpu_ref.c)
  168 |      1 | {
  169 |      1 |   registry->bitmask = _ccv_nnc_scalar_mul_forw_bitmask;
  170 |      1 |   registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
  171 |      1 |   registry->allow_inplace = _ccv_nnc_arbitary_inplace;
  172 |      1 | }
  173 |        | 
  174 |        | REGISTER_COMMAND(CCV_NNC_SCALAR_MUL_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
  175 |        |   FIND_BACKEND(ccv_nnc_mul_cpu_ref.c)
  176 |      1 | {
  177 |      1 |   registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES;
  178 |      1 |   registry->bitmask = _ccv_nnc_scalar_mul_back_bitmask;
  179 |      1 |   registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs;
  180 |      1 | }
  181 |        | 
  182 |        | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_SCALAR_MUL_FORWARD)
  183 |        | #define CMD_SCALAR_MUL_FORWARD(...) ccv_nnc_cmd(CCV_NNC_SCALAR_MUL_FORWARD, 0, CMD_BLAS(__VA_ARGS__), 0)
  184 |        | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_SCALAR_MUL_BACKWARD)
  185 |        | #define CMD_SCALAR_MUL_BACKWARD(...) ccv_nnc_cmd(CCV_NNC_SCALAR_MUL_BACKWARD, 0, CMD_BLAS(__VA_ARGS__), 0)
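
Note on the bitmask checks in this file: they appear to follow the convention that bit i of input_bitmasks[0] (or output_bitmasks[0]) is set when the i-th input (or output) tensor is supplied, which is why _ccv_nnc_gemm_forw_bitmask accepts either the full input/weight/bias triple or the two-input "no bias" form. Below is a minimal, self-contained sketch of that convention; the present_mask helper and the main driver are hypothetical illustrations, not part of nnc.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper: build a bitmask in which bit i is set when the
 * i-th tensor is supplied, mirroring the checks in this file. */
static uint64_t present_mask(const int* const present, const int n)
{
	uint64_t mask = 0;
	int i;
	for (i = 0; i < n; i++)
		if (present[i])
			mask |= (uint64_t)1 << i;
	return mask;
}

int main(void)
{
	/* GEMM forward takes an input, a weight matrix and (optionally) a bias. */
	const int with_bias[3] = {1, 1, 1};
	const int without_bias[3] = {1, 1, 0};
	const uint64_t m0 = present_mask(with_bias, 3);    /* 0b111 == 7 */
	const uint64_t m1 = present_mask(without_bias, 3); /* 0b011 == 3 */
	/* (m0 & 7u) == ((1u << 0) | (1u << 1) | (1u << 2)) matches the first
	 * branch of _ccv_nnc_gemm_forw_bitmask; (m1 & 3u) matches the
	 * "No bias is OK." branch, which additionally requires input_size == 2. */
	printf("%llu %llu\n", (unsigned long long)m0, (unsigned long long)m1);
	return 0;
}

In this run the line counts show the "no bias" forward form (line 16) and the "don't propagate error" backward forms (lines 30 and 33) were never exercised.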