Coverage Report

Created: 2024-08-18 16:21

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/util/ccv_nnc_util.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv.h"
2
#include "nnc/ccv_nnc.h"
3
#include "nnc/ccv_nnc_internal.h"
4
5
// Bitmask validator for SET: the output bitmasks must form one contiguous run
// of 1-bits starting at bit 0 (e.g. 1111100000..., never 1110010101...), and
// the total count of set bits must equal output_size. Input bitmasks are not
// inspected by this validator.
// Returns 1 when valid, 0 otherwise.
static int _ccv_nnc_set_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
	int word, bit;
	int seen_gap = 0; // set once a run ends before bit 64; any 1 after that is invalid
	int output_bitcount = 0;
	for (word = 0; word < output_bitmask_size; word++)
	{
		for (bit = 0; bit < 64; bit++)
		{
			if (!(output_bitmasks[word] & (uint64_t)1 << bit))
				break;
			if (seen_gap) // a 1-bit after an earlier gap: not contiguous
				return 0;
		}
		output_bitcount += bit;
		if (bit < 64)
			seen_gap = 1;
		// The remainder of this word must be all zeros.
		for (; bit < 64; bit++)
			if (output_bitmasks[word] & (uint64_t)1 << bit)
				return 0;
	}
	return output_size == output_bitcount;
}
30
31
REGISTER_COMMAND(CCV_NNC_SET_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
32
  FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_cudnn.cu, mps/ccv_nnc_util_mps.m)
33
1
{
34
1
  registry->bitmask = _ccv_nnc_set_bitmask;
35
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
36
1
}
37
38
REGISTER_COMMAND(CCV_NNC_SET_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
39
  FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_cudnn.cu, mps/ccv_nnc_util_mps.m)
40
1
{
41
1
  registry->bitmask = _ccv_nnc_set_bitmask;
42
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
43
1
}

// Convenience constructors for SET commands; _val is the fill value.
// NOTE(review): the //@REGISTER_EASY_COMMAND_MACRO lines appear to be read by
// a code generator — preserved verbatim.
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_SET_FORWARD)
#define CMD_SET_FORWARD(_val) ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_val,}}}, 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_SET_BACKWARD)
#define CMD_SET_BACKWARD(_val) ccv_nnc_cmd(CCV_NNC_SET_BACKWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_val,}}}, 0)
49
50
static int _ccv_nnc_masked_fill_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
51
8
{
52
8
  if (input_size == 2 && (input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1)) && output_bitmasks[0] == 1u)
53
8
    return 1;
54
0
  return 0;
55
8
}
56
57
static int _ccv_nnc_masked_fill_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
58
32
{
59
32
  if ((input_bitmasks[0] & 5u) == ((1u << 0) | (0u << 1) | (1u << 2)) && 
output_bitmasks[0] == ((1u << 0) | (1u << 1))16
)
60
4
    return 1;
61
28
  if ((input_bitmasks[0] & 5u) == ((1u << 0) | (0u << 1) | (1u << 2)) && 
output_bitmasks[0] == (1u << 0)12
)
62
12
    return 1;
63
16
  return 0;
64
28
}
65
66
REGISTER_COMMAND(CCV_NNC_MASKED_FILL_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
67
  FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_ref.cu)
68
1
{
69
1
  registry->bitmask = _ccv_nnc_masked_fill_forw_bitmask;
70
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
71
1
}
72
73
REGISTER_COMMAND(CCV_NNC_MASKED_FILL_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
74
  FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_ref.cu)
75
1
{
76
1
  registry->bitmask = _ccv_nnc_masked_fill_back_bitmask;
77
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_gradient_and_inputs;
78
1
}

// Convenience constructors for MASKED_FILL; blas.a carries {_eq, _fill}.
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_MASKED_FILL_FORWARD)
#define CMD_MASKED_FILL_FORWARD(_eq, _fill) ccv_nnc_cmd(CCV_NNC_MASKED_FILL_FORWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_eq, _fill}}}, 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_MASKED_FILL_BACKWARD)
#define CMD_MASKED_FILL_BACKWARD(_eq, _fill) ccv_nnc_cmd(CCV_NNC_MASKED_FILL_BACKWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_eq, _fill}}}, 0)
84
85
static int _ccv_nnc_data_transfer_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
86
2.07k
{
87
2.07k
  int i, j;
88
2.07k
  int input_flag = 0;
89
2.07k
  int input_bitcount = 0;
90
4.15k
  for (i = 0; i < input_bitmask_size; 
i++2.07k
)
91
2.07k
  {
92
4.16k
    for (j = 0; j < 64; 
j++2.09k
)
93
4.16k
      if (input_bitmasks[i] & (uint64_t)1 << j)
94
2.09k
      {
95
2.09k
        if (input_flag)
96
0
          return 0;
97
2.09k
      } else
98
2.07k
        break;
99
2.07k
    input_bitcount += j;
100
2.07k
    if (j < 64)
101
2.07k
      input_flag = 1;
102
    // Always like 1111100000, no 1110010101
103
132k
    for (; j < 64; 
j++130k
)
104
130k
      if (input_bitmasks[i] & (uint64_t)1 << j)
105
0
        return 0;
106
2.07k
  }
107
2.07k
  int output_flag = 0;
108
2.07k
  int output_bitcount = 0;
109
4.15k
  for (i = 0; i < output_bitmask_size; 
i++2.07k
)
110
2.07k
  {
111
4.16k
    for (j = 0; j < 64; 
j++2.09k
)
112
4.16k
      if (output_bitmasks[i] & (uint64_t)1 << j)
113
2.09k
      {
114
2.09k
        if (output_flag)
115
0
          return 0;
116
2.09k
      } else
117
2.07k
        break;
118
2.07k
    output_bitcount += j;
119
2.07k
    if (j < 64)
120
2.07k
      output_flag = 1;
121
132k
    for (; j < 64; 
j++130k
)
122
130k
      if (output_bitmasks[i] & (uint64_t)1 << j)
123
0
        return 0;
124
2.07k
  }
125
2.07k
  return output_bitcount == input_bitcount && input_size == output_size && input_size == input_bitcount;
126
2.07k
}
127
128
static int _ccv_nnc_data_transfer_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
129
4.28k
{
130
4.28k
  int i, j;
131
4.28k
  int input_flag = 0;
132
4.28k
  int input_bitcount = 0;
133
7.43k
  for (i = 0; i < input_bitmask_size; 
i++3.15k
)
134
4.28k
  {
135
11.6k
    for (j = 0; j < 64; 
j++7.35k
)
136
11.6k
      if (input_bitmasks[i] & (uint64_t)1 << j)
137
7.35k
      {
138
7.35k
        if (input_flag)
139
0
          return 0;
140
7.35k
      } else
141
4.28k
        break;
142
4.28k
    input_bitcount += j;
143
4.28k
    if (j < 64)
144
4.28k
      input_flag = 1;
145
    // Always like 1111100000, no 1110010101
146
200k
    for (; j < 64; 
j++196k
)
147
197k
      if (input_bitmasks[i] & (uint64_t)1 << j)
148
1.13k
        return 0;
149
4.28k
  }
150
3.15k
  int output_flag = 0;
151
3.15k
  int output_bitcount = 0;
152
6.30k
  for (i = 0; i < output_bitmask_size; 
i++3.15k
)
153
3.15k
  {
154
6.32k
    for (j = 0; j < 64; 
j++3.17k
)
155
6.32k
      if (output_bitmasks[i] & (uint64_t)1 << j)
156
3.17k
      {
157
3.17k
        if (output_flag)
158
0
          return 0;
159
3.17k
      } else
160
3.15k
        break;
161
3.15k
    output_bitcount += j;
162
3.15k
    if (j < 64)
163
3.15k
      output_flag = 1;
164
201k
    for (; j < 64; 
j++198k
)
165
198k
      if (output_bitmasks[i] & (uint64_t)1 << j)
166
0
        return 0;
167
3.15k
  }
168
3.15k
  return output_bitcount <= input_bitcount && 
output_bitcount == output_size3.11k
;
169
3.15k
}
170
171
REGISTER_COMMAND(CCV_NNC_DATA_TRANSFER_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
172
  FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_ref.cu, mps/ccv_nnc_util_mps.m)
173
1
{
174
1
  registry->bitmask = _ccv_nnc_data_transfer_forw_bitmask;
175
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
176
1
}
177
178
REGISTER_COMMAND(CCV_NNC_DATA_TRANSFER_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
179
  FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_ref.cu, mps/ccv_nnc_util_mps.m)
180
1
{
181
1
  registry->bitmask = _ccv_nnc_data_transfer_back_bitmask;
182
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
183
1
}

// Convenience constructors for DATA_TRANSFER; no parameters beyond auto.
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_DATA_TRANSFER_FORWARD)
#define CMD_DATA_TRANSFER_FORWARD() ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto, 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_DATA_TRANSFER_BACKWARD)
#define CMD_DATA_TRANSFER_BACKWARD() ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_BACKWARD, 0, ccv_nnc_cmd_auto, 0)
189
190
REGISTER_COMMAND(CCV_NNC_FORMAT_TRANSFORM_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
191
  FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_cudnn.cu, mps/ccv_nnc_util_mps.m)
192
1
{
193
1
  registry->bitmask = _ccv_nnc_data_transfer_forw_bitmask;
194
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
195
1
}
196
197
REGISTER_COMMAND(CCV_NNC_FORMAT_TRANSFORM_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
198
  FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_cudnn.cu, mps/ccv_nnc_util_mps.m)
199
1
{
200
1
  registry->bitmask = _ccv_nnc_data_transfer_back_bitmask;
201
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
202
1
}

// Convenience constructors for FORMAT_TRANSFORM.
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_FORMAT_TRANSFORM_FORWARD)
#define CMD_FORMAT_TRANSFORM_FORWARD() ccv_nnc_cmd(CCV_NNC_FORMAT_TRANSFORM_FORWARD, 0, ccv_nnc_cmd_auto, 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_FORMAT_TRANSFORM_BACKWARD)
#define CMD_FORMAT_TRANSFORM_BACKWARD() ccv_nnc_cmd(CCV_NNC_FORMAT_TRANSFORM_BACKWARD, 0, ccv_nnc_cmd_auto, 0)
208
209
static void _ccv_nnc_transpose_tensor_auto(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
210
1.82k
{
211
1.82k
  int i;
212
3.64k
  for (i = 0; i < output_size; 
i++1.82k
)
213
1.82k
  {
214
1.82k
    outputs[i] = inputs[i];
215
1.82k
    int t;
216
1.82k
    CCV_SWAP(outputs[i].dim[cmd.transpose.axis[0]], outputs[i].dim[cmd.transpose.axis[1]], t);
217
1.82k
  }
218
1.82k
}
219
220
REGISTER_COMMAND(CCV_NNC_TRANSPOSE_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
221
  FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_cudnn.cu, mps/ccv_nnc_util_mps.m)
222
1
{
223
1
  registry->bitmask = _ccv_nnc_data_transfer_forw_bitmask;
224
1
  registry->tensor_auto = _ccv_nnc_transpose_tensor_auto;
225
1
}
226
227
REGISTER_COMMAND(CCV_NNC_TRANSPOSE_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
228
  FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_cudnn.cu, mps/ccv_nnc_util_mps.m)
229
1
{
230
1
  registry->bitmask = _ccv_nnc_data_transfer_back_bitmask;
231
1
  registry->tensor_auto = _ccv_nnc_transpose_tensor_auto;
232
1
}

// Convenience constructors for TRANSPOSE; the two axes to swap are passed in
// transpose.axis.
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_TRANSPOSE_FORWARD)
#define CMD_TRANSPOSE_FORWARD(_axis_a, _axis_b) ccv_nnc_cmd(CCV_NNC_TRANSPOSE_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.transpose={.axis={_axis_a, _axis_b}}}), 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_TRANSPOSE_BACKWARD)
#define CMD_TRANSPOSE_BACKWARD(_axis_a, _axis_b) ccv_nnc_cmd(CCV_NNC_TRANSPOSE_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.transpose={.axis={_axis_a, _axis_b}}}), 0)
238
239
REGISTER_COMMAND(CCV_NNC_DATATYPE_CONVERSION_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
240
  FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_ref.cu, mps/ccv_nnc_util_mps.m)
241
1
{
242
1
  registry->bitmask = _ccv_nnc_data_transfer_forw_bitmask;
243
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
244
1
}
245
246
REGISTER_COMMAND(CCV_NNC_DATATYPE_CONVERSION_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
247
  FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_ref.cu, mps/ccv_nnc_util_mps.m)
248
1
{
249
1
  registry->bitmask = _ccv_nnc_data_transfer_back_bitmask;
250
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
251
1
}

// Convenience constructors for DATATYPE_CONVERSION.
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_DATATYPE_CONVERSION_FORWARD)
#define CMD_DATATYPE_CONVERSION_FORWARD() ccv_nnc_cmd(CCV_NNC_DATATYPE_CONVERSION_FORWARD, 0, ccv_nnc_cmd_auto, 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_DATATYPE_CONVERSION_BACKWARD)
#define CMD_DATATYPE_CONVERSION_BACKWARD() ccv_nnc_cmd(CCV_NNC_DATATYPE_CONVERSION_BACKWARD, 0, ccv_nnc_cmd_auto, 0)