// lib/nnc/cmd/util/ccv_nnc_util.c
#include "ccv.h"
#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_internal.h"

static int _ccv_nnc_set_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
	int i, j, flag = 0;
	int output_bitcount = 0;
	for (i = 0; i < output_bitmask_size; i++)
	{
		for (j = 0; j < 64; j++)
			if (output_bitmasks[i] & (uint64_t)1 << j)
			{
				if (flag)
					return 0;
			} else
				break;
		output_bitcount += j;
		// A trailing zero, even before the end of the bitmask, sets the flag;
		// if we encounter another 1 after that, the mask is invalid.
		if (j < 64)
			flag = 1;
		// Always like 1111100000, no 1110010101
		for (; j < 64; j++)
			if (output_bitmasks[i] & (uint64_t)1 << j)
				return 0;
	}
	return output_size == output_bitcount;
}
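
/* An illustrative walkthrough (not part of the library): with output_size == 3
 * and a single 64-bit word, the set bits must be contiguous from bit 0:
 *
 *   uint64_t ok[1]  = { (1u << 0) | (1u << 1) | (1u << 2) }; // 0b0111 -> returns 1
 *   uint64_t bad[1] = { (1u << 0) | (1u << 2) };             // 0b0101 -> returns 0 (gap at bit 1)
 *   uint64_t few[1] = { (1u << 0) | (1u << 1) };             // 0b0011 -> returns 0 (bitcount != output_size)
 */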

REGISTER_COMMAND(CCV_NNC_SET_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_cudnn.cu, mps/ccv_nnc_util_mps.m)
{
	registry->bitmask = _ccv_nnc_set_bitmask;
	registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
}

REGISTER_COMMAND(CCV_NNC_SET_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_cudnn.cu, mps/ccv_nnc_util_mps.m)
{
	registry->bitmask = _ccv_nnc_set_bitmask;
	registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
}

//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_SET_FORWARD)
#define CMD_SET_FORWARD(_val) ccv_nnc_cmd(CCV_NNC_SET_FORWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_val,}}}, 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_SET_BACKWARD)
#define CMD_SET_BACKWARD(_val) ccv_nnc_cmd(CCV_NNC_SET_BACKWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_val,}}}, 0)
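
/* A minimal usage sketch (illustrative; assumes the CPU_TENSOR_NHWC and
 * TENSOR_LIST convenience macros from nnc/ccv_nnc_easy.h):
 *
 *   ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
 *   // SET_FORWARD takes no inputs and fills every element of a with 1:
 *   ccv_nnc_cmd_exec(CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(a), 0);
 *   ccv_nnc_tensor_free(a);
 */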

static int _ccv_nnc_masked_fill_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
	if (input_size == 2 && (input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1)) && output_bitmasks[0] == 1u)
		return 1;
	return 0;
}
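
/* An illustrative reading (not part of the library): bit 0 of input_bitmasks[0]
 * presumably stands for the input tensor and bit 1 for the mask, so the only
 * accepted configuration is both inputs present and exactly one output:
 *
 *   uint64_t in[1]  = { (1u << 0) | (1u << 1) };
 *   uint64_t out[1] = { 1u }; // with input_size == 2 -> returns 1
 */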

static int _ccv_nnc_masked_fill_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
	if ((input_bitmasks[0] & 5u) == ((1u << 0) | (0u << 1) | (1u << 2)) && output_bitmasks[0] == ((1u << 0) | (1u << 1)))
		return 1;
	if ((input_bitmasks[0] & 5u) == ((1u << 0) | (0u << 1) | (1u << 2)) && output_bitmasks[0] == (1u << 0))
		return 1;
	return 0;
}
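
/* An illustrative reading (not part of the library): the backward pass
 * presumably needs the incoming gradient (bit 0) and the mask (bit 2) but not
 * the original input (bit 1, ignored by the & 5u). Two output configurations
 * pass: gradients for both input and mask, or for the input alone:
 *
 *   uint64_t in[1]   = { (1u << 0) | (1u << 2) };
 *   uint64_t out2[1] = { (1u << 0) | (1u << 1) }; // returns 1
 *   uint64_t out1[1] = { 1u << 0 };               // returns 1
 */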

REGISTER_COMMAND(CCV_NNC_MASKED_FILL_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_ref.cu)
{
	registry->bitmask = _ccv_nnc_masked_fill_forw_bitmask;
	registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
}

REGISTER_COMMAND(CCV_NNC_MASKED_FILL_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_ref.cu)
{
	registry->bitmask = _ccv_nnc_masked_fill_back_bitmask;
	registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_gradient_and_inputs;
}

//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_MASKED_FILL_FORWARD)
#define CMD_MASKED_FILL_FORWARD(_eq, _fill) ccv_nnc_cmd(CCV_NNC_MASKED_FILL_FORWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_eq, _fill}}}, 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_MASKED_FILL_BACKWARD)
#define CMD_MASKED_FILL_BACKWARD(_eq, _fill) ccv_nnc_cmd(CCV_NNC_MASKED_FILL_BACKWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_eq, _fill}}}, 0)
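
/* A hedged usage sketch (illustrative; tensor names and shapes are made up,
 * and the _eq/_fill semantics, fill where the mask equals _eq, are assumed):
 *
 *   ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
 *   ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
 *   ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
 *   // Wherever m equals 0, write -1e8 into y; elsewhere copy x over:
 *   ccv_nnc_cmd_exec(CMD_MASKED_FILL_FORWARD(0, -1e8), ccv_nnc_no_hint, 0, TENSOR_LIST(x, m), TENSOR_LIST(y), 0);
 */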

static int _ccv_nnc_data_transfer_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
	int i, j;
	int input_flag = 0;
	int input_bitcount = 0;
	for (i = 0; i < input_bitmask_size; i++)
	{
		for (j = 0; j < 64; j++)
			if (input_bitmasks[i] & (uint64_t)1 << j)
			{
				if (input_flag)
					return 0;
			} else
				break;
		input_bitcount += j;
		if (j < 64)
			input_flag = 1;
		// Always like 1111100000, no 1110010101
		for (; j < 64; j++)
			if (input_bitmasks[i] & (uint64_t)1 << j)
				return 0;
	}
	int output_flag = 0;
	int output_bitcount = 0;
	for (i = 0; i < output_bitmask_size; i++)
	{
		for (j = 0; j < 64; j++)
			if (output_bitmasks[i] & (uint64_t)1 << j)
			{
				if (output_flag)
					return 0;
			} else
				break;
		output_bitcount += j;
		if (j < 64)
			output_flag = 1;
		for (; j < 64; j++)
			if (output_bitmasks[i] & (uint64_t)1 << j)
				return 0;
	}
	return output_bitcount == input_bitcount && input_size == output_size && input_size == input_bitcount;
}
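
/* An illustrative walkthrough (not part of the library): data transfer is
 * one-to-one, so the masks must mark the same number of leading tensors on
 * both sides and cover every argument:
 *
 *   uint64_t in[1]  = { (1u << 0) | (1u << 1) };
 *   uint64_t out[1] = { (1u << 0) | (1u << 1) }; // with input_size == output_size == 2 -> returns 1
 */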

static int _ccv_nnc_data_transfer_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
	int i, j;
	int input_flag = 0;
	int input_bitcount = 0;
	for (i = 0; i < input_bitmask_size; i++)
	{
		for (j = 0; j < 64; j++)
			if (input_bitmasks[i] & (uint64_t)1 << j)
			{
				if (input_flag)
					return 0;
			} else
				break;
		input_bitcount += j;
		if (j < 64)
			input_flag = 1;
		// Always like 1111100000, no 1110010101
		for (; j < 64; j++)
			if (input_bitmasks[i] & (uint64_t)1 << j)
				return 0;
	}
	int output_flag = 0;
	int output_bitcount = 0;
	for (i = 0; i < output_bitmask_size; i++)
	{
		for (j = 0; j < 64; j++)
			if (output_bitmasks[i] & (uint64_t)1 << j)
			{
				if (output_flag)
					return 0;
			} else
				break;
		output_bitcount += j;
		if (j < 64)
			output_flag = 1;
		for (; j < 64; j++)
			if (output_bitmasks[i] & (uint64_t)1 << j)
				return 0;
	}
	return output_bitcount <= input_bitcount && output_bitcount == output_size;
}
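
/* An illustrative walkthrough (not part of the library): the backward pass may
 * compute gradients for only a leading subset of the inputs, so fewer outputs
 * than inputs is accepted:
 *
 *   uint64_t in[1]  = { (1u << 0) | (1u << 1) };
 *   uint64_t out[1] = { 1u << 0 }; // with output_size == 1 -> returns 1
 */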

REGISTER_COMMAND(CCV_NNC_DATA_TRANSFER_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_ref.cu, mps/ccv_nnc_util_mps.m)
{
	registry->bitmask = _ccv_nnc_data_transfer_forw_bitmask;
	registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
}

REGISTER_COMMAND(CCV_NNC_DATA_TRANSFER_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_ref.cu, mps/ccv_nnc_util_mps.m)
{
	registry->bitmask = _ccv_nnc_data_transfer_back_bitmask;
	registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
}

//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_DATA_TRANSFER_FORWARD)
#define CMD_DATA_TRANSFER_FORWARD() ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto, 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_DATA_TRANSFER_BACKWARD)
#define CMD_DATA_TRANSFER_BACKWARD() ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_BACKWARD, 0, ccv_nnc_cmd_auto, 0)
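
/* A hedged usage sketch (illustrative; assumes a CUDA build and the
 * CPU_TENSOR_NHWC/GPU_TENSOR_NHWC convenience macros from nnc/ccv_nnc_easy.h):
 *
 *   ccv_nnc_tensor_t* const cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 16), 0);
 *   ccv_nnc_tensor_t* const gpu = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 16), 0);
 *   // Copy the CPU tensor to device 0:
 *   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(cpu), TENSOR_LIST(gpu), 0);
 */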

REGISTER_COMMAND(CCV_NNC_FORMAT_TRANSFORM_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_cudnn.cu, mps/ccv_nnc_util_mps.m)
{
	registry->bitmask = _ccv_nnc_data_transfer_forw_bitmask;
	registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
}

REGISTER_COMMAND(CCV_NNC_FORMAT_TRANSFORM_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_cudnn.cu, mps/ccv_nnc_util_mps.m)
{
	registry->bitmask = _ccv_nnc_data_transfer_back_bitmask;
	registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
}

//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_FORMAT_TRANSFORM_FORWARD)
#define CMD_FORMAT_TRANSFORM_FORWARD() ccv_nnc_cmd(CCV_NNC_FORMAT_TRANSFORM_FORWARD, 0, ccv_nnc_cmd_auto, 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_FORMAT_TRANSFORM_BACKWARD)
#define CMD_FORMAT_TRANSFORM_BACKWARD() ccv_nnc_cmd(CCV_NNC_FORMAT_TRANSFORM_BACKWARD, 0, ccv_nnc_cmd_auto, 0)
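
/* A hedged usage sketch (illustrative; shapes are made up): converting a
 * tensor's memory layout from NHWC to NCHW without changing its contents:
 *
 *   ccv_nnc_tensor_t* const nhwc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 4, 4, 3), 0);
 *   ccv_nnc_tensor_t* const nchw = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 3, 4, 4), 0);
 *   ccv_nnc_cmd_exec(CMD_FORMAT_TRANSFORM_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(nhwc), TENSOR_LIST(nchw), 0);
 */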

static void _ccv_nnc_transpose_tensor_auto(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
{
	int i;
	for (i = 0; i < output_size; i++)
	{
		outputs[i] = inputs[i];
		int t;
		CCV_SWAP(outputs[i].dim[cmd.transpose.axis[0]], outputs[i].dim[cmd.transpose.axis[1]], t);
	}
}
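
/* An illustrative note (not part of the library): only the tensor parameters
 * are derived here, no data moves. With cmd.transpose.axis == {1, 2}, an
 * input of dim {8, 4, 2} yields an output of dim {8, 2, 4}. */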

REGISTER_COMMAND(CCV_NNC_TRANSPOSE_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_cudnn.cu, mps/ccv_nnc_util_mps.m)
{
	registry->bitmask = _ccv_nnc_data_transfer_forw_bitmask;
	registry->tensor_auto = _ccv_nnc_transpose_tensor_auto;
}

REGISTER_COMMAND(CCV_NNC_TRANSPOSE_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_cudnn.cu, mps/ccv_nnc_util_mps.m)
{
	registry->bitmask = _ccv_nnc_data_transfer_back_bitmask;
	registry->tensor_auto = _ccv_nnc_transpose_tensor_auto;
}

//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_TRANSPOSE_FORWARD)
#define CMD_TRANSPOSE_FORWARD(_axis_a, _axis_b) ccv_nnc_cmd(CCV_NNC_TRANSPOSE_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.transpose={.axis={_axis_a, _axis_b}}}), 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_TRANSPOSE_BACKWARD)
#define CMD_TRANSPOSE_BACKWARD(_axis_a, _axis_b) ccv_nnc_cmd(CCV_NNC_TRANSPOSE_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.transpose={.axis={_axis_a, _axis_b}}}), 0)
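
/* A hedged usage sketch (illustrative; shapes are made up): a plain 2-D matrix
 * transpose via axes 0 and 1:
 *
 *   ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3, 4), 0);
 *   ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 3), 0);
 *   ccv_nnc_cmd_exec(CMD_TRANSPOSE_FORWARD(0, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
 */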

REGISTER_COMMAND(CCV_NNC_DATATYPE_CONVERSION_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_ref.cu, mps/ccv_nnc_util_mps.m)
{
	registry->bitmask = _ccv_nnc_data_transfer_forw_bitmask;
	registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
}

REGISTER_COMMAND(CCV_NNC_DATATYPE_CONVERSION_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_util_cpu_ref.c, gpu/ccv_nnc_util_gpu_ref.cu, mps/ccv_nnc_util_mps.m)
{
	registry->bitmask = _ccv_nnc_data_transfer_back_bitmask;
	registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
}

//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_DATATYPE_CONVERSION_FORWARD)
#define CMD_DATATYPE_CONVERSION_FORWARD() ccv_nnc_cmd(CCV_NNC_DATATYPE_CONVERSION_FORWARD, 0, ccv_nnc_cmd_auto, 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_DATATYPE_CONVERSION_BACKWARD)
#define CMD_DATATYPE_CONVERSION_BACKWARD() ccv_nnc_cmd(CCV_NNC_DATATYPE_CONVERSION_BACKWARD, 0, ccv_nnc_cmd_auto, 0)
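
/* A hedged usage sketch (illustrative): converting a 32-bit float tensor to
 * half precision; the target dtype is carried entirely by the output tensor's
 * parameters:
 *
 *   ccv_nnc_tensor_t* const f32 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 8), 0);
 *   ccv_nnc_tensor_t* const f16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 8), 0);
 *   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(f32), TENSOR_LIST(f16), 0);
 */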