/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/blas/ccv_nnc_blas.c
Line | Count | Source |
1 | | #include "ccv.h" |
2 | | #include "ccv_internal.h" |
3 | | #include "nnc/ccv_nnc.h" |
4 | | #include "nnc/ccv_nnc_internal.h" |
5 | | #include "nnc/ccv_nnc_easy.h" |
6 | | |
7 | | static int _ccv_nnc_same_pos_inplace(const ccv_nnc_cmd_param_t cmd, const int input_idx, const int input_size, const int output_idx, const int output_size) |
8 | 309 | { |
9 | | // For cudnnOpTensor: "If the input tensor B is the same tensor as the destination tensor C, then the input tensor A also must be the same tensor as the destination tensor C." |
10 | 309 | return input_idx == output_idx; |
11 | 309 | } |
12 | | |
13 | | static int _ccv_nnc_gemm_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
14 | 4.62k | { |
15 | 4.62k | if (input_size == 3 && (input_bitmasks[0] & 7u) == ((1u << 0) | (1u << 1) | (1u << 2)) && output_bitmasks[0] == 1u) |
16 | 487 | return 1; |
17 | | // No bias is OK. |
18 | 4.13k | if (input_size == 2 && (input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1)) && output_bitmasks[0] == 1u) |
19 | 4.13k | return 1; |
20 | 0 | return 0; |
21 | 4.13k | } |
22 | | |
23 | | static int _ccv_nnc_gemm_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
24 | 36.2k | { |
25 | | // Output the propagated error, gradient w.r.t. w and bias. |
26 | 36.2k | if ((input_bitmasks[0] & 7u) == ((1u << 0) | (1u << 1) | (1u << 2) | (0 << 3)) && output_bitmasks[0] == ((1u << 0) | (1u << 1) | (1u << 2))) |
27 | 2.73k | return 1; |
28 | | // No bias. |
29 | 33.4k | if ((input_bitmasks[0] & 7u) == ((1u << 0) | (1u << 1) | (1u << 2) | (0 << 3)) && output_bitmasks[0] == ((1u << 0) | (1u << 1) | (0 << 2))) |
30 | 4.08k | return 1; |
31 | | // Don't propagate error, only gradient w.r.t. w and bias. |
32 | 29.4k | if ((input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1) | (0 << 2) | (0 << 3)) && output_bitmasks[0] == ((0 << 0) | (1u << 1) | (1u << 2))) |
33 | 2.02k | return 1; |
34 | | // No bias. |
35 | 27.3k | if ((input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1) | (0 << 2) | (0 << 3)) && output_bitmasks[0] == ((0 << 0) | (1u << 1) | (0 << 2))) |
36 | 8.03k | return 1; |
37 | | // Bias, no weight. |
38 | 19.3k | if ((input_bitmasks[0] & 5u) == ((1u << 0) | (0 << 1) | (1u << 2) | (0 << 3)) && output_bitmasks[0] == ((1u << 0) | (0u << 1) | (1u << 2))) |
39 | 4 | return 1; |
40 | | // No bias, No weight. |
41 | 19.3k | if ((input_bitmasks[0] & 5u) == ((1u << 0) | (0 << 1) | (1u << 2) | (0 << 3)) && output_bitmasks[0] == ((1u << 0) | (0u << 1) | (0 << 2))) |
42 | 30 | return 1; |
43 | 19.3k | return 0; |
44 | 19.3k | } |
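
A note on the convention checked above: bit k of input_bitmasks[0] is set when input k is bound, and bit k of output_bitmasks[0] when output k is requested. For GEMM backward the inputs are ordered gradient, a, w (bit 3 is reserved but never required here) and the outputs are da, dw, dbias. A minimal standalone sketch of building such masks; the helper below is illustrative only and not part of the library:

#include <stdint.h>
#include <stdio.h>

/* Illustrative helper (not part of ccv): build a bitmask where bit k is set
 * for every tensor index listed in indices[]. */
static uint64_t tensor_bitmask(const int* const indices, const int count)
{
	uint64_t mask = 0;
	int i;
	for (i = 0; i < count; i++)
		mask |= (uint64_t)1 << indices[i];
	return mask;
}

int main(void)
{
	/* GEMM backward supplied with gradient (0), a (1) and w (2) ... */
	const int ins[] = { 0, 1, 2 };
	/* ... and asked for da (0) and dw (1) but not dbias (2). */
	const int outs[] = { 0, 1 };
	const uint64_t input_bitmask = tensor_bitmask(ins, 3);   /* 0b111 */
	const uint64_t output_bitmask = tensor_bitmask(outs, 2); /* 0b011 */
	/* These values match the "No bias." case in _ccv_nnc_gemm_back_bitmask,
	 * so that combination would be accepted. */
	printf("in: %#llx, out: %#llx\n",
		(unsigned long long)input_bitmask, (unsigned long long)output_bitmask);
	return 0;
}
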
45 | | |
46 | | static void _ccv_nnc_gemm_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size) |
47 | 23.7k | { |
48 | 23.7k | assert(output_size == 1); |
49 | 23.7k | int a_batch_size, a_rows, a_cols, a_batch_inc, a_rows_inc, a_cols_inc; |
50 | 23.7k | int w_batch_size, w_rows, w_cols, w_batch_inc, w_rows_inc, w_cols_inc; |
51 | 23.7k | const int a_nd = ccv_nnc_tensor_nd(inputs[0].dim); |
52 | 23.7k | const int w_nd = ccv_nnc_tensor_nd(inputs[1].dim); |
53 | 23.7k | const int nd = ccv_max(a_nd, w_nd); |
54 | 23.7k | ccv_nnc_tensor_get_matrix_params(inputs[0], 0, inputs[0].dim, cmd.blas.transpose_a, &a_batch_size, &a_rows, &a_cols, &a_batch_inc, &a_rows_inc, &a_cols_inc); |
55 | 23.7k | ccv_nnc_tensor_get_matrix_params(inputs[1], 0, inputs[1].dim, cmd.blas.transpose_b, &w_batch_size, &w_rows, &w_cols, &w_batch_inc, &w_rows_inc, &w_cols_inc); |
56 | 23.7k | outputs[0].type = inputs[0].type; |
57 | 23.7k | outputs[0].format = inputs[0].format; |
58 | 23.7k | outputs[0].datatype = inputs[0].datatype; |
59 | 23.7k | int b_rows = a_rows, b_cols = w_cols; |
60 | 23.7k | if (nd == 1) |
61 | 1 | outputs[0].dim[0] = b_cols; |
62 | 23.7k | else if (nd == 2) { |
63 | 23.3k | if (a_nd == 1) // If a is a vector, output is a vector too. |
64 | 17.0k | outputs[0].dim[0] = b_cols; |
65 | 6.27k | else { |
66 | 6.27k | outputs[0].dim[0] = b_rows; |
67 | 6.27k | outputs[0].dim[1] = b_cols; |
68 | 6.27k | } |
69 | 23.3k | } else { |
70 | 371 | assert(nd >= 3); |
71 | 371 | outputs[0].dim[nd - 3] = ccv_max(a_batch_size, w_batch_size); |
72 | 371 | outputs[0].dim[nd - 2] = b_rows; |
73 | 371 | outputs[0].dim[nd - 1] = b_cols; |
74 | 371 | int i; |
75 | 379 | for (i = 0; i < nd - 3; i++) |
76 | 8 | { |
77 | 8 | const int a_idx = a_nd - nd + i; |
78 | 8 | const int w_idx = w_nd - nd + i; |
79 | 8 | outputs[0].dim[i] = ccv_max(a_idx >= 0 ? inputs[0].dim[a_idx] : 1, w_idx >= 0 ? inputs[1].dim[w_idx] : 1); |
80 | 8 | } |
81 | 371 | } |
82 | 23.7k | } |
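
The shape rule in _ccv_nnc_gemm_tensor_auto_forw is: rows come from a, columns from w, and any leading batch dimensions are broadcast with ccv_max. A hedged way to see it is through the public ccv_nnc_hint_tensor_auto dispatcher; the sketch below assumes a zeroed blas param means "no transpose", the usual ccv_nnc_init() setup, and that the include paths match your build:

#include <ccv.h>
#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>
#include <stdio.h>

int main(void)
{
	ccv_nnc_init();
	/* a is 32x64, w is 64x128; no transposes are set in the command params. */
	const ccv_nnc_tensor_param_t inputs[2] = {
		CPU_TENSOR_NHWC(32F, 32, 64),
		CPU_TENSOR_NHWC(32F, 64, 128),
	};
	ccv_nnc_tensor_param_t output;
	const ccv_nnc_cmd_t gemm = ccv_nnc_cmd(CCV_NNC_GEMM_FORWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}}}, 0);
	/* Dispatches to _ccv_nnc_gemm_tensor_auto_forw registered below. */
	ccv_nnc_hint_tensor_auto(gemm, inputs, 2, ccv_nnc_no_hint, &output, 1);
	printf("output: %d x %d\n", output.dim[0], output.dim[1]); /* 32 x 128 */
	return 0;
}
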
83 | | |
84 | | REGISTER_COMMAND(CCV_NNC_GEMM_FORWARD)(ccv_nnc_cmd_registry_t* const registry) |
85 | | FIND_BACKEND(ccv_nnc_gemm_cpu_ref.c, ccv_nnc_gemm_cpu_opt.c, gpu/ccv_nnc_gemm_gpu_cublas.cu, mps/ccv_nnc_gemm_mps.m) |
86 | 1 | { |
87 | 1 | registry->bitmask = _ccv_nnc_gemm_forw_bitmask; |
88 | 1 | registry->tensor_auto = _ccv_nnc_gemm_tensor_auto_forw; |
89 | 1 | } |
90 | | |
91 | | REGISTER_COMMAND(CCV_NNC_GEMM_BACKWARD)(ccv_nnc_cmd_registry_t* const registry) |
92 | | FIND_BACKEND(ccv_nnc_gemm_cpu_ref.c, ccv_nnc_gemm_cpu_opt.c, gpu/ccv_nnc_gemm_gpu_cublas.cu, mps/ccv_nnc_gemm_mps.m) |
93 | 1 | { |
94 | 1 | registry->bitmask = _ccv_nnc_gemm_back_bitmask; |
95 | 1 | registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs; |
96 | 1 | } |
97 | | |
98 | | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_GEMM_FORWARD) |
99 | | #define CMD_GEMM_FORWARD(...) ccv_nnc_cmd(CCV_NNC_GEMM_FORWARD, 0, CMD_GEMM(__VA_ARGS__), 0) |
100 | | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_GEMM_BACKWARD) |
101 | | #define CMD_GEMM_BACKWARD(...) ccv_nnc_cmd(CCV_NNC_GEMM_BACKWARD, 0, CMD_GEMM(__VA_ARGS__), 0) |
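
For context, a hedged end-to-end sketch of running the forward command on concrete tensors with a bias, i.e. the three-input case accepted by _ccv_nnc_gemm_forw_bitmask. The command is built directly with ccv_nnc_cmd, mirroring what the CMD_GEMM_FORWARD macro does (the CMD_GEMM params are defined elsewhere and omitted here); that a zeroed blas param means no transposes and that the result is a * w + bias are assumptions, not spelled out in this file:

#include <ccv.h>
#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>
#include <stdio.h>

int main(void)
{
	ccv_nnc_init();
	/* a: 1x2, w: 2x3, bias: 3 => c: 1x3. */
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2), 0);
	ccv_nnc_tensor_t* const w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_tensor_t* const bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
	ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 3), 0);
	a->data.f32[0] = 1; a->data.f32[1] = 2;
	int i;
	for (i = 0; i < 6; i++)
		w->data.f32[i] = i + 1; /* w = [[1,2,3],[4,5,6]] */
	for (i = 0; i < 3; i++)
		bias->data.f32[i] = 0.5;
	const ccv_nnc_cmd_t gemm = ccv_nnc_cmd(CCV_NNC_GEMM_FORWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}}}, 0);
	ccv_nnc_cmd_exec(gemm, ccv_nnc_no_hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(c), 0);
	/* Expected under the above assumptions: a * w + bias = [9.5, 12.5, 15.5]. */
	printf("%g %g %g\n", c->data.f32[0], c->data.f32[1], c->data.f32[2]);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(w);
	ccv_nnc_tensor_free(bias);
	ccv_nnc_tensor_free(c);
	return 0;
}
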
102 | | |
103 | | static int _ccv_nnc_add_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
104 | 31 | { |
105 | 31 | if ((input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1)) && output_bitmasks[0] == 1u) |
106 | 31 | return 1; |
107 | 0 | return 0; |
108 | 31 | } |
109 | | |
110 | | static int _ccv_nnc_add_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
111 | 15.6k | { |
112 | | // w.r.t. both x and y |
113 | 15.6k | if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == ((1u << 0) | (1u << 1))) |
114 | 2.33k | return 1; |
115 | | // w.r.t. x |
116 | 13.2k | if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == ((1u << 0) | (0u << 1))) |
117 | 8.82k | return 1; |
118 | | // w.r.t. y |
119 | 4.47k | if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == ((0u << 0) | (1u << 1))) |
120 | 0 | return 1; |
121 | 4.47k | return 0; |
122 | 4.47k | } |
123 | | |
124 | | static void _ccv_nnc_broadcast_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size) |
125 | 12.6k | { |
126 | 12.6k | assert(input_size >= 2); |
127 | 12.6k | assert(output_size == 1); |
128 | 12.6k | const int a_nd = ccv_nnc_tensor_nd(inputs[0].dim); |
129 | 12.6k | const int b_nd = ccv_nnc_tensor_nd(inputs[1].dim); |
130 | 12.6k | outputs[0] = inputs[0]; |
131 | 12.6k | const int c_nd = ccv_max(a_nd, b_nd); |
132 | 12.6k | int i; |
133 | 25.5k | for (i = a_nd - 1; i >= 0; i--) |
134 | 12.9k | outputs[0].dim[i + c_nd - a_nd] = inputs[0].dim[i]; |
135 | 25.4k | for (i = b_nd - 1; i >= 0; i--) |
136 | 12.8k | outputs[0].dim[i + c_nd - b_nd] = ccv_max(outputs[0].dim[i + c_nd - b_nd], inputs[1].dim[i]); |
137 | 12.6k | } |
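
_ccv_nnc_broadcast_tensor_auto_forw right-aligns both shapes on their trailing dimensions and takes the per-dimension maximum, so e.g. an [8, 1, 4] first input and a [3, 1] second input yield an [8, 3, 4] output. A standalone sketch of the same alignment rule (illustrative only; the helper and the small dimension cap are hypothetical):

#include <stdio.h>

#define MAX_ND 8

/* Illustrative restatement of the loops above: right-align both shapes and
 * take the per-dimension maximum. */
static int broadcast_shape(const int* const a, const int a_nd, const int* const b, const int b_nd, int* const c)
{
	const int c_nd = a_nd > b_nd ? a_nd : b_nd;
	int i;
	for (i = 0; i < c_nd; i++)
		c[i] = 1;
	for (i = a_nd - 1; i >= 0; i--)
		c[i + c_nd - a_nd] = a[i];
	for (i = b_nd - 1; i >= 0; i--)
		c[i + c_nd - b_nd] = c[i + c_nd - b_nd] > b[i] ? c[i + c_nd - b_nd] : b[i];
	return c_nd;
}

int main(void)
{
	const int a[] = { 8, 1, 4 };
	const int b[] = { 3, 1 };
	int c[MAX_ND];
	const int c_nd = broadcast_shape(a, 3, b, 2, c);
	int i;
	for (i = 0; i < c_nd; i++)
		printf("%d ", c[i]); /* 8 3 4 */
	printf("\n");
	return 0;
}
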
138 | | |
139 | | REGISTER_COMMAND(CCV_NNC_ADD_FORWARD)(ccv_nnc_cmd_registry_t* const registry) |
140 | | FIND_BACKEND(ccv_nnc_add_cpu_ref.c, gpu/ccv_nnc_add_gpu_cudnn.cu, mps/ccv_nnc_add_mps.m) |
141 | 1 | { |
142 | 1 | registry->bitmask = _ccv_nnc_add_forw_bitmask; |
143 | 1 | registry->tensor_auto = _ccv_nnc_broadcast_tensor_auto_forw; |
144 | 1 | registry->allow_inplace = _ccv_nnc_same_pos_inplace; |
145 | 1 | } |
146 | | |
147 | | REGISTER_COMMAND(CCV_NNC_ADD_BACKWARD)(ccv_nnc_cmd_registry_t* const registry) |
148 | | FIND_BACKEND(ccv_nnc_add_cpu_ref.c, gpu/ccv_nnc_add_gpu_cudnn.cu, mps/ccv_nnc_add_mps.m) |
149 | 1 | { |
150 | 1 | registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES; |
151 | 1 | registry->bitmask = _ccv_nnc_add_back_bitmask; |
152 | 1 | registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs; |
153 | 1 | } |
154 | | |
155 | | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_ADD_FORWARD) |
156 | | #define CMD_ADD_FORWARD(_p, _q) ccv_nnc_cmd(CCV_NNC_ADD_FORWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_p, _q}}}, 0) |
157 | | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_ADD_BACKWARD) |
158 | | #define CMD_ADD_BACKWARD(_p, _q) ccv_nnc_cmd(CCV_NNC_ADD_BACKWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_p, _q}}}, 0) |
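
The ADD command takes scalar weights through blas.a, so CMD_ADD_FORWARD(p, q) computes roughly c = p*a + q*b under the broadcasting rule above, and _ccv_nnc_same_pos_inplace lets c alias a. A hedged sketch; the p*a + q*b semantics and the in-place call pattern are assumptions drawn from the params and the inplace rule, not spelled out in this file:

#include <ccv.h>
#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>
#include <stdio.h>

int main(void)
{
	ccv_nnc_init();
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
	int i;
	for (i = 0; i < 4; i++)
	{
		a->data.f32[i] = i; /* a = [0, 1, 2, 3] */
		b->data.f32[i] = 1; /* b = [1, 1, 1, 1] */
	}
	/* c = 1*a + 2*b, written back into a: output index 0 aliases input index 0,
	 * which _ccv_nnc_same_pos_inplace permits. */
	ccv_nnc_cmd_exec(CMD_ADD_FORWARD(1, 2), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(a), 0);
	for (i = 0; i < 4; i++)
		printf("%g ", a->data.f32[i]); /* expected 2 3 4 5 */
	printf("\n");
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	return 0;
}
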
159 | | |
160 | | static int _ccv_nnc_mul_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
161 | 26 | { |
162 | 26 | if ((input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1)) && output_bitmasks[0] == 1u) |
163 | 26 | return 1; |
164 | 0 | return 0; |
165 | 26 | } |
166 | | |
167 | | static int _ccv_nnc_mul_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
168 | 30.1k | { |
169 | | // w.r.t. both x and y |
170 | 30.1k | if ((input_bitmasks[0] & 7u) == 7u && output_bitmasks[0] == ((1u << 0) | (1u << 1))) |
171 | 6.04k | return 1; |
172 | | // w.r.t. x |
173 | 24.1k | if ((input_bitmasks[0] & 5u) == 5u && output_bitmasks[0] == ((1u << 0) | (0u << 1))) |
174 | 0 | return 1; |
175 | | // w.r.t. y |
176 | 24.1k | if ((input_bitmasks[0] & 3u) == 3u && output_bitmasks[0] == ((0u << 0) | (1u << 1))) |
177 | 6.01k | return 1; |
178 | 18.0k | return 0; |
179 | 24.1k | } |
180 | | |
181 | | REGISTER_COMMAND(CCV_NNC_MUL_FORWARD)(ccv_nnc_cmd_registry_t* const registry) |
182 | | FIND_BACKEND(ccv_nnc_mul_cpu_ref.c, gpu/ccv_nnc_mul_gpu_cudnn.cu, mps/ccv_nnc_mul_mps.m) |
183 | 1 | { |
184 | 1 | registry->bitmask = _ccv_nnc_mul_forw_bitmask; |
185 | 1 | registry->tensor_auto = _ccv_nnc_broadcast_tensor_auto_forw; |
186 | 1 | registry->allow_inplace = _ccv_nnc_same_pos_inplace; |
187 | 1 | } |
188 | | |
189 | | REGISTER_COMMAND(CCV_NNC_MUL_BACKWARD)(ccv_nnc_cmd_registry_t* const registry) |
190 | | FIND_BACKEND(ccv_nnc_mul_cpu_ref.c, gpu/ccv_nnc_mul_gpu_cudnn.cu, mps/ccv_nnc_mul_mps.m) |
191 | 1 | { |
192 | 1 | registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES; |
193 | 1 | registry->bitmask = _ccv_nnc_mul_back_bitmask; |
194 | 1 | registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs; |
195 | 1 | } |
196 | | |
197 | | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_MUL_FORWARD) |
198 | | #define CMD_MUL_FORWARD(_p) ccv_nnc_cmd(CCV_NNC_MUL_FORWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_p,}}}, 0) |
199 | | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_MUL_BACKWARD) |
200 | | #define CMD_MUL_BACKWARD(_p) ccv_nnc_cmd(CCV_NNC_MUL_BACKWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_p,}}}, 0) |
201 | | |
202 | | static int _ccv_nnc_scalar_mul_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
203 | 4 | { |
204 | 4 | if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == 1u) |
205 | 4 | return 1; |
206 | 0 | return 0; |
207 | 4 | } |
208 | | |
209 | | static int _ccv_nnc_scalar_mul_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
210 | 173 | { |
211 | | // w.r.t. x |
212 | 173 | if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == 1u) |
213 | 111 | return 1; |
214 | 62 | return 0; |
215 | 173 | } |
216 | | |
217 | | REGISTER_COMMAND(CCV_NNC_SCALAR_MUL_FORWARD)(ccv_nnc_cmd_registry_t* const registry) |
218 | | FIND_BACKEND(ccv_nnc_mul_cpu_ref.c, gpu/ccv_nnc_mul_gpu_cudnn.cu, mps/ccv_nnc_mul_mps.m) |
219 | 1 | { |
220 | 1 | registry->bitmask = _ccv_nnc_scalar_mul_forw_bitmask; |
221 | 1 | registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs; |
222 | 1 | registry->allow_inplace = _ccv_nnc_same_pos_inplace; |
223 | 1 | } |
224 | | |
225 | | REGISTER_COMMAND(CCV_NNC_SCALAR_MUL_BACKWARD)(ccv_nnc_cmd_registry_t* const registry) |
226 | | FIND_BACKEND(ccv_nnc_mul_cpu_ref.c, gpu/ccv_nnc_mul_gpu_cudnn.cu, mps/ccv_nnc_mul_mps.m) |
227 | 1 | { |
228 | 1 | registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES; |
229 | 1 | registry->bitmask = _ccv_nnc_scalar_mul_back_bitmask; |
230 | 1 | registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs; |
231 | 1 | } |
232 | | |
233 | | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_SCALAR_MUL_FORWARD) |
234 | | #define CMD_SCALAR_MUL_FORWARD(_a) ccv_nnc_cmd(CCV_NNC_SCALAR_MUL_FORWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_a,}}}, 0) |
235 | | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_SCALAR_MUL_BACKWARD) |
236 | | #define CMD_SCALAR_MUL_BACKWARD(_a) ccv_nnc_cmd(CCV_NNC_SCALAR_MUL_BACKWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={_a,}}}, 0) |
237 | | |
238 | | static int _ccv_nnc_cmul_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
239 | 0 | { |
240 | 0 | if ((input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1)) && output_bitmasks[0] == 1u) |
241 | 0 | return 1; |
242 | 0 | return 0; |
243 | 0 | } |
244 | | |
245 | | static int _ccv_nnc_cmul_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
246 | 6 | { |
247 | | // w.r.t. both x and y |
248 | 6 | if ((input_bitmasks[0] & 7u) == 7u && output_bitmasks[0] == ((1u << 0) | (1u << 1))) |
249 | 1 | return 1; |
250 | | // w.r.t. x |
251 | 5 | if ((input_bitmasks[0] & 5u) == 5u && output_bitmasks[0] == ((1u << 0) | (0u << 1))) |
252 | 3 | return 1; |
253 | | // w.r.t. y |
254 | 2 | if ((input_bitmasks[0] & 3u) == 3u && output_bitmasks[0] == ((0u << 0) | (1u << 1))) |
255 | 0 | return 1; |
256 | 2 | return 0; |
257 | 2 | } |
258 | | |
259 | | REGISTER_COMMAND(CCV_NNC_CMUL_FORWARD)(ccv_nnc_cmd_registry_t* const registry) |
260 | | FIND_BACKEND(ccv_nnc_cmul_cpu_ref.c, gpu/ccv_nnc_cmul_gpu_ref.cu, mps/ccv_nnc_cmul_mps.m) |
261 | 1 | { |
262 | 1 | registry->bitmask = _ccv_nnc_cmul_forw_bitmask; |
263 | 1 | registry->tensor_auto = _ccv_nnc_broadcast_tensor_auto_forw; |
264 | 1 | registry->allow_inplace = _ccv_nnc_same_pos_inplace; |
265 | 1 | } |
266 | | |
267 | | REGISTER_COMMAND(CCV_NNC_CMUL_BACKWARD)(ccv_nnc_cmd_registry_t* const registry) |
268 | | FIND_BACKEND(ccv_nnc_cmul_cpu_ref.c, gpu/ccv_nnc_cmul_gpu_ref.cu, mps/ccv_nnc_cmul_mps.m) |
269 | 1 | { |
270 | 1 | registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES; |
271 | 1 | registry->bitmask = _ccv_nnc_cmul_back_bitmask; |
272 | 1 | registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs; |
273 | 1 | } |
274 | | |
275 | | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_CMUL_FORWARD) |
276 | | #define CMD_CMUL_FORWARD() ccv_nnc_cmd(CCV_NNC_CMUL_FORWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}}}, 0) |
277 | | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_CMUL_BACKWARD) |
278 | | #define CMD_CMUL_BACKWARD() ccv_nnc_cmd(CCV_NNC_CMUL_BACKWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}}}, 0) |