Coverage Report

Created: 2024-08-19 11:27

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/norm/ccv_nnc_norm.c

#include "ccv.h"
#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_internal.h"

static int _ccv_nnc_batch_norm_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
	// 5 inputs (x, scale, bias, mean, var)
	// 1 output (y)
	if (input_bitmasks[0] == 31u && output_bitmasks[0] == 1u)
		return 1;
	// 5 inputs (x, scale, bias, mean, var)
	// 5 outputs (y, mean, var, saved_mean, saved_inv_var)
	// Both mean and var in the output are updated in place over the input mean and var
	if (input_bitmasks[0] == 31u && output_bitmasks[0] == 31u)
		return 1;
	return 0;
}
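
// Editor's note, not part of the original file: the bitmask convention used
// by these functions packs tensor availability into uint64_t words, with bit
// i set when the i-th input/output is bound. Hence 31u == 0b11111 means all
// five inputs (x, scale, bias, mean, var) are present and 1u means only y is.
// A minimal sketch of how such a mask could be assembled from a tensor list
// (hypothetical helper, for illustration only):
static uint64_t _example_bitmask_of(ccv_nnc_tensor_t* const* const tensors, const int size)
{
	uint64_t mask = 0;
	int i;
	for (i = 0; i < size && i < 64; i++)
		if (tensors[i]) // A bound (non-null) tensor sets its bit.
			mask |= (uint64_t)1 << i;
	return mask;
}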

static int _ccv_nnc_batch_norm_enforce_inplace(const ccv_nnc_cmd_param_t cmd, const int input_idx, const int input_size, const int output_idx, const int output_size)
{
	if (input_idx == 3 && output_idx == 1)
		return 1;
	if (input_idx == 4 && output_idx == 2)
		return 1;
	return 0;
}
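
// Editor's note, not part of the original file: with inputs ordered
// (x, scale, bias, mean, var) and outputs (y, mean, var, saved_mean,
// saved_inv_var), the pairs forced in-place above are input 3 (mean) with
// output 1 (mean), and input 4 (var) with output 2 (var), so the running
// statistics are updated in their existing buffers rather than copied. A
// hypothetical self-check of that mapping (illustrative only):
static void _example_enforce_inplace_check(void)
{
	ccv_nnc_cmd_param_t params = {0}; // Parameters are unused by the in-place check.
	assert(_ccv_nnc_batch_norm_enforce_inplace(params, 3, 5, 1, 5) == 1); // mean aliases mean
	assert(_ccv_nnc_batch_norm_enforce_inplace(params, 4, 5, 2, 5) == 1); // var aliases var
	assert(_ccv_nnc_batch_norm_enforce_inplace(params, 0, 5, 0, 5) == 0); // x does not alias y
}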

static int _ccv_nnc_batch_norm_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
	// 0b110000001100001
	// Inputs (gradient, 0, 0, 0, 0, x, scale, 0, 0, 0, 0, 0, 0, saved_mean, saved_inv_var)
	// Output the propagated error, dscale and dbias
	if ((input_bitmasks[0] & 24673u) == 24673u && (output_bitmasks[0] & 7u) == 7u)
		return 1;
	return 0;
}
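
// Editor's note, not part of the original file: 24673u == 0b110000001100001,
// i.e. of the 15 backward inputs, bits 0 (gradient), 5 (x), 6 (scale),
// 13 (saved_mean) and 14 (saved_inv_var) must be bound, and 7u == 0b111
// requires all three outputs (propagated error, dscale, dbias). Spelled out
// bit by bit (sketch, for illustration only):
static const uint64_t _example_bnorm_back_input_mask =
	((uint64_t)1 << 0)    // gradient
	| ((uint64_t)1 << 5)  // x
	| ((uint64_t)1 << 6)  // scale
	| ((uint64_t)1 << 13) // saved_mean
	| ((uint64_t)1 << 14); // saved_inv_var -- sums to 24673u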

static void _ccv_nnc_batch_norm_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
{
	assert(input_size == 5);
	assert(output_size == 1 || output_size == 5);
	outputs[0] = inputs[0];
	if (output_size == 1)
		return;
	int i, j;
	for (i = 1; i < output_size; i++)
	{
		outputs[i] = inputs[0];
		for (j = 0; j < cmd.bnorm.count; j++)
			outputs[i].dim[cmd.bnorm.axis[j]] = 1; // Reduce the dimension to 1.
	}
}
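
// Editor's note, not part of the original file: outputs[0] (y) inherits x's
// shape wholesale, while the statistics outputs keep x's shape except that
// every axis listed in cmd.bnorm.axis collapses to 1. With illustrative
// numbers: an NHWC input of dim {8, 32, 32, 64} and axis = {0, 1, 2} gives
// mean/var/saved_mean/saved_inv_var of dim {1, 1, 1, 64}, i.e. one statistic
// per channel.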

static void _ccv_nnc_batch_norm_tensor_auto_back(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
{
	assert(input_size == 15);
	assert(output_size == 5);
	outputs[0] = inputs[0];
	int i, j;
	for (i = 1; i < output_size; i++)
	{
		outputs[i] = inputs[0];
		for (j = 0; j < cmd.bnorm.count; j++)
			outputs[i].dim[cmd.bnorm.axis[j]] = 1; // Reduce the dimension to 1.
	}
}

REGISTER_COMMAND(CCV_NNC_BATCH_NORM_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_batch_norm_cpu_ref.c, gpu/ccv_nnc_batch_norm_gpu_cudnn.cu)
{
	registry->bitmask = _ccv_nnc_batch_norm_forw_bitmask;
	registry->tensor_auto = _ccv_nnc_batch_norm_tensor_auto_forw;
	registry->enforce_inplace = _ccv_nnc_batch_norm_enforce_inplace;
}

REGISTER_COMMAND(CCV_NNC_BATCH_NORM_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_batch_norm_cpu_ref.c, gpu/ccv_nnc_batch_norm_gpu_cudnn.cu)
{
	registry->bitmask = _ccv_nnc_batch_norm_back_bitmask;
	registry->tensor_auto = _ccv_nnc_batch_norm_tensor_auto_back;
}

//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_BATCH_NORM_FORWARD)
#define CMD_BATCH_NORM_FORWARD(_epsilon, _is_test, _momentum, ...) ccv_nnc_cmd(CCV_NNC_BATCH_NORM_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.bnorm={.epsilon=_epsilon,.is_test=_is_test,.momentum=_momentum,.count=LIST_COUNT(__VA_ARGS__),.axis={__VA_ARGS__}}}), 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_BATCH_NORM_BACKWARD)
#define CMD_BATCH_NORM_BACKWARD(_epsilon, _is_test, _momentum, ...) ccv_nnc_cmd(CCV_NNC_BATCH_NORM_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.bnorm={.epsilon=_epsilon,.is_test=_is_test,.momentum=_momentum,.count=LIST_COUNT(__VA_ARGS__),.axis={__VA_ARGS__}}}), 0)
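
// Editor's sketch, not part of the original file: constructing a batch norm
// forward command with the macro above, normalizing an NHWC tensor over axes
// 0, 1, 2 (batch and spatial) so statistics are kept per channel. The values
// epsilon = 1e-4, is_test = 0 and momentum = 0.9 are illustrative
// assumptions, not taken from this file.
static ccv_nnc_cmd_t _example_batch_norm_forward(void)
{
	return CMD_BATCH_NORM_FORWARD(1e-4, 0, 0.9, 0, 1, 2);
}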

static int _ccv_nnc_layer_norm_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
	if (cmd.lnorm.elementwise_affine)
	{
		// 3 inputs (x, gamma, beta)
		// 3 outputs (y, saved_mean, saved_inv_std)
		if (input_bitmasks[0] == 7u && output_bitmasks[0] == 7u)
			return 1;
		// 3 inputs (x, gamma, beta)
		// 1 output (y)
		if (input_bitmasks[0] == 7u && output_bitmasks[0] == 1u)
			return 1;
	} else {
		// 1 input (x)
		// 3 outputs (y, saved_mean, saved_inv_std)
		if (input_bitmasks[0] == 1u && output_bitmasks[0] == 7u)
			return 1;
		// 1 input (x)
		// 1 output (y)
		if (input_bitmasks[0] == 1u && output_bitmasks[0] == 1u)
			return 1;
	}
	return 0;
}

static int _ccv_nnc_layer_norm_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
	if (cmd.lnorm.elementwise_affine)
	{
		// 0b110011001
		// Inputs (gradient, 0, 0, x, gamma, 0, 0, saved_mean, saved_inv_std)
		// Output the propagated error, dgamma and dbeta
		if ((input_bitmasks[0] & 409u) == 409u && (output_bitmasks[0] & 7u) == 7u)
			return 1;
		if ((input_bitmasks[0] & 409u) == 409u && (output_bitmasks[0] & 5u) == 5u)
			return 1;
		if ((input_bitmasks[0] & 409u) == 409u && (output_bitmasks[0] & 3u) == 3u)
			return 1;
		if ((input_bitmasks[0] & 409u) == 409u && (output_bitmasks[0] & 1u) == 1u)
			return 1;
	} else {
		// 0b1101001
		// Inputs (gradient, 0, 0, x, 0, saved_mean, saved_inv_std)
		// Output the propagated error
		if ((input_bitmasks[0] & 105u) == 105u && (output_bitmasks[0] & 1u) == 1u)
			return 1;
	}
	return 0;
}
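
// Editor's note, not part of the original file: 409u == 0b110011001 selects
// bits 0 (gradient), 3 (x), 4 (gamma), 7 (saved_mean) and 8 (saved_inv_std);
// the cascade of output masks (7u, 5u, 3u, 1u) accepts the propagated error
// together with any subset of dgamma and dbeta. Without elementwise affine,
// 105u == 0b1101001 drops gamma from the inputs and only the propagated
// error is produced.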

static void _ccv_nnc_layer_norm_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
{
	assert(input_size == 3 || input_size == 1);
	assert(output_size == 1 || output_size == 3);
	outputs[0] = inputs[0];
	if (output_size == 1)
		return;
	int i, j;
	for (i = 1; i < output_size; i++)
	{
		outputs[i] = inputs[0];
		for (j = 0; j < cmd.lnorm.count; j++)
			outputs[i].dim[cmd.lnorm.axis[j]] = 1; // Reduce the dimension to 1.
	}
}

static void _ccv_nnc_layer_norm_tensor_auto_back(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
{
	assert(input_size == 9 || input_size == 7);
	assert(output_size == 1 || output_size == 3);
	outputs[0] = inputs[0];
	int i, j;
	for (i = 1; i < output_size; i++)
	{
		outputs[i] = inputs[0];
		for (j = 0; j < cmd.lnorm.count; j++)
			outputs[i].dim[cmd.lnorm.axis[j]] = 1; // Reduce the dimension to 1.
	}
}

REGISTER_COMMAND(CCV_NNC_LAYER_NORM_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_layer_norm_cpu_ref.c, gpu/ccv_nnc_layer_norm_gpu_cudnn.cu, mps/ccv_nnc_layer_norm_mps.m)
{
	registry->bitmask = _ccv_nnc_layer_norm_forw_bitmask;
	registry->tensor_auto = _ccv_nnc_layer_norm_tensor_auto_forw;
}

REGISTER_COMMAND(CCV_NNC_LAYER_NORM_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_layer_norm_cpu_ref.c, gpu/ccv_nnc_layer_norm_gpu_cudnn.cu, mps/ccv_nnc_layer_norm_mps.m)
{
	registry->bitmask = _ccv_nnc_layer_norm_back_bitmask;
	registry->tensor_auto = _ccv_nnc_layer_norm_tensor_auto_back;
}

//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_LAYER_NORM_FORWARD)
#define CMD_LAYER_NORM_FORWARD(_epsilon, _elementwise_affine, ...) ccv_nnc_cmd(CCV_NNC_LAYER_NORM_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.lnorm={.epsilon=_epsilon,.elementwise_affine=_elementwise_affine,.count=LIST_COUNT(__VA_ARGS__),.axis={__VA_ARGS__}}}), 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_LAYER_NORM_BACKWARD)
#define CMD_LAYER_NORM_BACKWARD(_epsilon, _elementwise_affine, ...) ccv_nnc_cmd(CCV_NNC_LAYER_NORM_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.lnorm={.epsilon=_epsilon,.elementwise_affine=_elementwise_affine,.count=LIST_COUNT(__VA_ARGS__),.axis={__VA_ARGS__}}}), 0)
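
// Editor's sketch, not part of the original file: a layer norm forward
// command normalizing over the last axis of a rank-3 activation (e.g.
// batch x tokens x features) with elementwise affine enabled. The values
// epsilon = 1e-5 and axis 2 are illustrative assumptions.
static ccv_nnc_cmd_t _example_layer_norm_forward(void)
{
	return CMD_LAYER_NORM_FORWARD(1e-5, 1, 2);
}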

static int _ccv_nnc_group_norm_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
	if (cmd.gnorm.elementwise_affine)
	{
		// 3 inputs (x, gamma, beta)
		// 3 outputs (y, saved_mean, saved_inv_std)
		if (input_bitmasks[0] == 7u && output_bitmasks[0] == 7u)
			return 1;
		// 3 inputs (x, gamma, beta)
		// 1 output (y)
		if (input_bitmasks[0] == 7u && output_bitmasks[0] == 1u)
			return 1;
	} else {
		// 1 input (x)
		// 3 outputs (y, saved_mean, saved_inv_std)
		if (input_bitmasks[0] == 1u && output_bitmasks[0] == 7u)
			return 1;
		// 1 input (x)
		// 1 output (y)
		if (input_bitmasks[0] == 1u && output_bitmasks[0] == 1u)
			return 1;
	}
	return 0;
}

static int _ccv_nnc_group_norm_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
	if (cmd.gnorm.elementwise_affine)
	{
		// 0b110011001
		// Inputs (gradient, 0, 0, x, gamma, 0, 0, saved_mean, saved_inv_std)
		// Output the propagated error, dgamma and dbeta
		if ((input_bitmasks[0] & 409u) == 409u && (output_bitmasks[0] & 7u) == 7u)
			return 1;
		if ((input_bitmasks[0] & 409u) == 409u && (output_bitmasks[0] & 5u) == 5u)
			return 1;
		if ((input_bitmasks[0] & 409u) == 409u && (output_bitmasks[0] & 3u) == 3u)
			return 1;
		if ((input_bitmasks[0] & 409u) == 409u && (output_bitmasks[0] & 1u) == 1u)
			return 1;
	} else {
		// 0b1101001
		// Inputs (gradient, 0, 0, x, 0, saved_mean, saved_inv_std)
		// Output the propagated error
		if ((input_bitmasks[0] & 105u) == 105u && (output_bitmasks[0] & 7u) == 1u)
			return 1;
	}
	return 0;
}

static void _ccv_nnc_group_norm_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
{
	assert(input_size == 3 || input_size == 1);
	assert(output_size == 1 || output_size == 3);
	outputs[0] = inputs[0];
	if (output_size == 1)
		return;
	int i, j;
	for (i = 1; i < output_size; i++)
	{
		outputs[i] = inputs[0];
		outputs[i].dim[cmd.gnorm.group_axis] = cmd.gnorm.groups; // Reduce to num_groups.
		for (j = 0; j < cmd.gnorm.reduce_count; j++)
			outputs[i].dim[cmd.gnorm.reduce_axis[j]] = 1;
	}
}
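
// Editor's note, not part of the original file: unlike batch/layer norm, the
// statistics here keep cmd.gnorm.groups entries along group_axis and collapse
// only the reduce axes. With illustrative numbers: an NCHW input of dim
// {8, 64, 16, 16} with group_axis = 1, groups = 32 and reduce_axis = {2, 3}
// gives saved_mean/saved_inv_std of dim {8, 32, 1, 1}, i.e. one statistic per
// sample per group.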

static void _ccv_nnc_group_norm_tensor_auto_back(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
{
	assert(input_size == 9 || input_size == 7);
	assert(output_size == 1 || output_size == 3);
	outputs[0] = inputs[0];
	int i, j;
	for (i = 1; i < output_size; i++)
	{
		outputs[i] = inputs[0];
		outputs[i].dim[cmd.gnorm.group_axis] = cmd.gnorm.groups; // Reduce the dimension to num_groups.
		for (j = 0; j < cmd.gnorm.reduce_count; j++)
			outputs[i].dim[cmd.gnorm.reduce_axis[j]] = 1;
	}
}

REGISTER_COMMAND(CCV_NNC_GROUP_NORM_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_group_norm_cpu_ref.c, gpu/ccv_nnc_group_norm_gpu_cudnn.cu, mps/ccv_nnc_group_norm_mps.m)
{
	registry->bitmask = _ccv_nnc_group_norm_forw_bitmask;
	registry->tensor_auto = _ccv_nnc_group_norm_tensor_auto_forw;
}

REGISTER_COMMAND(CCV_NNC_GROUP_NORM_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_group_norm_cpu_ref.c, gpu/ccv_nnc_group_norm_gpu_cudnn.cu, mps/ccv_nnc_group_norm_mps.m)
{
	registry->bitmask = _ccv_nnc_group_norm_back_bitmask;
	registry->tensor_auto = _ccv_nnc_group_norm_tensor_auto_back;
}

//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_GROUP_NORM_FORWARD)
#define CMD_GROUP_NORM_FORWARD(_group_axis, _groups, _epsilon, _elementwise_affine, ...) ccv_nnc_cmd(CCV_NNC_GROUP_NORM_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.gnorm={.group_axis=_group_axis,.groups=_groups,.epsilon=_epsilon,.elementwise_affine=_elementwise_affine,.reduce_count=LIST_COUNT(__VA_ARGS__),.reduce_axis={__VA_ARGS__}}}), 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_GROUP_NORM_BACKWARD)
#define CMD_GROUP_NORM_BACKWARD(_group_axis, _groups, _epsilon, _elementwise_affine, ...) ccv_nnc_cmd(CCV_NNC_GROUP_NORM_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.gnorm={.group_axis=_group_axis,.groups=_groups,.epsilon=_epsilon,.elementwise_affine=_elementwise_affine,.reduce_count=LIST_COUNT(__VA_ARGS__),.reduce_axis={__VA_ARGS__}}}), 0)
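
// Editor's sketch, not part of the original file: a group norm forward
// command matching the shape example above: channels on axis 1 split into 32
// groups, reducing over spatial axes 2 and 3. All parameter values are
// illustrative assumptions.
static ccv_nnc_cmd_t _example_group_norm_forward(void)
{
	return CMD_GROUP_NORM_FORWARD(1, 32, 1e-5, 1, 2, 3);
}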

static int _ccv_nnc_rmsnorm_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
	// 2 inputs (x, gamma)
	// 2 outputs (y, saved_inv_std)
	if (input_bitmasks[0] == 3u && output_bitmasks[0] == 3u)
		return 1;
	return 0;
}

static int _ccv_nnc_rmsnorm_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
	// 1 + 4 + 8 + 32
	// Inputs (gradient, 0, x, gamma, 0, saved_inv_std)
	// Output the propagated error, dgamma
	if ((input_bitmasks[0] & 45u) == 45u && (output_bitmasks[0] & 3u) == 3u)
		return 1;
	if ((input_bitmasks[0] & 45u) == 45u && (output_bitmasks[0] & 1u) == 1u)
		return 1;
	return 0;
}
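
// Editor's note, not part of the original file: per the comment above,
// 45u == 1 + 4 + 8 + 32 == 0b101101 selects bits 0 (gradient), 2 (x),
// 3 (gamma) and 5 (saved_inv_std) of the six backward inputs; the two output
// masks accept the propagated error either alone (1u) or together with
// dgamma (3u).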

static void _ccv_nnc_rmsnorm_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
{
	assert(input_size == 2);
	assert(output_size == 1 || output_size == 2);
	outputs[0] = inputs[0];
	if (output_size == 1)
		return;
	int i, j;
	for (i = 1; i < output_size; i++)
	{
		outputs[i] = inputs[0];
		for (j = 0; j < cmd.rmsnorm.count; j++)
			outputs[i].dim[cmd.rmsnorm.axis[j]] = 1; // Reduce the dimension to 1.
	}
}

static void _ccv_nnc_rmsnorm_tensor_auto_back(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
{
	assert(input_size == 6);
	assert(output_size == 1 || output_size == 2);
	outputs[0] = inputs[0];
	int i, j;
	for (i = 1; i < output_size; i++)
	{
		outputs[i] = inputs[0];
		for (j = 0; j < cmd.rmsnorm.count; j++)
			outputs[i].dim[cmd.rmsnorm.axis[j]] = 1; // Reduce the dimension to 1.
	}
}

REGISTER_COMMAND(CCV_NNC_RMSNORM_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_rmsnorm_cpu_ref.c, gpu/ccv_nnc_rmsnorm_gpu_cudnn.cu, mps/ccv_nnc_rmsnorm_mps.m)
{
	registry->bitmask = _ccv_nnc_rmsnorm_forw_bitmask;
	registry->tensor_auto = _ccv_nnc_rmsnorm_tensor_auto_forw;
}

REGISTER_COMMAND(CCV_NNC_RMSNORM_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_rmsnorm_cpu_ref.c, gpu/ccv_nnc_rmsnorm_gpu_cudnn.cu, mps/ccv_nnc_rmsnorm_mps.m)
{
	registry->bitmask = _ccv_nnc_rmsnorm_back_bitmask;
	registry->tensor_auto = _ccv_nnc_rmsnorm_tensor_auto_back;
}

//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_RMSNORM_FORWARD)
#define CMD_RMSNORM_FORWARD(_epsilon, ...) ccv_nnc_cmd(CCV_NNC_RMSNORM_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.rmsnorm={.epsilon=_epsilon,.count=LIST_COUNT(__VA_ARGS__),.axis={__VA_ARGS__}}}), 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_RMSNORM_BACKWARD)
#define CMD_RMSNORM_BACKWARD(_epsilon, ...) ccv_nnc_cmd(CCV_NNC_RMSNORM_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.rmsnorm={.epsilon=_epsilon,.count=LIST_COUNT(__VA_ARGS__),.axis={__VA_ARGS__}}}), 0)
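
// Editor's sketch, not part of the original file: an RMSNorm forward command
// normalizing the last axis of a rank-3 activation, analogous to the layer
// norm example above but without mean subtraction. The values epsilon = 1e-6
// and axis 2 are illustrative assumptions.
static ccv_nnc_cmd_t _example_rmsnorm_forward(void)
{
	return CMD_RMSNORM_FORWARD(1e-6, 2);
}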