Coverage Report

Created: 2026-04-14 19:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/ew/ccv_nnc_ew.c
Line
Count
Source
1
#include "ccv.h"
2
#include "nnc/ccv_nnc.h"
3
#include "nnc/ccv_nnc_internal.h"
4
5
static int _ccv_nnc_arbitary_inplace(const ccv_nnc_cmd_param_t cmd, const int input_idx, const int input_size, const int output_idx, const int output_size)
6
11.6k
{
7
11.6k
  return 1;
8
11.6k
}
9
10
static int _ccv_nnc_ewsum_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
11
126
{
12
126
  if (output_size == 1 && output_bitmasks[0] == 1)
13
126
  {
14
126
    int i, j, flag = 0;
15
126
    int input_bitcount = 0;
16
252
    for (i = 0; i < input_bitmask_size; 
i++126
)
17
126
    {
18
382
      for (j = 0; j < 64; 
j++256
)
19
382
        if (input_bitmasks[i] & (uint64_t)1 << j)
20
256
        {
21
256
          if (flag)
22
0
            return 0;
23
256
        } else
24
126
          break;
25
126
      input_bitcount += j;
26
      // Trailing zero even if it is not the end of input_bitmask_size, mark flag,
27
      // if we encounter additional 1, return invalid.
28
126
      if (j < 64)
29
126
        flag = 1;
30
      // Always like 1111100000, no 1110010101
31
7.93k
      for (; j < 64; 
j++7.80k
)
32
7.80k
        if (input_bitmasks[i] & (uint64_t)1 << j)
33
0
          return 0;
34
126
    }
35
126
    return input_size == input_bitcount;
36
126
  }
37
0
  return 0;
38
126
}
39
40
static int _ccv_nnc_ewsum_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
41
351
{
42
351
  if (input_size >= 1 && (input_bitmasks[0] & 1u) == 1u)
43
271
  {
44
271
    int i, j, flag = 0;
45
271
    int output_bitcount = 0;
46
530
    for (i = 0; i < output_bitmask_size; 
i++259
)
47
271
    {
48
760
      for (j = 0; j < 64; 
j++489
)
49
760
        if (output_bitmasks[i] & (uint64_t)1 << j)
50
489
        {
51
489
          if (flag)
52
0
            return 0;
53
489
        } else
54
271
          break;
55
271
      output_bitcount += j;
56
      // Trailing zero even if it is not the end of output_bitmask_size, mark flag,
57
      // if we encounter additional 1, return invalid.
58
271
      if (j < 64)
59
271
        flag = 1;
60
      // Always like 1111100000, no 1110010101
61
16.3k
      for (; j < 64; 
j++16.0k
)
62
16.1k
        if (output_bitmasks[i] & (uint64_t)1 << j)
63
12
          return 0;
64
271
    }
65
259
    return output_size == output_bitcount;
66
271
  }
67
80
  return 0;
68
351
}
69
70
REGISTER_COMMAND(CCV_NNC_EWSUM_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
71
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_cudnn.cu, mps/ccv_nnc_ew_mps.m)
72
1
{
73
1
  registry->bitmask = _ccv_nnc_ewsum_forw_bitmask;
74
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
75
1
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
76
1
}
77
78
REGISTER_COMMAND(CCV_NNC_EWSUM_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
79
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_cudnn.cu, mps/ccv_nnc_ew_mps.m)
80
1
{
81
1
  registry->flags = CCV_NNC_CMD_ATTR_PASSTHROUGH | CCV_NNC_CMD_ATTR_NULL_IS_ONES;
82
1
  registry->bitmask = _ccv_nnc_ewsum_back_bitmask;
83
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_gradient;
84
1
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
85
1
}
86
87
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWSUM_FORWARD)
88
#define CMD_EWSUM_FORWARD() ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0)
89
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWSUM_BACKWARD)
90
#define CMD_EWSUM_BACKWARD() ccv_nnc_cmd(CCV_NNC_EWSUM_BACKWARD, 0, ccv_nnc_cmd_auto, 0)
91
92
static int _ccv_nnc_ewprod_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
93
59
{
94
59
  if (output_size == 1 && output_bitmasks[0] == 1)
95
59
  {
96
59
    int i, j, flag = 0;
97
59
    int input_bitcount = 0;
98
118
    for (i = 0; i < input_bitmask_size; 
i++59
)
99
59
    {
100
177
      for (j = 0; j < 64; 
j++118
)
101
177
        if (input_bitmasks[i] & (uint64_t)1 << j)
102
118
        {
103
118
          if (flag)
104
0
            return 0;
105
118
        } else
106
59
          break;
107
59
      input_bitcount += j;
108
      // Trailing zero even if it is not the end of input_bitmask_size, mark flag,
109
      // if we encounter additional 1, return invalid.
110
59
      if (j < 64)
111
59
        flag = 1;
112
      // Always like 1111100000, no 1110010101
113
3.71k
      for (; j < 64; 
j++3.65k
)
114
3.65k
        if (input_bitmasks[i] & (uint64_t)1 << j)
115
0
          return 0;
116
59
    }
117
59
    return input_size == input_bitcount;
118
59
  }
119
0
  return 0;
120
59
}
121
122
static int _ccv_nnc_ewprod_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
123
11.3k
{
124
11.3k
  int i, j;
125
11.3k
  int input_flag = 0;
126
11.3k
  int input_bitcount = 0;
127
15.8k
  for (i = 0; i < input_bitmask_size; 
i++4.54k
)
128
11.3k
  {
129
34.0k
    for (j = 0; j < 64; 
j++22.6k
)
130
34.0k
      if (input_bitmasks[i] & (uint64_t)1 << j)
131
22.6k
      {
132
22.6k
        if (input_flag)
133
0
          return 0;
134
22.6k
      } else
135
11.3k
        break;
136
11.3k
    input_bitcount += j;
137
11.3k
    if (j < 64)
138
11.3k
      input_flag = 1;
139
    // Always like 1111100000, no 1110010101
140
292k
    for (; j < 64; 
j++281k
)
141
288k
      if (input_bitmasks[i] & (uint64_t)1 << j)
142
6.78k
        return 0;
143
11.3k
  }
144
4.54k
  int output_flag = 0;
145
4.54k
  int output_bitcount = 0;
146
9.07k
  for (i = 0; i < output_bitmask_size; 
i++4.53k
)
147
4.54k
  {
148
13.6k
    for (j = 0; j < 64; 
j++9.06k
)
149
13.6k
      if ((output_bitmasks[i] & (uint64_t)1 << j))
150
9.06k
      {
151
9.06k
        if (output_flag)
152
0
          return 0;
153
9.06k
      } else
154
4.54k
        break;
155
4.54k
    output_bitcount += j;
156
4.54k
    if (j < 64)
157
4.54k
      output_flag = 1;
158
285k
    for (; j < 64; 
j++281k
)
159
281k
      if (output_bitmasks[i] & (uint64_t)1 << j)
160
2
        return 0;
161
4.54k
  }
162
4.53k
  if (output_bitcount != output_size)
163
10
    return 0;
164
4.52k
  return output_bitcount + 2 /* Gradient + Original output */ == input_bitcount;
165
4.53k
}
166
167
REGISTER_COMMAND(CCV_NNC_EWPROD_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
168
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c)
169
1
{
170
1
  registry->bitmask = _ccv_nnc_ewprod_forw_bitmask;
171
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
172
1
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
173
1
}
174
175
REGISTER_COMMAND(CCV_NNC_EWPROD_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
176
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c)
177
1
{
178
1
  registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES;
179
1
  registry->bitmask = _ccv_nnc_ewprod_back_bitmask;
180
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_gradient;
181
1
}
182
183
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWPROD_FORWARD)
184
#define CMD_EWPROD_FORWARD() ccv_nnc_cmd(CCV_NNC_EWPROD_FORWARD, 0, ccv_nnc_cmd_auto, 0)
185
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWPROD_BACKWARD)
186
#define CMD_EWPROD_BACKWARD() ccv_nnc_cmd(CCV_NNC_EWPROD_BACKWARD, 0, ccv_nnc_cmd_auto, 0)
187
188
static int _ccv_nnc_ewdiv_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
189
4
{
190
4
  if ((input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1)) && 
output_bitmasks[0] == 1u2
)
191
2
    return 1;
192
  // Numerator can be null (meaning 1).
193
2
  if ((input_bitmasks[0] & 3u) == ((0u << 0) | (1u << 1)) && output_bitmasks[0] == 1u)
194
2
    return 1;
195
0
  return 0;
196
2
}
197
198
static int _ccv_nnc_ewdiv_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
199
72
{
200
72
  if ((input_bitmasks[0] & (15u & ~((uint64_t)1u << 1))) == ((1u << 0) | (0u << 1) | (1u << 2) | (1u << 3)) && 
output_bitmasks[0] == ((1u << 0) | (1u << 1))21
)
201
5
    return 1;
202
  // We don't need to know the original output.
203
67
  if ((input_bitmasks[0] & (15u & ~((uint64_t)1u << 1))) == ((1u << 0) | (0u << 1) | (1u << 2) | (0u << 3)) && 
output_bitmasks[0] == ((1u << 0) | (0u << 1))18
)
204
0
    return 1;
205
67
  if ((input_bitmasks[0] & (15u & ~((uint64_t)1u << 1))) == ((1u << 0) | (0u << 1) | (1u << 2) | (1u << 3)) && 
output_bitmasks[0] == ((0u << 0) | (1u << 1))16
)
206
16
    return 1;
207
51
  return 0;
208
67
}
209
210
static void _ccv_nnc_ewdiv_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
211
49
{
212
49
  assert(output_size >= 1);
213
49
  assert(input_size >= 2);
214
49
  int i;
215
98
  for (i = 0; i < output_size; 
i++49
)
216
49
    outputs[i] = inputs[1];
217
49
}
218
219
REGISTER_COMMAND(CCV_NNC_EWDIV_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
220
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu, mps/ccv_nnc_ew_mps.m)
221
1
{
222
1
  registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES;
223
1
  registry->bitmask = _ccv_nnc_ewdiv_forw_bitmask;
224
1
  registry->tensor_auto = _ccv_nnc_ewdiv_tensor_auto_forw;
225
1
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
226
1
}
227
228
REGISTER_COMMAND(CCV_NNC_EWDIV_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
229
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu, mps/ccv_nnc_ew_mps.m)
230
1
{
231
1
  registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES;
232
1
  registry->bitmask = _ccv_nnc_ewdiv_back_bitmask;
233
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_gradient;
234
1
}
235
236
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWDIV_FORWARD)
237
#define CMD_EWDIV_FORWARD() ccv_nnc_cmd(CCV_NNC_EWDIV_FORWARD, 0, ccv_nnc_cmd_auto, 0)
238
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWDIV_BACKWARD)
239
#define CMD_EWDIV_BACKWARD() ccv_nnc_cmd(CCV_NNC_EWDIV_BACKWARD, 0, ccv_nnc_cmd_auto, 0)
240
241
static int _ccv_nnc_ewexp_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
242
0
{
243
0
  if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == 1u)
244
0
    return 1;
245
0
  return 0;
246
0
}
247
248
static int _ccv_nnc_ewexp_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
249
54
{
250
  // We don't care about the original input.
251
54
  if ((input_bitmasks[0] & (7u & ~((uint64_t)1u << 1))) == ((1u << 0) | (0u << 1) | (1u << 2)) && 
output_bitmasks[0] == 1u18
)
252
18
    return 1;
253
36
  return 0;
254
54
}
255
256
REGISTER_COMMAND(CCV_NNC_EWEXP_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
257
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu, mps/ccv_nnc_ew_mps.m)
258
1
{
259
1
  registry->bitmask = _ccv_nnc_ewexp_forw_bitmask;
260
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
261
1
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
262
1
}
263
264
REGISTER_COMMAND(CCV_NNC_EWEXP_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
265
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu, mps/ccv_nnc_ew_mps.m)
266
1
{
267
1
  registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES;
268
1
  registry->bitmask = _ccv_nnc_ewexp_back_bitmask;
269
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_gradient;
270
1
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
271
1
}
272
273
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWEXP_FORWARD)
274
#define CMD_EWEXP_FORWARD() ccv_nnc_cmd(CCV_NNC_EWEXP_FORWARD, 0, ccv_nnc_cmd_auto, 0)
275
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWEXP_BACKWARD)
276
#define CMD_EWEXP_BACKWARD() ccv_nnc_cmd(CCV_NNC_EWEXP_BACKWARD, 0, ccv_nnc_cmd_auto, 0)
277
278
static int _ccv_nnc_ewpow_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
279
2
{
280
2
  if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == 1u)
281
2
    return 1;
282
0
  return 0;
283
2
}
284
285
static int _ccv_nnc_ewpow_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
286
9
{
287
  // We only care about the original input.
288
9
  if ((input_bitmasks[0] & (7u & ~((uint64_t)1u << 0) & ~((uint64_t)1u << 2))) == ((0u << 0) | (1u << 1) | (0u << 2)) && 
output_bitmasks[0] == 1u5
)
289
5
    return 1;
290
4
  return 0;
291
9
}
292
293
static void _ccv_nnc_ewpow_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
294
8
{
295
8
  assert(output_size >= 1);
296
8
  assert(input_size >= 1);
297
8
  int i;
298
16
  for (i = 0; i < output_size; 
i++8
)
299
8
    outputs[i] = inputs[0];
300
8
}
301
302
REGISTER_COMMAND(CCV_NNC_EWPOW_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
303
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu, mps/ccv_nnc_ew_mps.m)
304
1
{
305
1
  registry->bitmask = _ccv_nnc_ewpow_forw_bitmask;
306
1
  registry->tensor_auto = _ccv_nnc_ewpow_tensor_auto_forw;
307
1
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
308
1
}
309
310
REGISTER_COMMAND(CCV_NNC_EWPOW_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
311
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu)
312
1
{
313
1
  registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES;
314
1
  registry->bitmask = _ccv_nnc_ewpow_back_bitmask;
315
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_gradient;
316
1
}
317
318
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWPOW_FORWARD)
319
#define CMD_EWPOW_FORWARD(_exponent) ccv_nnc_cmd(CCV_NNC_EWPOW_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.pow={.exponent=_exponent}}), 0)
320
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWPOW_BACKWARD)
321
#define CMD_EWPOW_BACKWARD(_exponent) ccv_nnc_cmd(CCV_NNC_EWPOW_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.pow={.exponent=_exponent}}), 0)
322
323
static int _ccv_nnc_ewlog_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
324
2
{
325
2
  if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == 1u)
326
2
    return 1;
327
0
  return 0;
328
2
}
329
330
static int _ccv_nnc_ewlog_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
331
1.32k
{
332
  // We don't care about the original output.
333
1.32k
  if ((input_bitmasks[0] & 3u) == 3u && 
output_bitmasks[0] == 1u442
)
334
442
    return 1;
335
878
  return 0;
336
1.32k
}
337
338
REGISTER_COMMAND(CCV_NNC_EWLOG_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
339
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu, mps/ccv_nnc_ew_mps.m)
340
1
{
341
1
  registry->bitmask = _ccv_nnc_ewlog_forw_bitmask;
342
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
343
1
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
344
1
}
345
346
REGISTER_COMMAND(CCV_NNC_EWLOG_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
347
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu, mps/ccv_nnc_ew_mps.m)
348
1
{
349
1
  registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES;
350
1
  registry->bitmask = _ccv_nnc_ewlog_back_bitmask;
351
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_gradient;
352
1
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
353
1
}
354
355
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWLOG_FORWARD)
356
#define CMD_EWLOG_FORWARD() ccv_nnc_cmd(CCV_NNC_EWLOG_FORWARD, 0, ccv_nnc_cmd_auto, 0)
357
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWLOG_BACKWARD)
358
#define CMD_EWLOG_BACKWARD() ccv_nnc_cmd(CCV_NNC_EWLOG_BACKWARD, 0, ccv_nnc_cmd_auto, 0)
359
360
static int _ccv_nnc_ewsqrt_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
361
0
{
362
0
  if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == 1u)
363
0
    return 1;
364
0
  return 0;
365
0
}
366
367
static int _ccv_nnc_ewsqrt_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
368
6
{
369
  // We don't care about the original input.
370
6
  if ((input_bitmasks[0] & (7u & ~((uint64_t)1u << 1))) == ((1u << 0) | (0u << 1) | (1u << 2)) && 
output_bitmasks[0] == 1u2
)
371
2
    return 1;
372
4
  return 0;
373
6
}
374
375
REGISTER_COMMAND(CCV_NNC_EWSQRT_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
376
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu, mps/ccv_nnc_ew_mps.m)
377
1
{
378
1
  registry->bitmask = _ccv_nnc_ewsqrt_forw_bitmask;
379
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
380
1
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
381
1
}
382
383
REGISTER_COMMAND(CCV_NNC_EWSQRT_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
384
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu, mps/ccv_nnc_ew_mps.m)
385
1
{
386
1
  registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES;
387
1
  registry->bitmask = _ccv_nnc_ewsqrt_back_bitmask;
388
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_gradient;
389
1
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
390
1
}
391
392
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWSQRT_FORWARD)
393
#define CMD_EWSQRT_FORWARD() ccv_nnc_cmd(CCV_NNC_EWSQRT_FORWARD, 0, ccv_nnc_cmd_auto, 0)
394
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWSQRT_BACKWARD)
395
#define CMD_EWSQRT_BACKWARD() ccv_nnc_cmd(CCV_NNC_EWSQRT_BACKWARD, 0, ccv_nnc_cmd_auto, 0)
396
397
static int _ccv_nnc_ewsin_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
398
2
{
399
2
  if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == 1u)
400
2
    return 1;
401
0
  return 0;
402
2
}
403
404
static int _ccv_nnc_ewsin_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
405
9
{
406
  // We only care about the original input.
407
9
  if ((input_bitmasks[0] & (7u & ~((uint64_t)1u << 0) & ~((uint64_t)1u << 2))) == ((0u << 0) | (1u << 1) | (0u << 2)) && 
output_bitmasks[0] == 1u5
)
408
5
    return 1;
409
4
  return 0;
410
9
}
411
412
REGISTER_COMMAND(CCV_NNC_EWSIN_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
413
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu, mps/ccv_nnc_ew_mps.m)
414
1
{
415
1
  registry->bitmask = _ccv_nnc_ewsin_forw_bitmask;
416
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
417
1
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
418
1
}
419
420
REGISTER_COMMAND(CCV_NNC_EWSIN_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
421
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu)
422
1
{
423
1
  registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES;
424
1
  registry->bitmask = _ccv_nnc_ewsin_back_bitmask;
425
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_gradient;
426
1
}
427
428
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWSIN_FORWARD)
429
#define CMD_EWSIN_FORWARD() ccv_nnc_cmd(CCV_NNC_EWSIN_FORWARD, 0, ccv_nnc_cmd_auto, 0)
430
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWSIN_BACKWARD)
431
#define CMD_EWSIN_BACKWARD() ccv_nnc_cmd(CCV_NNC_EWSIN_BACKWARD, 0, ccv_nnc_cmd_auto, 0)
432
433
static int _ccv_nnc_ewcos_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
434
2
{
435
2
  if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == 1u)
436
2
    return 1;
437
0
  return 0;
438
2
}
439
440
static int _ccv_nnc_ewcos_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
441
9
{
442
  // We only care about the original input.
443
9
  if ((input_bitmasks[0] & (7u & ~((uint64_t)1u << 0) & ~((uint64_t)1u << 2))) == ((0u << 0) | (1u << 1) | (0u << 2)) && 
output_bitmasks[0] == 1u5
)
444
5
    return 1;
445
4
  return 0;
446
9
}
447
448
REGISTER_COMMAND(CCV_NNC_EWCOS_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
449
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu, mps/ccv_nnc_ew_mps.m)
450
1
{
451
1
  registry->bitmask = _ccv_nnc_ewcos_forw_bitmask;
452
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
453
1
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
454
1
}
455
456
REGISTER_COMMAND(CCV_NNC_EWCOS_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
457
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu)
458
1
{
459
1
  registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES;
460
1
  registry->bitmask = _ccv_nnc_ewcos_back_bitmask;
461
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_gradient;
462
1
}
463
464
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWCOS_FORWARD)
465
#define CMD_EWCOS_FORWARD() ccv_nnc_cmd(CCV_NNC_EWCOS_FORWARD, 0, ccv_nnc_cmd_auto, 0)
466
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWCOS_BACKWARD)
467
#define CMD_EWCOS_BACKWARD() ccv_nnc_cmd(CCV_NNC_EWCOS_BACKWARD, 0, ccv_nnc_cmd_auto, 0)
468
469
static int _ccv_nnc_ewabs_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
470
0
{
471
0
  if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == 1u)
472
0
    return 1;
473
0
  return 0;
474
0
}
475
476
static int _ccv_nnc_ewabs_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
477
0
{
478
  // We don't care about the original output.
479
0
  if ((input_bitmasks[0] & (7u & ~((uint64_t)1u << 2))) == ((1u << 0) | (1u << 1) | (0u << 2)) && output_bitmasks[0] == 1u)
480
0
    return 1;
481
0
  return 0;
482
0
}
483
484
REGISTER_COMMAND(CCV_NNC_EWABS_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
485
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu, mps/ccv_nnc_ew_mps.m)
486
1
{
487
1
  registry->bitmask = _ccv_nnc_ewabs_forw_bitmask;
488
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
489
1
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
490
1
}
491
492
REGISTER_COMMAND(CCV_NNC_EWABS_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
493
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu, mps/ccv_nnc_ew_mps.m)
494
1
{
495
1
  registry->bitmask = _ccv_nnc_ewabs_back_bitmask;
496
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_gradient;
497
1
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
498
1
}
499
500
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWABS_FORWARD)
501
#define CMD_EWABS_FORWARD() ccv_nnc_cmd(CCV_NNC_EWABS_FORWARD, 0, ccv_nnc_cmd_auto, 0)
502
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_EWABS_BACKWARD)
503
#define CMD_EWABS_BACKWARD() ccv_nnc_cmd(CCV_NNC_EWABS_BACKWARD, 0, ccv_nnc_cmd_auto, 0)
504
505
static int _ccv_nnc_clamp_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
506
0
{
507
0
  if ((input_bitmasks[0] & 1u) == 1u && output_bitmasks[0] == 1u)
508
0
    return 1;
509
0
  return 0;
510
0
}
511
512
static int _ccv_nnc_clamp_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
513
0
{
514
  // We don't care about the original input.
515
0
  if ((input_bitmasks[0] & (7u & ~((uint64_t)1u << 1))) == ((1u << 0) | (0u << 1) | (1u << 2)) && output_bitmasks[0] == 1u)
516
0
    return 1;
517
0
  return 0;
518
0
}
519
520
REGISTER_COMMAND(CCV_NNC_CLAMP_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
521
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu, mps/ccv_nnc_ew_mps.m)
522
1
{
523
1
  registry->bitmask = _ccv_nnc_clamp_forw_bitmask;
524
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
525
1
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
526
1
}
527
528
REGISTER_COMMAND(CCV_NNC_CLAMP_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
529
  FIND_BACKEND(ccv_nnc_ew_cpu_ref.c, gpu/ccv_nnc_ew_gpu_ref.cu, mps/ccv_nnc_ew_mps.m)
530
1
{
531
1
  registry->flags = CCV_NNC_CMD_ATTR_NULL_IS_ONES;
532
1
  registry->bitmask = _ccv_nnc_clamp_back_bitmask;
533
1
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_gradient;
534
1
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
535
1
}
536
537
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_CLAMP_FORWARD)
538
#define CMD_CLAMP_FORWARD(_min, _max) ccv_nnc_cmd(CCV_NNC_CLAMP_FORWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.clamp={.min=_min,.max=_max}}, 0)
539
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_CLAMP_BACKWARD)
540
#define CMD_CLAMP_BACKWARD(_min, _max) ccv_nnc_cmd(CCV_NNC_CLAMP_BACKWARD, 0, (ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.clamp={.min=_min,.max=_max}}, 0)