Coverage Report

Created: 2024-08-19 11:27

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/comm/ccv_nnc_comm.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv.h"
2
#include "nnc/ccv_nnc.h"
3
#include "nnc/ccv_nnc_internal.h"
4
5
static int _ccv_nnc_allreduce_allow_inplace(const ccv_nnc_cmd_param_t cmd, const int input_idx, const int input_size, const int output_idx, const int output_size)
{
  // In-place execution is allowed only for matching positions: input i may
  // share memory with output i, never with any other output slot.
  return (input_idx == output_idx) ? 1 : 0;
}
9
10
static int _ccv_nnc_allreduce_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
  // Validate the input/output occupancy for forward all-reduce.
  // Every bitmask word must be a contiguous run of ones starting at bit 0
  // (always like 1111100000, never 1110010101), and once a word falls short
  // of 64 ones, every later word must be empty.
  int i;
  int input_short = 0; // set once an input word has fewer than 64 leading ones
  int input_bitcount = 0;
  for (i = 0; i < input_bitmask_size; i++)
  {
    const uint64_t w = input_bitmasks[i];
    int run = 0;
    while (run < 64 && (w & (uint64_t)1 << run))
      run++;
    if (run > 0 && input_short)
      return 0; // a set bit after an earlier short word breaks contiguity
    if (run < 64 && (w >> run) != 0)
      return 0; // stray bit above the leading run
    input_bitcount += run;
    if (run < 64)
      input_short = 1;
  }
  int output_short = 0;
  int output_bitcount = 0;
  for (i = 0; i < output_bitmask_size; i++)
  {
    const uint64_t w = output_bitmasks[i];
    int run = 0;
    while (run < 64 && (w & (uint64_t)1 << run))
      run++;
    if (run > 0 && output_short)
      return 0;
    if (run < 64 && (w >> run) != 0)
      return 0;
    output_bitcount += run;
    if (run < 64)
      output_short = 1;
  }
  // One-to-one pairing: every input slot filled, and exactly as many outputs.
  return output_bitcount == input_bitcount && input_size == output_size && input_size == input_bitcount;
}
52
53
static int _ccv_nnc_allreduce_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
  // Validate the input/output occupancy for backward all-reduce.
  // Every bitmask word must be a contiguous run of ones starting at bit 0
  // (always like 1111100000, never 1110010101), and once a word falls short
  // of 64 ones, every later word must be empty.
  int i;
  int input_short = 0; // set once an input word has fewer than 64 leading ones
  int input_bitcount = 0;
  for (i = 0; i < input_bitmask_size; i++)
  {
    const uint64_t w = input_bitmasks[i];
    int run = 0;
    while (run < 64 && (w & (uint64_t)1 << run))
      run++;
    if (run > 0 && input_short)
      return 0; // a set bit after an earlier short word breaks contiguity
    if (run < 64 && (w >> run) != 0)
      return 0; // stray bit above the leading run
    input_bitcount += run;
    if (run < 64)
      input_short = 1;
  }
  int output_short = 0;
  int output_bitcount = 0;
  for (i = 0; i < output_bitmask_size; i++)
  {
    const uint64_t w = output_bitmasks[i];
    int run = 0;
    while (run < 64 && (w & (uint64_t)1 << run))
      run++;
    if (run > 0 && output_short)
      return 0;
    if (run < 64 && (w >> run) != 0)
      return 0;
    output_bitcount += run;
    if (run < 64)
      output_short = 1;
  }
  // Every requested output present, and no more outputs occupied than inputs.
  return output_bitcount <= input_bitcount && output_bitcount == output_size;
}
95
96
// Registration for the forward all-reduce command; the only backend listed
// lives in gpu/ccv_nnc_comm_gpu_nccl.cu (NCCL).
REGISTER_COMMAND(CCV_NNC_COMM_ALLREDUCE_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
  FIND_BACKEND(gpu/ccv_nnc_comm_gpu_nccl.cu)
{
  // Validate I/O occupancy (contiguous bitmasks, one output per input).
  registry->bitmask = _ccv_nnc_allreduce_forw_bitmask;
  // Auto-fill output tensor params from the corresponding inputs.
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
  // Input i may run in place over output i.
  registry->allow_inplace = _ccv_nnc_allreduce_allow_inplace;
}
103
104
// Registration for the backward all-reduce command; backend in
// gpu/ccv_nnc_comm_gpu_nccl.cu (NCCL).
REGISTER_COMMAND(CCV_NNC_COMM_ALLREDUCE_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
  FIND_BACKEND(gpu/ccv_nnc_comm_gpu_nccl.cu)
{
  // Backward occupancy check (outputs <= inputs, all outputs present).
  registry->bitmask = _ccv_nnc_allreduce_back_bitmask;
  // Auto-fill tensor params from the incoming gradients.
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_gradient;
  // Input i may run in place over output i.
  registry->allow_inplace = _ccv_nnc_allreduce_allow_inplace;
}
111
112
// Convenience constructors: build the command with no flags and auto params.
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_COMM_ALLREDUCE_FORWARD)
#define CMD_COMM_ALLREDUCE_FORWARD() ccv_nnc_cmd(CCV_NNC_COMM_ALLREDUCE_FORWARD, 0, ccv_nnc_cmd_auto, 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_COMM_ALLREDUCE_BACKWARD)
#define CMD_COMM_ALLREDUCE_BACKWARD() ccv_nnc_cmd(CCV_NNC_COMM_ALLREDUCE_BACKWARD, 0, ccv_nnc_cmd_auto, 0)
116
117
static int _ccv_nnc_arbitary_inplace(const ccv_nnc_cmd_param_t cmd, const int input_idx, const int input_size, const int output_idx, const int output_size)
{
  // Unconditional: any input may share memory with any output for the
  // commands that register this predicate.
  return 1;
}
121
122
static int _ccv_nnc_broadcast_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
  // Validate the input/output occupancy for forward broadcast.
  // Every bitmask word must be a contiguous run of ones starting at bit 0
  // (always like 1111100000, never 1110010101), and once a word falls short
  // of 64 ones, every later word must be empty.
  int i;
  int input_short = 0; // set once an input word has fewer than 64 leading ones
  int input_bitcount = 0;
  for (i = 0; i < input_bitmask_size; i++)
  {
    const uint64_t w = input_bitmasks[i];
    int run = 0;
    while (run < 64 && (w & (uint64_t)1 << run))
      run++;
    if (run > 0 && input_short)
      return 0; // a set bit after an earlier short word breaks contiguity
    if (run < 64 && (w >> run) != 0)
      return 0; // stray bit above the leading run
    input_bitcount += run;
    if (run < 64)
      input_short = 1;
  }
  int output_short = 0;
  int output_bitcount = 0;
  for (i = 0; i < output_bitmask_size; i++)
  {
    const uint64_t w = output_bitmasks[i];
    int run = 0;
    while (run < 64 && (w & (uint64_t)1 << run))
      run++;
    if (run > 0 && output_short)
      return 0;
    if (run < 64 && (w >> run) != 0)
      return 0;
    output_bitcount += run;
    if (run < 64)
      output_short = 1;
  }
  // Exactly one input, fanned out to at least one (and every) output.
  return output_bitcount >= 1 && output_bitcount == output_size && input_size == 1 && input_size == input_bitcount;
}
164
165
static int _ccv_nnc_broadcast_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
  // Validate the input/output occupancy for backward broadcast.
  // Every bitmask word must be a contiguous run of ones starting at bit 0
  // (always like 1111100000, never 1110010101), and once a word falls short
  // of 64 ones, every later word must be empty.
  int i;
  int input_short = 0; // set once an input word has fewer than 64 leading ones
  int input_bitcount = 0;
  for (i = 0; i < input_bitmask_size; i++)
  {
    const uint64_t w = input_bitmasks[i];
    int run = 0;
    while (run < 64 && (w & (uint64_t)1 << run))
      run++;
    if (run > 0 && input_short)
      return 0; // a set bit after an earlier short word breaks contiguity
    if (run < 64 && (w >> run) != 0)
      return 0; // stray bit above the leading run
    input_bitcount += run;
    if (run < 64)
      input_short = 1;
  }
  int output_short = 0;
  int output_bitcount = 0;
  for (i = 0; i < output_bitmask_size; i++)
  {
    const uint64_t w = output_bitmasks[i];
    int run = 0;
    while (run < 64 && (w & (uint64_t)1 << run))
      run++;
    if (run > 0 && output_short)
      return 0;
    if (run < 64 && (w >> run) != 0)
      return 0;
    output_bitcount += run;
    if (run < 64)
      output_short = 1;
  }
  // All inputs and all outputs present, with at least one of each.
  return input_bitcount >= 1 && input_bitcount == input_size && output_bitcount == output_size && output_size >= 1;
}
207
208
// Registration for the forward broadcast command; backend in
// gpu/ccv_nnc_comm_gpu_nccl.cu (NCCL).
REGISTER_COMMAND(CCV_NNC_COMM_BROADCAST_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
  FIND_BACKEND(gpu/ccv_nnc_comm_gpu_nccl.cu)
{
  // Exactly one input, fanned out to every output.
  registry->bitmask = _ccv_nnc_broadcast_forw_bitmask;
  // NOTE(review): forward uses the backward_from_gradient hint (and broadcast
  // backward uses forward_from_inputs) — presumably because broadcast's many
  // outputs mirror the single input's params; confirm against other comm cmds.
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_gradient;
  // Any input may run in place over any output.
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
}
215
216
// Registration for the backward broadcast command; backend in
// gpu/ccv_nnc_comm_gpu_nccl.cu (NCCL).
REGISTER_COMMAND(CCV_NNC_COMM_BROADCAST_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
  FIND_BACKEND(gpu/ccv_nnc_comm_gpu_nccl.cu)
{
  // All inputs and all outputs must be present.
  registry->bitmask = _ccv_nnc_broadcast_back_bitmask;
  // Auto-fill output tensor params from the inputs.
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
  // Any input may run in place over any output.
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
}
223
224
// Convenience constructors: build the command with no flags and auto params.
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_COMM_BROADCAST_FORWARD)
#define CMD_COMM_BROADCAST_FORWARD() ccv_nnc_cmd(CCV_NNC_COMM_BROADCAST_FORWARD, 0, ccv_nnc_cmd_auto, 0)
// The annotation below previously said ALLREDUCE_BACKWARD — a copy-paste typo;
// it annotates the BROADCAST_BACKWARD macro.
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_COMM_BROADCAST_BACKWARD)
#define CMD_COMM_BROADCAST_BACKWARD() ccv_nnc_cmd(CCV_NNC_COMM_BROADCAST_BACKWARD, 0, ccv_nnc_cmd_auto, 0)
228
229
static int _ccv_nnc_first_inplace(const ccv_nnc_cmd_param_t cmd, const int input_idx, const int input_size, const int output_idx, const int output_size)
{
  // Only the first input/output pair (index 0) may share memory in place.
  // Fix: the previous text read `input_idx == 08`; `08` is an invalid octal
  // constant (8 is not an octal digit — a compile error). The intended
  // constant, matching the function's name ("first"), is 0.
  return input_idx == output_idx && input_idx == 0;
}
233
234
static int _ccv_nnc_reduce_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
  // Validate the input/output occupancy for forward reduce.
  // Every bitmask word must be a contiguous run of ones starting at bit 0
  // (always like 1111100000, never 1110010101), and once a word falls short
  // of 64 ones, every later word must be empty.
  int i;
  int input_short = 0; // set once an input word has fewer than 64 leading ones
  int input_bitcount = 0;
  for (i = 0; i < input_bitmask_size; i++)
  {
    const uint64_t w = input_bitmasks[i];
    int run = 0;
    while (run < 64 && (w & (uint64_t)1 << run))
      run++;
    if (run > 0 && input_short)
      return 0; // a set bit after an earlier short word breaks contiguity
    if (run < 64 && (w >> run) != 0)
      return 0; // stray bit above the leading run
    input_bitcount += run;
    if (run < 64)
      input_short = 1;
  }
  int output_short = 0;
  int output_bitcount = 0;
  for (i = 0; i < output_bitmask_size; i++)
  {
    const uint64_t w = output_bitmasks[i];
    int run = 0;
    while (run < 64 && (w & (uint64_t)1 << run))
      run++;
    if (run > 0 && output_short)
      return 0;
    if (run < 64 && (w >> run) != 0)
      return 0;
    output_bitcount += run;
    if (run < 64)
      output_short = 1;
  }
  // Equal arity on both sides, every slot filled, at least one input.
  return output_size == input_size && output_bitcount == output_size && input_size >= 1 && input_size == input_bitcount;
}
276
277
static int _ccv_nnc_reduce_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
  // Validate the input/output occupancy for backward reduce.
  // Every bitmask word must be a contiguous run of ones starting at bit 0
  // (always like 1111100000, never 1110010101), and once a word falls short
  // of 64 ones, every later word must be empty.
  int i;
  int input_short = 0; // set once an input word has fewer than 64 leading ones
  int input_bitcount = 0;
  for (i = 0; i < input_bitmask_size; i++)
  {
    const uint64_t w = input_bitmasks[i];
    int run = 0;
    while (run < 64 && (w & (uint64_t)1 << run))
      run++;
    if (run > 0 && input_short)
      return 0; // a set bit after an earlier short word breaks contiguity
    if (run < 64 && (w >> run) != 0)
      return 0; // stray bit above the leading run
    input_bitcount += run;
    if (run < 64)
      input_short = 1;
  }
  int output_short = 0;
  int output_bitcount = 0;
  for (i = 0; i < output_bitmask_size; i++)
  {
    const uint64_t w = output_bitmasks[i];
    int run = 0;
    while (run < 64 && (w & (uint64_t)1 << run))
      run++;
    if (run > 0 && output_short)
      return 0;
    if (run < 64 && (w >> run) != 0)
      return 0;
    output_bitcount += run;
    if (run < 64)
      output_short = 1;
  }
  // All inputs and all outputs present, with at least one of each.
  return input_bitcount >= 1 && input_bitcount == input_size && output_bitcount == output_size && output_size >= 1;
}
319
320
// Registration for the forward reduce command; backend in
// gpu/ccv_nnc_comm_gpu_nccl.cu (NCCL).
REGISTER_COMMAND(CCV_NNC_COMM_REDUCE_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
  FIND_BACKEND(gpu/ccv_nnc_comm_gpu_nccl.cu)
{
  // Equal input/output arity, every slot filled.
  registry->bitmask = _ccv_nnc_reduce_forw_bitmask;
  // Auto-fill output tensor params from the inputs.
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_forward_from_inputs;
  // Only the first input/output pair may share memory.
  registry->allow_inplace = _ccv_nnc_first_inplace;
}
327
328
// Registration for the backward reduce command; backend in
// gpu/ccv_nnc_comm_gpu_nccl.cu (NCCL).
REGISTER_COMMAND(CCV_NNC_COMM_REDUCE_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
  FIND_BACKEND(gpu/ccv_nnc_comm_gpu_nccl.cu)
{
  // All inputs and all outputs must be present.
  registry->bitmask = _ccv_nnc_reduce_back_bitmask;
  // Auto-fill tensor params from the incoming gradients.
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_gradient;
  // Any input may run in place over any output.
  registry->allow_inplace = _ccv_nnc_arbitary_inplace;
}
335
336
// Convenience constructors: build the command with no flags and auto params.
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_COMM_REDUCE_FORWARD)
#define CMD_COMM_REDUCE_FORWARD() ccv_nnc_cmd(CCV_NNC_COMM_REDUCE_FORWARD, 0, ccv_nnc_cmd_auto, 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_COMM_REDUCE_BACKWARD)
#define CMD_COMM_REDUCE_BACKWARD() ccv_nnc_cmd(CCV_NNC_COMM_REDUCE_BACKWARD, 0, ccv_nnc_cmd_auto, 0)