Coverage Report

Created: 2025-02-24 17:43

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/dropout/ccv_nnc_dropout_cpu_ref.c
Line | Count | Source
   1 |       | #include "ccv.h"
   2 |       | #include "ccv_internal.h"
   3 |       | #include "nnc/ccv_nnc.h"
   4 |       | #include "nnc/ccv_nnc_easy.h"
   5 |       | #include "nnc/ccv_nnc_internal.h"
   6 |       | #ifdef USE_OPENMP
   7 |       | #include <omp.h>
   8 |       | #endif
   9 |       | #ifdef USE_DISPATCH
  10 |       | #include <dispatch/dispatch.h>
  11 |       | #endif
  12 |       | #include "3rdparty/dsfmt/dSFMT.h"
  13 |       | 
  14 |       | // Shared methods.
  15 |       | #include "../_ccv_nnc_cpu_ref.h"
  16 |       | 
  17 |       | static int _ccv_nnc_dropout_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  18 |     4 | {
  19 |     4 |   const float p = cmd.info.dropout.p;
  20 |     4 |   const float inv_p = 1. / (1. - p);
  21 |     4 |   assert(output_size >= 2);
  22 |       |   // Assuming this is float 32.
  23 |     4 |   int dim[CCV_NNC_MAX_DIM_ALLOC];
  24 |     4 |   int astride[CCV_NNC_MAX_DIM_ALLOC];
  25 |     4 |   int bstride[CCV_NNC_MAX_DIM_ALLOC];
  26 |     4 |   ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
  27 |     4 |   ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
  28 |     4 |   assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
  29 |     4 |   assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
  30 |     4 |   ccv_nnc_tensor_view_get_dim(a, dim);
  31 |     4 |   assert(ccv_nnc_tensor_view_check_dim(b, dim));
  32 |     4 |   const int tensor_count = ccv_nnc_tensor_count(inputs[0]->info);
  33 |     4 |   uint8_t* const maskdata = outputs[1]->data.u8;
  34 |     4 |   dsfmt_t dsfmt;
  35 |     4 |   dsfmt_init_gen_rand(&dsfmt, ccv_nnc_stream_context_genrand_uint32(stream_context));
  36 |     4 |   int x;
  37 |     4 |   if (cmd.info.dropout.entirety)
  38 |     2 |   {
  39 |     2 |     const int32_t drop = ((int32_t*)maskdata)[0] = (dsfmt_genrand_open_close(&dsfmt) <= p);
  40 |     2 |     if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b))
  41 |     2 |     {
  42 |       |       // Fast path: both tensors are dense, so a single flat loop covers everything.
  43 | 2.00k |       for (x = 0; x < tensor_count; x++)
  44 | 2.00k |         b->data.f32[x] = drop ? 0 : a->data.f32[x] * inv_p;
     |       |         Branch (44): [True: 0, False: 2.00k]
  45 |     2 |       return CCV_NNC_EXEC_SUCCESS;
  46 |     2 |     }
  47 |     2 |     assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
  48 |     0 |     ccv_nnc_tensor_view_get_stride(a, astride);
  49 |     0 |     ccv_nnc_tensor_view_get_stride(b, bstride);
  50 |     0 |     int i[CCV_NNC_MAX_DIM + 2];
  51 |     0 |     float* const ap = a->data.f32;
  52 |     0 |     float* const bp = b->data.f32;
  53 |     0 |     const int count = dim[2] * dim[3];
  54 |     0 |     if (astride[2] == dim[3] && bstride[2] == dim[3])
  55 |     0 |     {
  56 |       |       // Special casing if astride[2] is the same as dim[3]
  57 |     0 |       for (i[0] = 0; i[0] < dim[0]; i[0]++)
  58 |     0 |       {
  59 |     0 |         float* ap0 = ap + i[0] * astride[0];
  60 |     0 |         float* bp0 = bp + i[0] * bstride[0];
  61 |     0 |         for (i[1] = 0; i[1] < dim[1]; i[1]++)
  62 |     0 |         {
  63 |     0 |           for (x = 0; x < count; x++)
  64 |     0 |             bp0[x] = drop ? 0 : ap0[x] * inv_p;
  65 |     0 |           ap0 += astride[1];
  66 |     0 |           bp0 += bstride[1];
  67 |     0 |         }
  68 |     0 |       }
  69 |     0 |       return CCV_NNC_EXEC_SUCCESS;
  70 |     0 |     }
  71 |       |     // Non-optimal case, need to do skip copy.
  72 |     0 |     for (i[0] = 0; i[0] < dim[0]; i[0]++)
  73 |     0 |     {
  74 |     0 |       float* const ap0 = ap + i[0] * astride[0];
  75 |     0 |       float* const bp0 = bp + i[0] * bstride[0];
  76 |     0 |       for (i[1] = 0; i[1] < dim[1]; i[1]++)
  77 |     0 |       {
  78 |     0 |         float* ap1 = ap0 + i[1] * astride[1];
  79 |     0 |         float* bp1 = bp0 + i[1] * bstride[1];
  80 |     0 |         for (i[2] = 0; i[2] < dim[2]; i[2]++)
  81 |     0 |         {
  82 |     0 |           for (x = 0; x < dim[3]; x++)
  83 |     0 |             bp1[x] = drop ? 0 : ap1[x] * inv_p;
  84 |     0 |           ap1 += astride[2];
  85 |     0 |           bp1 += bstride[2];
  86 |     0 |         }
  87 |     0 |       }
  88 |     0 |     }
  89 |     2 |   } else {
  90 |     2 |     uint8_t* maskp = maskdata + (tensor_count - 1);
  91 | 2.00k |     for (; maskp >= maskdata; --maskp)
  92 | 2.00k |       *maskp = (dsfmt_genrand_open_close(&dsfmt) <= p);
  93 |     2 |     if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b))
  94 |     2 |     {
  95 |       |       // Fast path: both tensors are dense, so a single flat loop covers everything.
  96 | 2.00k |       for (x = 0; x < tensor_count; x++)
  97 | 2.00k |         b->data.f32[x] = maskdata[x] ? 0 : a->data.f32[x] * inv_p;
     |       |         Branch (97): [True: 794, False: 1.20k]
  98 |     2 |       return CCV_NNC_EXEC_SUCCESS;
  99 |     2 |     }
 100 |     2 |     assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
 101 |     0 |     ccv_nnc_tensor_view_get_stride(a, astride);
 102 |     0 |     ccv_nnc_tensor_view_get_stride(b, bstride);
 103 |     0 |     int i[CCV_NNC_MAX_DIM + 2];
 104 |     0 |     float* const ap = a->data.f32;
 105 |     0 |     float* const bp = b->data.f32;
 106 |     0 |     const int count = dim[2] * dim[3];
 107 |     0 |     maskp = maskdata;
 108 |     0 |     if (astride[2] == dim[3] && bstride[2] == dim[3])
 109 |     0 |     {
 110 |       |       // Special casing if astride[2] is the same as dim[3]
 111 |     0 |       for (i[0] = 0; i[0] < dim[0]; i[0]++)
 112 |     0 |       {
 113 |     0 |         float* ap0 = ap + i[0] * astride[0];
 114 |     0 |         float* bp0 = bp + i[0] * bstride[0];
 115 |     0 |         for (i[1] = 0; i[1] < dim[1]; i[1]++)
 116 |     0 |         {
 117 |     0 |           for (x = 0; x < count; x++)
 118 |     0 |             bp0[x] = maskp[x] ? 0 : ap0[x] * inv_p;
 119 |     0 |           ap0 += astride[1];
 120 |     0 |           bp0 += bstride[1];
 121 |     0 |           maskp += count;
 122 |     0 |         }
 123 |     0 |       }
 124 |     0 |       return CCV_NNC_EXEC_SUCCESS;
 125 |     0 |     }
 126 |       |     // Non-optimal case, need to do skip copy.
 127 |     0 |     for (i[0] = 0; i[0] < dim[0]; i[0]++)
 128 |     0 |     {
 129 |     0 |       float* const ap0 = ap + i[0] * astride[0];
 130 |     0 |       float* const bp0 = bp + i[0] * bstride[0];
 131 |     0 |       for (i[1] = 0; i[1] < dim[1]; i[1]++)
 132 |     0 |       {
 133 |     0 |         float* ap1 = ap0 + i[1] * astride[1];
 134 |     0 |         float* bp1 = bp0 + i[1] * bstride[1];
 135 |     0 |         for (i[2] = 0; i[2] < dim[2]; i[2]++)
 136 |     0 |         {
 137 |     0 |           for (x = 0; x < dim[3]; x++)
 138 |     0 |             bp1[x] = maskp[x] ? 0 : ap1[x] * inv_p;
 139 |     0 |           maskp += dim[3];
 140 |     0 |           ap1 += astride[2];
 141 |     0 |           bp1 += bstride[2];
 142 |     0 |         }
 143 |     0 |       }
 144 |     0 |     }
 145 |     0 |   }
 146 |     0 |   return CCV_NNC_EXEC_SUCCESS;
 147 |     4 | }
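
The covered fast path above is plain inverted dropout: each element is zeroed with probability p, and survivors are scaled by 1 / (1 - p) so the expected activation is unchanged and inference needs no rescaling. Below is a minimal standalone sketch of that rule; the function name is hypothetical, and it uses stdlib rand() purely to stay self-contained, whereas the real kernel draws from dSFMT seeded via the stream context.

#include <stdint.h>
#include <stdlib.h>

/* Hypothetical sketch of the dense forward path: drop with probability p,
 * scale survivors by 1 / (1 - p) so E[b[x]] == E[a[x]]. */
static void dropout_forward_sketch(const float* a, float* b, uint8_t* mask, int n, float p)
{
  const float inv_p = 1.f / (1.f - p);
  int x;
  for (x = 0; x < n; x++)
  {
    mask[x] = (float)rand() / (float)RAND_MAX <= p; /* 1 = dropped */
    b[x] = mask[x] ? 0 : a[x] * inv_p; /* zero or rescale */
  }
}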
 148 |       | 
 149 |       | static int _ccv_nnc_dropout_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
 150 |     2 | {
 151 |     2 |   assert(input_size == 5);
 152 |     2 |   const float p = cmd.info.dropout.p;
 153 |     2 |   const float inv_p = 1. / (1. - p);
 154 |     2 |   uint8_t* const maskdata = inputs[4]->data.u8;
 155 |       |   // Assuming this is float 32.
 156 |     2 |   int dim[CCV_NNC_MAX_DIM_ALLOC];
 157 |     2 |   int gstride[CCV_NNC_MAX_DIM_ALLOC];
 158 |     2 |   int hstride[CCV_NNC_MAX_DIM_ALLOC];
 159 |     2 |   ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
 160 |     2 |   ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[0];
 161 |     2 |   assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
 162 |     2 |   assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2);
 163 |     2 |   ccv_nnc_tensor_view_get_dim(g, dim);
 164 |     2 |   assert(ccv_nnc_tensor_view_check_dim(h, dim));
 165 |     2 |   int x;
 166 |     2 |   if (cmd.info.dropout.entirety)
 167 |     1 |   {
 168 |     1 |     const int32_t drop = ((int32_t*)maskdata)[0];
 169 |     1 |     if (!CCV_IS_TENSOR_VIEW(g) && !CCV_IS_TENSOR_VIEW(h))
 170 |     1 |     {
 171 |       |       // Fast path: both tensors are dense, so a single flat loop covers everything.
 172 |     1 |       const int tensor_count = ccv_nnc_tensor_count(inputs[0]->info);
 173 | 1.00k |       for (x = 0; x < tensor_count; x++)
 174 | 1.00k |         h->data.f32[x] = drop ? 0 : g->data.f32[x] * inv_p;
     |       |         Branch (174): [True: 0, False: 1.00k]
 175 |     1 |       return CCV_NNC_EXEC_SUCCESS;
 176 |     1 |     }
 177 |     1 |     assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
 178 |     0 |     ccv_nnc_tensor_view_get_stride(g, gstride);
 179 |     0 |     ccv_nnc_tensor_view_get_stride(h, hstride);
 180 |     0 |     int i[CCV_NNC_MAX_DIM + 2];
 181 |     0 |     float* const gp = g->data.f32;
 182 |     0 |     float* const hp = h->data.f32;
 183 |     0 |     const int count = dim[2] * dim[3];
 184 |     0 |     if (gstride[2] == dim[3] && hstride[2] == dim[3])
 185 |     0 |     {
 186 |       |       // Special casing if gstride[2] is the same as dim[3]
 187 |     0 |       for (i[0] = 0; i[0] < dim[0]; i[0]++)
 188 |     0 |       {
 189 |     0 |         float* gp0 = gp + i[0] * gstride[0];
 190 |     0 |         float* hp0 = hp + i[0] * hstride[0];
 191 |     0 |         for (i[1] = 0; i[1] < dim[1]; i[1]++)
 192 |     0 |         {
 193 |     0 |           for (x = 0; x < count; x++)
 194 |     0 |             hp0[x] = drop ? 0 : gp0[x] * inv_p;
 195 |     0 |           gp0 += gstride[1];
 196 |     0 |           hp0 += hstride[1];
 197 |     0 |         }
 198 |     0 |       }
 199 |     0 |       return CCV_NNC_EXEC_SUCCESS;
 200 |     0 |     }
 201 |       |     // Non-optimal case, need to do skip copy.
 202 |     0 |     for (i[0] = 0; i[0] < dim[0]; i[0]++)
 203 |     0 |     {
 204 |     0 |       float* const gp0 = gp + i[0] * gstride[0];
 205 |     0 |       float* const hp0 = hp + i[0] * hstride[0];
 206 |     0 |       for (i[1] = 0; i[1] < dim[1]; i[1]++)
 207 |     0 |       {
 208 |     0 |         float* gp1 = gp0 + i[1] * gstride[1];
 209 |     0 |         float* hp1 = hp0 + i[1] * hstride[1];
 210 |     0 |         for (i[2] = 0; i[2] < dim[2]; i[2]++)
 211 |     0 |         {
 212 |     0 |           for (x = 0; x < dim[3]; x++)
 213 |     0 |             hp1[x] = drop ? 0 : gp1[x] * inv_p;
 214 |     0 |           gp1 += gstride[2];
 215 |     0 |           hp1 += hstride[2];
 216 |     0 |         }
 217 |     0 |       }
 218 |     0 |     }
 219 |     1 |   } else {
 220 |     1 |     if (!CCV_IS_TENSOR_VIEW(g) && !CCV_IS_TENSOR_VIEW(h))
 221 |     1 |     {
 222 |       |       // Fast path: both tensors are dense, so a single flat loop covers everything.
 223 |     1 |       const int tensor_count = ccv_nnc_tensor_count(inputs[0]->info);
 224 | 1.00k |       for (x = 0; x < tensor_count; x++)
 225 | 1.00k |         h->data.f32[x] = maskdata[x] ? 0 : g->data.f32[x] * inv_p;
     |       |         Branch (225): [True: 399, False: 601]
 226 |     1 |       return CCV_NNC_EXEC_SUCCESS;
 227 |     1 |     }
 228 |     1 |     assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
 229 |     0 |     ccv_nnc_tensor_view_get_stride(g, gstride);
 230 |     0 |     ccv_nnc_tensor_view_get_stride(h, hstride);
 231 |     0 |     int i[CCV_NNC_MAX_DIM + 2];
 232 |     0 |     float* const gp = g->data.f32;
 233 |     0 |     float* const hp = h->data.f32;
 234 |     0 |     const int count = dim[2] * dim[3];
 235 |     0 |     uint8_t* maskp = maskdata;
 236 |     0 |     if (gstride[2] == dim[3] && hstride[2] == dim[3])
 237 |     0 |     {
 238 |       |       // Special casing if gstride[2] is the same as dim[3]
 239 |     0 |       for (i[0] = 0; i[0] < dim[0]; i[0]++)
 240 |     0 |       {
 241 |     0 |         float* gp0 = gp + i[0] * gstride[0];
 242 |     0 |         float* hp0 = hp + i[0] * hstride[0];
 243 |     0 |         for (i[1] = 0; i[1] < dim[1]; i[1]++)
 244 |     0 |         {
 245 |     0 |           for (x = 0; x < count; x++)
 246 |     0 |             hp0[x] = maskp[x] ? 0 : gp0[x] * inv_p;
 247 |     0 |           gp0 += gstride[1];
 248 |     0 |           hp0 += hstride[1];
 249 |     0 |           maskp += count;
 250 |     0 |         }
 251 |     0 |       }
 252 |     0 |       return CCV_NNC_EXEC_SUCCESS;
 253 |     0 |     }
 254 |       |     // Non-optimal case, need to do skip copy.
 255 |     0 |     for (i[0] = 0; i[0] < dim[0]; i[0]++)
 256 |     0 |     {
 257 |     0 |       float* const gp0 = gp + i[0] * gstride[0];
 258 |     0 |       float* const hp0 = hp + i[0] * hstride[0];
 259 |     0 |       for (i[1] = 0; i[1] < dim[1]; i[1]++)
 260 |     0 |       {
 261 |     0 |         float* gp1 = gp0 + i[1] * gstride[1];
 262 |     0 |         float* hp1 = hp0 + i[1] * hstride[1];
 263 |     0 |         for (i[2] = 0; i[2] < dim[2]; i[2]++)
 264 |     0 |         {
 265 |     0 |           for (x = 0; x < dim[3]; x++)
 266 |     0 |             hp1[x] = maskp[x] ? 0 : gp1[x] * inv_p;
 267 |     0 |           maskp += dim[3];
 268 |     0 |           gp1 += gstride[2];
 269 |     0 |           hp1 += hstride[2];
 270 |     0 |         }
 271 |     0 |       }
 272 |     0 |     }
 273 |     0 |   }
 274 |     0 |   return CCV_NNC_EXEC_SUCCESS;
 275 |     2 | }
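
The backward kernel never re-draws randomness: it consumes the mask saved by the forward pass (inputs[4]) and applies the same zero-or-scale rule to the incoming gradient, since the derivative of the output with respect to the input is 0 for dropped elements and 1 / (1 - p) for kept ones. A matching sketch under the same assumptions (hypothetical name, dense tensors):

#include <stdint.h>

/* Hypothetical sketch of the dense backward path: reuse the forward mask,
 * zero dropped lanes, rescale the rest by 1 / (1 - p). */
static void dropout_backward_sketch(const float* g, const uint8_t* mask, float* h, int n, float p)
{
  const float inv_p = 1.f / (1.f - p);
  int x;
  for (x = 0; x < n; x++)
    h[x] = mask[x] ? 0 : g[x] * inv_p;
}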
 276 |       | 
 277 |       | REGISTER_COMMAND_BACKEND(CCV_NNC_DROPOUT_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 278 |     1 | {
 279 |     1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
 280 |     1 |   registry->tensor_datatypes = CCV_32F;
 281 |     1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 282 |     1 |   registry->algorithms = 1;
 283 |     1 |   registry->exec = _ccv_nnc_dropout_forw;
 284 |     1 | }
 285 |       | 
 286 |       | REGISTER_COMMAND_BACKEND(CCV_NNC_DROPOUT_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 287 |     1 | {
 288 |     1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
 289 |     1 |   registry->tensor_datatypes = CCV_32F;
 290 |     1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 291 |     1 |   registry->algorithms = 1;
 292 |     1 |   registry->exec = _ccv_nnc_dropout_back;
 293 |     1 | }
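
Once registered, this backend is reached through the generic command interface rather than called directly. The following is a hedged usage sketch, assuming the CMD_DROPOUT_FORWARD convenience macro, ccv_nnc_no_hint, CPU_TENSOR_NHWC, and the TENSOR_LIST helper from nnc/ccv_nnc_easy.h behave as they do in the library's tests; the tensor shape and dropout rate are illustrative. The second output receives the mask that a later CCV_NNC_DROPOUT_BACKWARD call consumes.

#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>

int main(void)
{
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1000), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1000), 0);
  ccv_nnc_tensor_t* const mask = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1000), 0);
  int i;
  for (i = 0; i < 1000; i++)
    a->data.f32[i] = i;
  /* Forward: drops roughly 40% of elements, scales the rest by 1 / 0.6,
   * and writes the per-element mask into the second output. */
  ccv_nnc_cmd_exec(CMD_DROPOUT_FORWARD(0.4), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, mask), 0);
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(mask);
  return 0;
}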