Coverage Report

Created: 2021-04-14 19:30

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/dropout/ccv_nnc_dropout_cpu_ref.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
#include "3rdparty/dsfmt/dSFMT.h"
13
14
// Shared methods.
15
#include "../_ccv_nnc_cpu_ref.h"
16
17
static int _ccv_nnc_dropout_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	// Forward dropout: zero each element (or the whole tensor when "entirety" is set)
	// with probability p, and scale the survivors by 1 / (1 - p) (inverted dropout).
	// outputs[0] receives the result, outputs[1] receives the mask consumed by backprop.
	const float p = cmd.info.dropout.p;
	const float inv_p = 1. / (1. - p);
	assert(output_size >= 2);
	// Assuming this is float 32.
	int dim[CCV_NNC_MAX_DIM_ALLOC];
	int ainc[CCV_NNC_MAX_DIM_ALLOC];
	int binc[CCV_NNC_MAX_DIM_ALLOC];
	ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
	ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
	assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
	assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
	ccv_nnc_tensor_view_get_dim(a, dim);
	assert(ccv_nnc_tensor_view_check_dim(b, dim));
	const int tensor_count = ccv_nnc_tensor_count(inputs[0]->info);
	uint8_t* const maskdata = outputs[1]->data.u8;
	dsfmt_t dsfmt;
	// NOTE(review): the PRNG is seeded by reinterpreting the first float of the input
	// as an int32, so the drop pattern is deterministic per input — presumably
	// intentional for reproducibility; confirm against callers.
	dsfmt_init_gen_rand(&dsfmt, (uint32_t)a->data.i32[0]);
	int x;
	if (cmd.info.dropout.entirety)
	{
		// Single coin flip decides the whole tensor; record it as an int32 in the mask.
		// dsfmt_genrand_open_close yields a value in (0, 1], hence <= p drops with probability p.
		const int32_t drop = ((int32_t*)maskdata)[0] = (dsfmt_genrand_open_close(&dsfmt) <= p);
		if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b))
		{
			// Both tensors are dense: one flat loop suffices.
			for (x = 0; x < tensor_count; x++)
				b->data.f32[x] = drop ? 0 : a->data.f32[x] * inv_p;
			return CCV_NNC_EXEC_SUCCESS;
		}
		assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
		ccv_nnc_tensor_view_get_inc(a, ainc);
		ccv_nnc_tensor_view_get_inc(b, binc);
		int i[CCV_NNC_MAX_DIM + 2];
		float* ap = a->data.f32;
		float* bp = b->data.f32;
		const int count = dim[2] * dim[3];
		if (ainc[3] == dim[3] && binc[3] == dim[3])
		{
			// Innermost stride matches the dimension: the last two axes are contiguous,
			// so process dim[2] * dim[3] elements per row in one inner loop.
			for (i[0] = 0; i[0] < dim[0]; i[0]++)
			{
				for (i[1] = 0; i[1] < dim[1]; i[1]++)
				{
					for (x = 0; x < count; x++)
						bp[x] = drop ? 0 : ap[x] * inv_p;
					ap += ainc[2] * ainc[3];
					bp += binc[2] * binc[3];
				}
				ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
				bp += (binc[1] - dim[1]) * binc[2] * binc[3];
			}
			return CCV_NNC_EXEC_SUCCESS;
		}
		// General strided case: walk all four axes and skip the per-axis padding.
		for (i[0] = 0; i[0] < dim[0]; i[0]++)
		{
			for (i[1] = 0; i[1] < dim[1]; i[1]++)
			{
				for (i[2] = 0; i[2] < dim[2]; i[2]++)
				{
					for (x = 0; x < dim[3]; x++)
						bp[x] = drop ? 0 : ap[x] * inv_p;
					ap += ainc[3];
					bp += binc[3];
				}
				ap += (ainc[2] - dim[2]) * ainc[3];
				bp += (binc[2] - dim[2]) * binc[3];
			}
			ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
			bp += (binc[1] - dim[1]) * binc[2] * binc[3];
		}
	} else {
		// Per-element dropout: draw one byte of mask per element. The mask is filled
		// from the last element down to the first — keep this order, it determines
		// which PRNG draw lands on which element.
		uint8_t* maskp = maskdata + (tensor_count - 1);
		for (; maskp >= maskdata; --maskp)
			*maskp = (dsfmt_genrand_open_close(&dsfmt) <= p);
		if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b))
		{
			// Both tensors are dense: one flat loop suffices.
			for (x = 0; x < tensor_count; x++)
				b->data.f32[x] = maskdata[x] ? 0 : a->data.f32[x] * inv_p;
			return CCV_NNC_EXEC_SUCCESS;
		}
		assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
		ccv_nnc_tensor_view_get_inc(a, ainc);
		ccv_nnc_tensor_view_get_inc(b, binc);
		int i[CCV_NNC_MAX_DIM + 2];
		float* ap = a->data.f32;
		float* bp = b->data.f32;
		const int count = dim[2] * dim[3];
		maskp = maskdata; // The mask itself is always dense; restart from its base.
		if (ainc[3] == dim[3] && binc[3] == dim[3])
		{
			// Innermost stride matches the dimension: fuse the last two axes.
			for (i[0] = 0; i[0] < dim[0]; i[0]++)
			{
				for (i[1] = 0; i[1] < dim[1]; i[1]++)
				{
					for (x = 0; x < count; x++)
						bp[x] = maskp[x] ? 0 : ap[x] * inv_p;
					ap += ainc[2] * ainc[3];
					bp += binc[2] * binc[3];
					maskp += count;
				}
				ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
				bp += (binc[1] - dim[1]) * binc[2] * binc[3];
			}
			return CCV_NNC_EXEC_SUCCESS;
		}
		// General strided case: skip copy axis by axis; the mask advances densely.
		for (i[0] = 0; i[0] < dim[0]; i[0]++)
		{
			for (i[1] = 0; i[1] < dim[1]; i[1]++)
			{
				for (i[2] = 0; i[2] < dim[2]; i[2]++)
				{
					for (x = 0; x < dim[3]; x++)
						bp[x] = maskp[x] ? 0 : ap[x] * inv_p;
					maskp += dim[3];
					ap += ainc[3];
					bp += binc[3];
				}
				ap += (ainc[2] - dim[2]) * ainc[3];
				bp += (binc[2] - dim[2]) * binc[3];
			}
			ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
			bp += (binc[1] - dim[1]) * binc[2] * binc[3];
		}
	}
	return CCV_NNC_EXEC_SUCCESS;
}
148
149
static int _ccv_nnc_dropout_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	// Backward dropout: replay the mask produced by the forward pass (inputs[4]) onto
	// the incoming gradient g (inputs[0]), writing h (outputs[0]). Dropped positions
	// get zero gradient; surviving positions are scaled by 1 / (1 - p), matching the
	// forward inverted-dropout scaling.
	assert(input_size == 5);
	const float p = cmd.info.dropout.p;
	const float inv_p = 1. / (1. - p);
	uint8_t* const maskdata = inputs[4]->data.u8;
	// Assuming this is float 32.
	int dim[CCV_NNC_MAX_DIM_ALLOC];
	int ginc[CCV_NNC_MAX_DIM_ALLOC];
	int hinc[CCV_NNC_MAX_DIM_ALLOC];
	ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
	ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[0];
	assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
	assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2);
	ccv_nnc_tensor_view_get_dim(g, dim);
	assert(ccv_nnc_tensor_view_check_dim(h, dim));
	int x;
	if (cmd.info.dropout.entirety)
	{
		// The forward pass stored a single int32 verdict for the whole tensor.
		const int32_t drop = ((int32_t*)maskdata)[0];
		if (!CCV_IS_TENSOR_VIEW(g) && !CCV_IS_TENSOR_VIEW(h))
		{
			// Both tensors are dense: one flat loop suffices.
			const int tensor_count = ccv_nnc_tensor_count(inputs[0]->info);
			for (x = 0; x < tensor_count; x++)
				h->data.f32[x] = drop ? 0 : g->data.f32[x] * inv_p;
			return CCV_NNC_EXEC_SUCCESS;
		}
		assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
		ccv_nnc_tensor_view_get_inc(g, ginc);
		ccv_nnc_tensor_view_get_inc(h, hinc);
		int i[CCV_NNC_MAX_DIM + 2];
		float* gp = g->data.f32;
		float* hp = h->data.f32;
		const int count = dim[2] * dim[3];
		if (ginc[3] == dim[3] && hinc[3] == dim[3])
		{
			// Innermost stride matches the dimension: fuse the last two axes.
			for (i[0] = 0; i[0] < dim[0]; i[0]++)
			{
				for (i[1] = 0; i[1] < dim[1]; i[1]++)
				{
					for (x = 0; x < count; x++)
						hp[x] = drop ? 0 : gp[x] * inv_p;
					gp += ginc[2] * ginc[3];
					hp += hinc[2] * hinc[3];
				}
				gp += (ginc[1] - dim[1]) * ginc[2] * ginc[3];
				hp += (hinc[1] - dim[1]) * hinc[2] * hinc[3];
			}
			return CCV_NNC_EXEC_SUCCESS;
		}
		// General strided case: walk all four axes and skip the per-axis padding.
		for (i[0] = 0; i[0] < dim[0]; i[0]++)
		{
			for (i[1] = 0; i[1] < dim[1]; i[1]++)
			{
				for (i[2] = 0; i[2] < dim[2]; i[2]++)
				{
					for (x = 0; x < dim[3]; x++)
						hp[x] = drop ? 0 : gp[x] * inv_p;
					gp += ginc[3];
					hp += hinc[3];
				}
				gp += (ginc[2] - dim[2]) * ginc[3];
				hp += (hinc[2] - dim[2]) * hinc[3];
			}
			gp += (ginc[1] - dim[1]) * ginc[2] * ginc[3];
			hp += (hinc[1] - dim[1]) * hinc[2] * hinc[3];
		}
	} else {
		// Per-element mask: one byte per element, as laid down by the forward pass.
		if (!CCV_IS_TENSOR_VIEW(g) && !CCV_IS_TENSOR_VIEW(h))
		{
			// Both tensors are dense: one flat loop suffices.
			const int tensor_count = ccv_nnc_tensor_count(inputs[0]->info);
			for (x = 0; x < tensor_count; x++)
				h->data.f32[x] = maskdata[x] ? 0 : g->data.f32[x] * inv_p;
			return CCV_NNC_EXEC_SUCCESS;
		}
		assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
		ccv_nnc_tensor_view_get_inc(g, ginc);
		ccv_nnc_tensor_view_get_inc(h, hinc);
		int i[CCV_NNC_MAX_DIM + 2];
		float* gp = g->data.f32;
		float* hp = h->data.f32;
		const int count = dim[2] * dim[3];
		uint8_t* maskp = maskdata; // The mask itself is always dense.
		if (ginc[3] == dim[3] && hinc[3] == dim[3])
		{
			// Innermost stride matches the dimension: fuse the last two axes.
			for (i[0] = 0; i[0] < dim[0]; i[0]++)
			{
				for (i[1] = 0; i[1] < dim[1]; i[1]++)
				{
					for (x = 0; x < count; x++)
						hp[x] = maskp[x] ? 0 : gp[x] * inv_p;
					gp += ginc[2] * ginc[3];
					hp += hinc[2] * hinc[3];
					maskp += count;
				}
				gp += (ginc[1] - dim[1]) * ginc[2] * ginc[3];
				hp += (hinc[1] - dim[1]) * hinc[2] * hinc[3];
			}
			return CCV_NNC_EXEC_SUCCESS;
		}
		// General strided case: skip copy axis by axis; the mask advances densely.
		for (i[0] = 0; i[0] < dim[0]; i[0]++)
		{
			for (i[1] = 0; i[1] < dim[1]; i[1]++)
			{
				for (i[2] = 0; i[2] < dim[2]; i[2]++)
				{
					for (x = 0; x < dim[3]; x++)
						hp[x] = maskp[x] ? 0 : gp[x] * inv_p;
					maskp += dim[3];
					gp += ginc[3];
					hp += hinc[3];
				}
				gp += (ginc[2] - dim[2]) * ginc[3];
				hp += (hinc[2] - dim[2]) * hinc[3];
			}
			gp += (ginc[1] - dim[1]) * ginc[2] * ginc[3];
			hp += (hinc[1] - dim[1]) * hinc[2] * hinc[3];
		}
	}
	return CCV_NNC_EXEC_SUCCESS;
}
276
277
REGISTER_COMMAND_BACKEND(CCV_NNC_DROPOUT_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	// Register the CPU reference implementation of forward dropout:
	// 32-bit float only, CPU memory, all three tensor layouts, one algorithm.
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->algorithms = 1;
	registry->exec = _ccv_nnc_dropout_forw;
}
285
286
REGISTER_COMMAND_BACKEND(CCV_NNC_DROPOUT_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	// Register the CPU reference implementation of backward dropout:
	// 32-bit float only, CPU memory, all three tensor layouts, one algorithm.
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->algorithms = 1;
	registry->exec = _ccv_nnc_dropout_back;
}