Coverage Report

Created: 2021-04-14 04:30

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/compare/ccv_nnc_max_cpu_ref.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
// Forward pass of element-wise max: c[i] = max(a[i], b[i]).
// inputs[0] = a, inputs[1] = b; outputs[0] = c. All three must share the same
// dimensions (asserted below). Data is assumed to be float 32 throughout.
static int _ccv_nnc_max_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
	ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[1];
	ccv_nnc_tensor_view_t* const c = (ccv_nnc_tensor_view_t*)outputs[0];
	// Assuming this is float 32.
	int dim[CCV_NNC_MAX_DIM_ALLOC];
	// Per-tensor increments (view strides, in elements) for a, b and c.
	int ainc[CCV_NNC_MAX_DIM_ALLOC];
	int binc[CCV_NNC_MAX_DIM_ALLOC];
	int cinc[CCV_NNC_MAX_DIM_ALLOC];
	assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
	assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
	assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
	ccv_nnc_tensor_view_get_dim(a, dim);
	assert(ccv_nnc_tensor_view_check_dim(b, dim));
	assert(ccv_nnc_tensor_view_check_dim(c, dim));
	int x;
	if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c))
	{
		// Super optimal case, just do one for-loop for sum.
		// (All tensors are dense, so a single flat loop over every element works.)
		const int tensor_count = ccv_nnc_tensor_count(a->info);
		for (x = 0; x < tensor_count; x++)
			c->data.f32[x] = ccv_max(a->data.f32[x], b->data.f32[x]);
		return CCV_NNC_EXEC_SUCCESS;
	}
	assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
	ccv_nnc_tensor_view_get_inc(a, ainc);
	ccv_nnc_tensor_view_get_inc(b, binc);
	ccv_nnc_tensor_view_get_inc(c, cinc);
	int i[CCV_NNC_MAX_DIM + 2];
	float* ap = a->data.f32;
	float* bp = b->data.f32;
	float* cp = c->data.f32;
	const int count = dim[2] * dim[3];
	if (ainc[3] == dim[3] && binc[3] == dim[3] && cinc[3] == dim[3])
	{
		// Special casing if the ainc[3] is the same as dim[3]
		// (innermost dimension is contiguous, so the two inner loops collapse into one).
		for (i[0] = 0; i[0] < dim[0]; i[0]++)
		{
			for (i[1] = 0; i[1] < dim[1]; i[1]++)
			{
				for (x = 0; x < count; x++)
					cp[x] = ccv_max(ap[x], bp[x]);
				// Advance each pointer by one full dim[1]-plane using its own strides.
				ap += ainc[2] * ainc[3];
				bp += binc[2] * binc[3];
				cp += cinc[2] * cinc[3];
			}
			// Skip the padding rows the view carries beyond dim[1].
			ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
			bp += (binc[1] - dim[1]) * binc[2] * binc[3];
			cp += (cinc[1] - dim[1]) * cinc[2] * cinc[3];
		}
		return CCV_NNC_EXEC_SUCCESS;
	}
	// Non-optimal case, need to do skip copy.
	// Walk all four dimensions explicitly, skipping view padding after each level.
	for (i[0] = 0; i[0] < dim[0]; i[0]++)
	{
		for (i[1] = 0; i[1] < dim[1]; i[1]++)
		{
			for (i[2] = 0; i[2] < dim[2]; i[2]++)
			{
				for (x = 0; x < dim[3]; x++)
					cp[x] = ccv_max(ap[x], bp[x]);
				ap += ainc[3];
				bp += binc[3];
				cp += cinc[3];
			}
			ap += (ainc[2] - dim[2]) * ainc[3];
			bp += (binc[2] - dim[2]) * binc[3];
			cp += (cinc[2] - dim[2]) * cinc[3];
		}
		ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
		bp += (binc[1] - dim[1]) * binc[2] * binc[3];
		cp += (cinc[1] - dim[1]) * cinc[2] * cinc[3];
	}
	return CCV_NNC_EXEC_SUCCESS;
}
89
90
// Backward pass of element-wise max. inputs[0] = g (incoming gradient, may be
// null), inputs[1] = a, inputs[2] = b; outputs[0] = ha (gradient w.r.t. a),
// outputs[1] = hb (gradient w.r.t. b).
// Routing rule per element: the larger input receives the gradient, the other
// receives 0; on ties BOTH receive the full gradient (not a split — see the
// `else` branches below). When g is null the gradient is taken to be 1.
static int _ccv_nnc_max_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
	ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
	ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
	ccv_nnc_tensor_view_t* const ha = (ccv_nnc_tensor_view_t*)outputs[0];
	ccv_nnc_tensor_view_t* const hb = (ccv_nnc_tensor_view_t*)outputs[1];
	// Assuming this is float 32.
	int dim[CCV_NNC_MAX_DIM_ALLOC];
	// Per-tensor increments (view strides, in elements).
	int ginc[CCV_NNC_MAX_DIM_ALLOC];
	int ainc[CCV_NNC_MAX_DIM_ALLOC];
	int binc[CCV_NNC_MAX_DIM_ALLOC];
	int hainc[CCV_NNC_MAX_DIM_ALLOC];
	int hbinc[CCV_NNC_MAX_DIM_ALLOC];
	assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
	assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
	assert(ccv_nnc_tensor_nd(ha->info.dim) <= CCV_NNC_MAX_DIM + 2);
	assert(ccv_nnc_tensor_nd(hb->info.dim) <= CCV_NNC_MAX_DIM + 2);
	ccv_nnc_tensor_view_get_dim(a, dim);
	assert(ccv_nnc_tensor_view_check_dim(b, dim));
	assert(ccv_nnc_tensor_view_check_dim(ha, dim));
	assert(ccv_nnc_tensor_view_check_dim(hb, dim));
	if (g)
	{
		// With an incoming gradient g: route gp[x] to whichever of a/b is larger.
		assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
		assert(ccv_nnc_tensor_view_check_dim(g, dim));
		int x;
		if (!CCV_IS_TENSOR_VIEW(g) && !CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(ha) && !CCV_IS_TENSOR_VIEW(hb))
		{
			// Super optimal case, just do one for-loop for sum.
			// (All tensors dense — single flat loop.)
			const int tensor_count = ccv_nnc_tensor_count(a->info);
			for (x = 0; x < tensor_count; x++)
				if (a->data.f32[x] > b->data.f32[x]) {
					ha->data.f32[x] = g->data.f32[x];
					hb->data.f32[x] = 0;
				} else if (a->data.f32[x] < b->data.f32[x]) {
					ha->data.f32[x] = 0;
					hb->data.f32[x] = g->data.f32[x];
				} else
					ha->data.f32[x] = hb->data.f32[x] = g->data.f32[x];
			return CCV_NNC_EXEC_SUCCESS;
		}
		assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
		ccv_nnc_tensor_view_get_inc(g, ginc);
		ccv_nnc_tensor_view_get_inc(a, ainc);
		ccv_nnc_tensor_view_get_inc(b, binc);
		ccv_nnc_tensor_view_get_inc(ha, hainc);
		ccv_nnc_tensor_view_get_inc(hb, hbinc);
		int i[CCV_NNC_MAX_DIM + 2];
		float* gp = g->data.f32;
		float* ap = a->data.f32;
		float* bp = b->data.f32;
		float* hap = ha->data.f32;
		float* hbp = hb->data.f32;
		const int count = dim[2] * dim[3];
		if (ainc[3] == dim[3] && binc[3] == dim[3] && hainc[3] == dim[3] && hbinc[3] == dim[3])
		{
			// Special casing if the ainc[3] is the same as dim[3]
			// (contiguous innermost dimension — collapse the two inner loops).
			for (i[0] = 0; i[0] < dim[0]; i[0]++)
			{
				for (i[1] = 0; i[1] < dim[1]; i[1]++)
				{
					for (x = 0; x < count; x++)
						if (ap[x] > bp[x]) {
							hap[x] = gp[x];
							hbp[x] = 0;
						} else if (ap[x] < bp[x]) {
							hap[x] = 0;
							hbp[x] = gp[x];
						} else
							hap[x] = hbp[x] = gp[x];
					// Advance each pointer by one dim[1]-plane using its own strides.
					gp += ginc[2] * ginc[3];
					ap += ainc[2] * ainc[3];
					bp += binc[2] * binc[3];
					hap += hainc[2] * hainc[3];
					hbp += hbinc[2] * hbinc[3];
				}
				// Skip view padding beyond dim[1].
				gp += (ginc[1] - dim[1]) * ginc[2] * ginc[3];
				ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
				bp += (binc[1] - dim[1]) * binc[2] * binc[3];
				hap += (hainc[1] - dim[1]) * hainc[2] * hainc[3];
				hbp += (hbinc[1] - dim[1]) * hbinc[2] * hbinc[3];
			}
			return CCV_NNC_EXEC_SUCCESS;
		}
		// Non-optimal case, need to do skip copy.
		for (i[0] = 0; i[0] < dim[0]; i[0]++)
		{
			for (i[1] = 0; i[1] < dim[1]; i[1]++)
			{
				for (i[2] = 0; i[2] < dim[2]; i[2]++)
				{
					for (x = 0; x < dim[3]; x++)
						if (ap[x] > bp[x]) {
							hap[x] = gp[x];
							hbp[x] = 0;
						} else if (ap[x] < bp[x]) {
							hap[x] = 0;
							hbp[x] = gp[x];
						} else
							hap[x] = hbp[x] = gp[x];
					gp += ginc[3];
					ap += ainc[3];
					bp += binc[3];
					hap += hainc[3];
					hbp += hbinc[3];
				}
				gp += (ginc[2] - dim[2]) * ginc[3];
				ap += (ainc[2] - dim[2]) * ainc[3];
				bp += (binc[2] - dim[2]) * binc[3];
				hap += (hainc[2] - dim[2]) * hainc[3];
				hbp += (hbinc[2] - dim[2]) * hbinc[3];
			}
			gp += (ginc[1] - dim[1]) * ginc[2] * ginc[3];
			ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
			bp += (binc[1] - dim[1]) * binc[2] * binc[3];
			hap += (hainc[1] - dim[1]) * hainc[2] * hainc[3];
			hbp += (hbinc[1] - dim[1]) * hbinc[2] * hbinc[3];
		}
	} else {
		// No incoming gradient: identical routing, but with an implicit gradient of 1.
		int x;
		if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(ha) && !CCV_IS_TENSOR_VIEW(hb))
		{
			// Super optimal case, just do one for-loop for sum.
			const int tensor_count = ccv_nnc_tensor_count(a->info);
			for (x = 0; x < tensor_count; x++)
				if (a->data.f32[x] > b->data.f32[x]) {
					ha->data.f32[x] = 1;
					hb->data.f32[x] = 0;
				} else if (a->data.f32[x] < b->data.f32[x]) {
					ha->data.f32[x] = 0;
					hb->data.f32[x] = 1;
				} else
					ha->data.f32[x] = hb->data.f32[x] = 1;
			return CCV_NNC_EXEC_SUCCESS;
		}
		assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
		ccv_nnc_tensor_view_get_inc(a, ainc);
		ccv_nnc_tensor_view_get_inc(b, binc);
		ccv_nnc_tensor_view_get_inc(ha, hainc);
		ccv_nnc_tensor_view_get_inc(hb, hbinc);
		int i[CCV_NNC_MAX_DIM + 2];
		float* ap = a->data.f32;
		float* bp = b->data.f32;
		float* hap = ha->data.f32;
		float* hbp = hb->data.f32;
		const int count = dim[2] * dim[3];
		if (ainc[3] == dim[3] && binc[3] == dim[3] && hainc[3] == dim[3] && hbinc[3] == dim[3])
		{
			// Special casing if the ainc[3] is the same as dim[3]
			for (i[0] = 0; i[0] < dim[0]; i[0]++)
			{
				for (i[1] = 0; i[1] < dim[1]; i[1]++)
				{
					for (x = 0; x < count; x++)
						if (ap[x] > bp[x]) {
							hap[x] = 1;
							hbp[x] = 0;
						} else if (ap[x] < bp[x]) {
							hap[x] = 0;
							hbp[x] = 1;
						} else
							hap[x] = hbp[x] = 1;
					ap += ainc[2] * ainc[3];
					bp += binc[2] * binc[3];
					hap += hainc[2] * hainc[3];
					hbp += hbinc[2] * hbinc[3];
				}
				ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
				bp += (binc[1] - dim[1]) * binc[2] * binc[3];
				hap += (hainc[1] - dim[1]) * hainc[2] * hainc[3];
				hbp += (hbinc[1] - dim[1]) * hbinc[2] * hbinc[3];
			}
			return CCV_NNC_EXEC_SUCCESS;
		}
		// Non-optimal case, need to do skip copy.
		for (i[0] = 0; i[0] < dim[0]; i[0]++)
		{
			for (i[1] = 0; i[1] < dim[1]; i[1]++)
			{
				for (i[2] = 0; i[2] < dim[2]; i[2]++)
				{
					for (x = 0; x < dim[3]; x++)
						if (ap[x] > bp[x]) {
							hap[x] = 1;
							hbp[x] = 0;
						} else if (ap[x] < bp[x]) {
							hap[x] = 0;
							hbp[x] = 1;
						} else
							hap[x] = hbp[x] = 1;
					ap += ainc[3];
					bp += binc[3];
					hap += hainc[3];
					hbp += hbinc[3];
				}
				ap += (ainc[2] - dim[2]) * ainc[3];
				bp += (binc[2] - dim[2]) * binc[3];
				hap += (hainc[2] - dim[2]) * hainc[3];
				hbp += (hbinc[2] - dim[2]) * hbinc[3];
			}
			ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
			bp += (binc[1] - dim[1]) * binc[2] * binc[3];
			hap += (hainc[1] - dim[1]) * hainc[2] * hainc[3];
			hbp += (hbinc[1] - dim[1]) * hbinc[2] * hbinc[3];
		}
	}
	return CCV_NNC_EXEC_SUCCESS;
}
299
300
// Registers the reference CPU backend for the element-wise max forward command.
REGISTER_COMMAND_BACKEND(CCV_NNC_MAX_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	// Single algorithm; executes _ccv_nnc_max_forw on CPU memory, float32 only.
	registry->exec = _ccv_nnc_max_forw;
	registry->algorithms = 1;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
}
308
309
// Registers the reference CPU backend for the element-wise max backward command.
REGISTER_COMMAND_BACKEND(CCV_NNC_MAX_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	// Single algorithm; executes _ccv_nnc_max_back on CPU memory, float32 only.
	registry->exec = _ccv_nnc_max_back;
	registry->algorithms = 1;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
}