Coverage Report

Created: 2021-09-21 22:26

File: /home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/compare/ccv_nnc_min_cpu_ref.c

Columns: Line | Count | Source (jump to first uncovered line)
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
// Forward pass of the elementwise-min command: c[i] = min(a[i], b[i]).
// inputs[0] = a, inputs[1] = b, outputs[0] = c; all three must share the
// same logical dimensions. Only float32 data is handled here.
static int _ccv_nnc_min_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
	ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[1];
	ccv_nnc_tensor_view_t* const c = (ccv_nnc_tensor_view_t*)outputs[0];
	// Assuming this is float 32.
	int dim[CCV_NNC_MAX_DIM_ALLOC];
	int ainc[CCV_NNC_MAX_DIM_ALLOC];
	int binc[CCV_NNC_MAX_DIM_ALLOC];
	int cinc[CCV_NNC_MAX_DIM_ALLOC];
	assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
	assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
	assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
	// All three tensors must agree on the logical dimensions.
	ccv_nnc_tensor_view_get_dim(a, dim);
	assert(ccv_nnc_tensor_view_check_dim(b, dim));
	assert(ccv_nnc_tensor_view_check_dim(c, dim));
	int x;
	if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c))
	{
		// Super optimal case: dense tensors, one flat loop over every element.
		const int tensor_count = ccv_nnc_tensor_count(a->info);
		for (x = 0; x < tensor_count; x++)
			c->data.f32[x] = ccv_min(a->data.f32[x], b->data.f32[x]);
		return CCV_NNC_EXEC_SUCCESS;
	}
	assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
	ccv_nnc_tensor_view_get_inc(a, ainc);
	ccv_nnc_tensor_view_get_inc(b, binc);
	ccv_nnc_tensor_view_get_inc(c, cinc);
	int i[CCV_NNC_MAX_DIM + 2];
	float* ap = a->data.f32;
	float* bp = b->data.f32;
	float* cp = c->data.f32;
	const int count = dim[2] * dim[3];
	if (ainc[3] == dim[3] && binc[3] == dim[3] && cinc[3] == dim[3])
	{
		// Special casing when the innermost increment equals the innermost
		// dimension: the last two axes are contiguous, so fuse them into one
		// loop of `count` elements.
		for (i[0] = 0; i[0] < dim[0]; i[0]++)
		{
			for (i[1] = 0; i[1] < dim[1]; i[1]++)
			{
				for (x = 0; x < count; x++)
					cp[x] = ccv_min(ap[x], bp[x]);
				ap += ainc[2] * ainc[3];
				bp += binc[2] * binc[3];
				cp += cinc[2] * cinc[3];
			}
			ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
			bp += (binc[1] - dim[1]) * binc[2] * binc[3];
			cp += (cinc[1] - dim[1]) * cinc[2] * cinc[3];
		}
		return CCV_NNC_EXEC_SUCCESS;
	}
	// Non-optimal case: strided views, need to do skip copy row by row.
	for (i[0] = 0; i[0] < dim[0]; i[0]++)
	{
		for (i[1] = 0; i[1] < dim[1]; i[1]++)
		{
			for (i[2] = 0; i[2] < dim[2]; i[2]++)
			{
				for (x = 0; x < dim[3]; x++)
					cp[x] = ccv_min(ap[x], bp[x]);
				ap += ainc[3];
				bp += binc[3];
				cp += cinc[3];
			}
			ap += (ainc[2] - dim[2]) * ainc[3];
			bp += (binc[2] - dim[2]) * binc[3];
			cp += (cinc[2] - dim[2]) * cinc[3];
		}
		ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
		bp += (binc[1] - dim[1]) * binc[2] * binc[3];
		cp += (cinc[1] - dim[1]) * cinc[2] * cinc[3];
	}
	return CCV_NNC_EXEC_SUCCESS;
}
89
90
// Backward pass of the elementwise-min command. inputs[0] = g (incoming
// gradient, may be NULL), inputs[1] = a, inputs[2] = b; outputs[0] = ha,
// outputs[1] = hb. The gradient flows to whichever of a/b is smaller; on
// ties it flows to both. When g is NULL, a gradient of all ones is assumed.
static int _ccv_nnc_min_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
	ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
	ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
	ccv_nnc_tensor_view_t* const ha = (ccv_nnc_tensor_view_t*)outputs[0];
	ccv_nnc_tensor_view_t* const hb = (ccv_nnc_tensor_view_t*)outputs[1];
	// Assuming this is float 32.
	int dim[CCV_NNC_MAX_DIM_ALLOC];
	int ginc[CCV_NNC_MAX_DIM_ALLOC];
	int ainc[CCV_NNC_MAX_DIM_ALLOC];
	int binc[CCV_NNC_MAX_DIM_ALLOC];
	int hainc[CCV_NNC_MAX_DIM_ALLOC];
	int hbinc[CCV_NNC_MAX_DIM_ALLOC];
	assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
	assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
	assert(ccv_nnc_tensor_nd(ha->info.dim) <= CCV_NNC_MAX_DIM + 2);
	assert(ccv_nnc_tensor_nd(hb->info.dim) <= CCV_NNC_MAX_DIM + 2);
	// a, b, ha, hb (and g, if given) must agree on the logical dimensions.
	ccv_nnc_tensor_view_get_dim(a, dim);
	assert(ccv_nnc_tensor_view_check_dim(b, dim));
	assert(ccv_nnc_tensor_view_check_dim(ha, dim));
	assert(ccv_nnc_tensor_view_check_dim(hb, dim));
	if (g)
	{
		assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
		assert(ccv_nnc_tensor_view_check_dim(g, dim));
		int x;
		if (!CCV_IS_TENSOR_VIEW(g) && !CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(ha) && !CCV_IS_TENSOR_VIEW(hb))
		{
			// Super optimal case: everything dense, one flat loop.
			const int tensor_count = ccv_nnc_tensor_count(a->info);
			for (x = 0; x < tensor_count; x++)
				if (a->data.f32[x] < b->data.f32[x])
				{
					ha->data.f32[x] = g->data.f32[x];
					hb->data.f32[x] = 0;
				} else if (a->data.f32[x] > b->data.f32[x]) {
					ha->data.f32[x] = 0;
					hb->data.f32[x] = g->data.f32[x];
				} else // Tie: gradient flows to both inputs.
					ha->data.f32[x] = hb->data.f32[x] = g->data.f32[x];
			return CCV_NNC_EXEC_SUCCESS;
		}
		assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
		ccv_nnc_tensor_view_get_inc(g, ginc);
		ccv_nnc_tensor_view_get_inc(a, ainc);
		ccv_nnc_tensor_view_get_inc(b, binc);
		ccv_nnc_tensor_view_get_inc(ha, hainc);
		ccv_nnc_tensor_view_get_inc(hb, hbinc);
		int i[CCV_NNC_MAX_DIM + 2];
		float* gp = g->data.f32;
		float* ap = a->data.f32;
		float* bp = b->data.f32;
		float* hap = ha->data.f32;
		float* hbp = hb->data.f32;
		const int count = dim[2] * dim[3];
		if (ainc[3] == dim[3] && binc[3] == dim[3] && hainc[3] == dim[3] && hbinc[3] == dim[3])
		{
			// Special casing when the last two axes are contiguous: fuse them.
			for (i[0] = 0; i[0] < dim[0]; i[0]++)
			{
				for (i[1] = 0; i[1] < dim[1]; i[1]++)
				{
					for (x = 0; x < count; x++)
						if (ap[x] < bp[x]) {
							hap[x] = gp[x];
							hbp[x] = 0;
						} else if (ap[x] > bp[x]) {
							hap[x] = 0;
							hbp[x] = gp[x];
						} else
							hap[x] = hbp[x] = gp[x];
					gp += ginc[2] * ginc[3];
					ap += ainc[2] * ainc[3];
					bp += binc[2] * binc[3];
					hap += hainc[2] * hainc[3];
					hbp += hbinc[2] * hbinc[3];
				}
				gp += (ginc[1] - dim[1]) * ginc[2] * ginc[3];
				ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
				bp += (binc[1] - dim[1]) * binc[2] * binc[3];
				hap += (hainc[1] - dim[1]) * hainc[2] * hainc[3];
				hbp += (hbinc[1] - dim[1]) * hbinc[2] * hbinc[3];
			}
			return CCV_NNC_EXEC_SUCCESS;
		}
		// Non-optimal case: strided views, need to do skip copy row by row.
		for (i[0] = 0; i[0] < dim[0]; i[0]++)
		{
			for (i[1] = 0; i[1] < dim[1]; i[1]++)
			{
				for (i[2] = 0; i[2] < dim[2]; i[2]++)
				{
					for (x = 0; x < dim[3]; x++)
						if (ap[x] < bp[x]) {
							hap[x] = gp[x];
							hbp[x] = 0;
						} else if (ap[x] > bp[x]) {
							hap[x] = 0;
							hbp[x] = gp[x];
						} else
							hap[x] = hbp[x] = gp[x];
					gp += ginc[3];
					ap += ainc[3];
					bp += binc[3];
					hap += hainc[3];
					hbp += hbinc[3];
				}
				gp += (ginc[2] - dim[2]) * ginc[3];
				ap += (ainc[2] - dim[2]) * ainc[3];
				bp += (binc[2] - dim[2]) * binc[3];
				hap += (hainc[2] - dim[2]) * hainc[3];
				hbp += (hbinc[2] - dim[2]) * hbinc[3];
			}
			gp += (ginc[1] - dim[1]) * ginc[2] * ginc[3];
			ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
			bp += (binc[1] - dim[1]) * binc[2] * binc[3];
			hap += (hainc[1] - dim[1]) * hainc[2] * hainc[3];
			hbp += (hbinc[1] - dim[1]) * hbinc[2] * hbinc[3];
		}
	} else {
		// No incoming gradient: behave as if g were all ones.
		int x;
		if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(ha) && !CCV_IS_TENSOR_VIEW(hb))
		{
			// Super optimal case: everything dense, one flat loop.
			const int tensor_count = ccv_nnc_tensor_count(a->info);
			for (x = 0; x < tensor_count; x++)
				if (a->data.f32[x] < b->data.f32[x]) {
					ha->data.f32[x] = 1;
					hb->data.f32[x] = 0;
				} else if (a->data.f32[x] > b->data.f32[x]) {
					ha->data.f32[x] = 0;
					hb->data.f32[x] = 1;
				} else
					ha->data.f32[x] = hb->data.f32[x] = 1;
			return CCV_NNC_EXEC_SUCCESS;
		}
		assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
		ccv_nnc_tensor_view_get_inc(a, ainc);
		ccv_nnc_tensor_view_get_inc(b, binc);
		ccv_nnc_tensor_view_get_inc(ha, hainc);
		ccv_nnc_tensor_view_get_inc(hb, hbinc);
		int i[CCV_NNC_MAX_DIM + 2];
		float* ap = a->data.f32;
		float* bp = b->data.f32;
		float* hap = ha->data.f32;
		float* hbp = hb->data.f32;
		const int count = dim[2] * dim[3];
		if (ainc[3] == dim[3] && binc[3] == dim[3] && hainc[3] == dim[3] && hbinc[3] == dim[3])
		{
			// Special casing when the last two axes are contiguous: fuse them.
			for (i[0] = 0; i[0] < dim[0]; i[0]++)
			{
				for (i[1] = 0; i[1] < dim[1]; i[1]++)
				{
					for (x = 0; x < count; x++)
						if (ap[x] < bp[x]) {
							hap[x] = 1;
							hbp[x] = 0;
						} else if (ap[x] > bp[x]) {
							hap[x] = 0;
							hbp[x] = 1;
						} else
							hap[x] = hbp[x] = 1;
					ap += ainc[2] * ainc[3];
					bp += binc[2] * binc[3];
					hap += hainc[2] * hainc[3];
					hbp += hbinc[2] * hbinc[3];
				}
				ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
				bp += (binc[1] - dim[1]) * binc[2] * binc[3];
				hap += (hainc[1] - dim[1]) * hainc[2] * hainc[3];
				hbp += (hbinc[1] - dim[1]) * hbinc[2] * hbinc[3];
			}
			return CCV_NNC_EXEC_SUCCESS;
		}
		// Non-optimal case: strided views, need to do skip copy row by row.
		for (i[0] = 0; i[0] < dim[0]; i[0]++)
		{
			for (i[1] = 0; i[1] < dim[1]; i[1]++)
			{
				for (i[2] = 0; i[2] < dim[2]; i[2]++)
				{
					for (x = 0; x < dim[3]; x++)
						if (ap[x] < bp[x]) {
							hap[x] = 1;
							hbp[x] = 0;
						} else if (ap[x] > bp[x]) {
							hap[x] = 0;
							hbp[x] = 1;
						} else
							hap[x] = hbp[x] = 1;
					ap += ainc[3];
					bp += binc[3];
					hap += hainc[3];
					hbp += hbinc[3];
				}
				ap += (ainc[2] - dim[2]) * ainc[3];
				bp += (binc[2] - dim[2]) * binc[3];
				hap += (hainc[2] - dim[2]) * hainc[3];
				hbp += (hbinc[2] - dim[2]) * hbinc[3];
			}
			ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
			bp += (binc[1] - dim[1]) * binc[2] * binc[3];
			hap += (hainc[1] - dim[1]) * hainc[2] * hainc[3];
			hbp += (hbinc[1] - dim[1]) * hbinc[2] * hbinc[3];
		}
	}
	return CCV_NNC_EXEC_SUCCESS;
}
300
301
// Register the CPU reference backend for the min forward command.
REGISTER_COMMAND_BACKEND(CCV_NNC_MIN_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	// Accepts NHWC, NCHW and CHWN layouts, float32 only, CPU memory.
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->algorithms = 1;
	registry->exec = _ccv_nnc_min_forw;
}
309
310
// Register the CPU reference backend for the min backward command.
REGISTER_COMMAND_BACKEND(CCV_NNC_MIN_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	// Accepts NHWC, NCHW and CHWN layouts, float32 only, CPU memory.
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->algorithms = 1;
	registry->exec = _ccv_nnc_min_back;
}