Coverage Report

Created: 2021-04-07 03:47

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/nms/ccv_nnc_nms_cpu_ref.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
typedef struct {
14
  float v[5];
15
} float5;
16
5.08k
#define less_than(a, b, aux) ((a).v[0] > (b).v[0])
17
1.54k
#define swap_func(a, b, array, aux, t) do { \
18
1.53k
  (t) = (a); \
19
1.53k
  (a) = (b); \
20
1.53k
  (b) = (t); \
21
1.53k
  int _t = aux[&(a) - array]; \
22
1.53k
  aux[&(a) - array] = aux[&(b) - array]; \
23
1.53k
  aux[&(b) - array] = _t; \
24
1.53k
} while (0)
25
5.08k
CCV_IMPLEMENT_QSORT_EX(_ccv_nnc_nms_sortby_f5_32f, float5, less_than, 
swap_func1.53k
, int*)
26
#undef less_than
27
#undef swap_func
28
29
// Greedy suppression followed by in-place compaction, shared by both the
// packed and the strided code path of _ccv_nnc_nms_forw.
//
// bp:            m rows, `stride` floats apart, each holding d values. Rows must
//                already be ordered so that the row to keep comes first. Value 0
//                of a row is its sort key; values 1/2 are used as the box origin
//                and 3/4 as its extents (so d must be at least 5).
// cp:            m ints moved together with the rows.
// iou_threshold: a later row is suppressed when its IoU with an earlier
//                surviving row is >= this value.
//
// Suppressed rows are marked by writing -FLT_MAX into value 0. Afterwards the
// surviving rows are moved to the front (all d values plus the cp entry) and
// every remaining slot gets cp = -1 and value 0 = -FLT_MAX.
static void _ccv_nnc_nms_suppress_sorted(float* const bp, int* const cp, const int m, const int d, const int stride, const float iou_threshold)
{
  int x, y, j;
  for (x = 0; x < m; x++)
  {
    const float* const bx = bp + x * stride;
    if (bx[0] == -FLT_MAX) // Suppressed by an earlier row.
      continue;
    const float area1 = bx[3] * bx[4];
    for (y = x + 1; y < m; y++)
    {
      float* const by = bp + y * stride;
      if (by[0] == -FLT_MAX) // Suppressed.
        continue;
      const float area2 = by[3] * by[4];
      // Overlap along each axis, clamped at 0 when the boxes are disjoint.
      const float xdiff = ccv_max(0, ccv_min(bx[1] + bx[3], by[1] + by[3]) - ccv_max(bx[1], by[1]));
      const float ydiff = ccv_max(0, ccv_min(bx[2] + bx[4], by[2] + by[4]) - ccv_max(bx[2], by[2]));
      const float intersection = xdiff * ydiff;
      const float iou = intersection / (area1 + area2 - intersection);
      if (iou >= iou_threshold)
        by[0] = -FLT_MAX;
    }
  }
  // Move these values up and move suppressed to the end.
  int kept = 0;
  for (x = 0; x < m; x++)
  {
    if (bp[x * stride] == -FLT_MAX)
      continue;
    if (x != kept)
    {
      // Copy the whole row (all d values), not only the first five.
      for (j = 0; j < d; j++)
        bp[kept * stride + j] = bp[x * stride + j];
      cp[kept] = cp[x];
    }
    ++kept;
  }
  for (x = kept; x < m; x++)
    cp[x] = -1, bp[x * stride] = -FLT_MAX;
}

// Forward pass of non-maximum suppression (CPU reference).
//
// inputs[0]  (a): float rows of d values; with 3 or more dimensions the first
//                 dimension is treated as a batch of n independent problems.
// outputs[0] (b): same dimensions as a. Receives the rows sorted by value 0
//                 (largest first), with suppressed rows removed and survivors
//                 packed to the front; unused trailing rows have value 0 set to
//                 -FLT_MAX.
// outputs[1] (c): int32 indices, m per batch item. c[x] is the row of a that
//                 ended up at row x of b, or -1 for unused trailing rows.
//
// The IoU threshold comes from cmd.info.nms.iou_threshold. Values 3 and 4 of
// each row are read unconditionally, so rows are expected to carry at least
// five values. Always returns CCV_NNC_EXEC_SUCCESS.
static int _ccv_nnc_nms_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  assert(input_size == 1);
  const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
  assert(output_size == 2);
  ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
  ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[1];
  const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
  const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
  const int c_nd = ccv_nnc_tensor_nd(c->info.dim);
  assert(a_nd == b_nd);
  int i;
  for (i = 0; i < a_nd; i++)
    { assert(a->info.dim[i] == b->info.dim[i]); }
  // Tensor views carry their own increments; plain tensors use their dimensions.
  const int* ainc = CCV_IS_TENSOR_VIEW(a) ? a->inc : a->info.dim;
  const int* binc = CCV_IS_TENSOR_VIEW(b) ? b->inc : b->info.dim;
  const int* cinc = CCV_IS_TENSOR_VIEW(c) ? c->inc : c->info.dim;
  const int n = a_nd >= 3 ? a->info.dim[0] : 1;
  const int aninc = a_nd >= 3 ? ainc[1] * ainc[2] : 0;
  const int bninc = b_nd >= 3 ? binc[1] * binc[2] : 0;
  const int cninc = c_nd >= 2 ? cinc[1] : 0;
  const int m = a_nd >= 3 ? a->info.dim[1] : a->info.dim[0];
  if (c_nd == 1)
    { assert(m == c->info.dim[0]); }
  else
    { assert(c_nd == 2 && n == c->info.dim[0] && m == c->info.dim[1]); }
  const int aminc = ainc[a_nd - 1];
  const int bminc = binc[b_nd - 1];
  const int d = a_nd <= 1 ? 1 : a->info.dim[a_nd - 1];
  const float iou_threshold = cmd.info.nms.iou_threshold;
  if (d == 5 && aminc == 5 && aminc == bminc) // If it is 5, we can use our quick sort implementation.
  {
    parallel_for(i, n)
    {
      int x;
      const float* const ap = a->data.f32 + i * aninc;
      float* const bp = b->data.f32 + i * bninc;
      int* const cp = c->data.i32 + i * cninc;
      for (x = 0; x < m; x++)
        cp[x] = x;
      // Rows are densely packed here, so copy them as one flat run.
      for (x = 0; x < m * d; x++)
        bp[x] = ap[x];
      _ccv_nnc_nms_sortby_f5_32f((float5*)bp, m, cp);
      _ccv_nnc_nms_suppress_sorted(bp, cp, m, 5, 5, iou_threshold);
    } parallel_endfor
  } else {
    // Otherwise, fall to use selection sort.
    parallel_for(i, n)
    {
      int x, y;
      const float* const ap = a->data.f32 + i * aninc;
      float* const bp = b->data.f32 + i * bninc;
      int* const cp = c->data.i32 + i * cninc;
      for (x = 0; x < m; x++)
        cp[x] = x;
      for (x = 0; x < m; x++)
        for (y = 0; y < d; y++)
          bp[x * bminc + y] = ap[x * aminc + y];
      for (x = 0; x < m; x++)
      {
        // Find the row with the largest value 0 among rows x..m-1.
        float v = bp[x * bminc];
        int k = x;
        for (y = x + 1; y < m; y++)
        {
          const float u = bp[y * bminc];
          if (u > v)
            k = y, v = u;
        }
        if (k == x) // Already in place, nothing to exchange.
          continue;
        for (y = 0; y < d; y++)
        {
          const float t = bp[k * bminc + y];
          bp[k * bminc + y] = bp[x * bminc + y];
          bp[x * bminc + y] = t;
        }
        // Exchange the indices exactly once per row exchange. Doing this once
        // per column would undo itself whenever d is even.
        const int ck = cp[k];
        cp[k] = cp[x];
        cp[x] = ck;
      }
      _ccv_nnc_nms_suppress_sorted(bp, cp, m, d, bminc, iou_threshold);
    } parallel_endfor
  }
  return CCV_NNC_EXEC_SUCCESS;
}
179
180
// Backward pass of non-maximum suppression (CPU reference).
//
// inputs[0]  (a): incoming float rows, d values each.
// inputs[4]  (c): int32 indices, m per batch item; a negative entry ends the list.
// outputs[0] (b): same dimensions as a. It is cleared to 0 first, then row x of
//                 a is written to row c[x] of b for every leading non-negative
//                 index.
//
// With 3 or more dimensions the first dimension is treated as a batch of n
// independent problems. Always returns CCV_NNC_EXEC_SUCCESS.
static int _ccv_nnc_nms_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  assert(input_size >= 5);
  assert(output_size == 1);
  const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
  const ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)inputs[4];
  ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
  const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
  const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
  const int c_nd = ccv_nnc_tensor_nd(c->info.dim);
  assert(a_nd == b_nd);
  int i;
  for (i = 0; i < a_nd; i++)
    { assert(a->info.dim[i] == b->info.dim[i]); }
  // Tensor views carry their own increments; plain tensors use their dimensions.
  const int* ainc = a->info.dim;
  if (CCV_IS_TENSOR_VIEW(a))
    ainc = a->inc;
  const int* binc = b->info.dim;
  if (CCV_IS_TENSOR_VIEW(b))
    binc = b->inc;
  const int* cinc = c->info.dim;
  if (CCV_IS_TENSOR_VIEW(c))
    cinc = c->inc;
  // Defaults describe the unbatched case; overridden below when batched.
  int n = 1;
  int m = a->info.dim[0];
  int aninc = 0;
  int bninc = 0;
  int cninc = 0;
  if (a_nd >= 3)
  {
    n = a->info.dim[0];
    m = a->info.dim[1];
    aninc = ainc[1] * ainc[2];
  }
  if (b_nd >= 3)
    bninc = binc[1] * binc[2];
  if (c_nd >= 2)
    cninc = cinc[1];
  if (c_nd == 1)
    { assert(m == c->info.dim[0]); }
  else
    { assert(c_nd == 2 && n == c->info.dim[0] && m == c->info.dim[1]); }
  const int aminc = ainc[a_nd - 1];
  const int bminc = binc[b_nd - 1];
  int d = 1;
  if (a_nd > 1)
    d = a->info.dim[a_nd - 1];
  parallel_for(i, n)
  {
    int row, col;
    const float* const src = a->data.f32 + i * aninc;
    const int* const index = c->data.i32 + i * cninc;
    float* const dst = b->data.f32 + i * bninc;
    // Clear every destination row before scattering.
    for (row = 0; row < m; row++)
    {
      float* const dst_row = dst + row * bminc;
      for (col = 0; col < d; col++)
        dst_row[col] = 0;
    }
    // Scatter rows until the first negative index is reached.
    for (row = 0; row < m && index[row] >= 0; row++)
    {
      const float* const src_row = src + row * aminc;
      float* const dst_row = dst + index[row] * bminc;
      for (col = 0; col < d; col++)
        dst_row[col] = src_row[col];
    }
  } parallel_endfor
  return CCV_NNC_EXEC_SUCCESS;
}
229
230
// Fills in the registry entry for the NMS forward command on the CPU
// reference backend.
REGISTER_COMMAND_BACKEND(CCV_NNC_NMS_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  // Entry point and the number of algorithms it offers.
  registry->exec = _ccv_nnc_nms_forw;
  registry->algorithms = 1;
  // Accepted tensors: 32-bit float and 32-bit integer data in CPU memory,
  // in either NHWC or NCHW format.
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  registry->tensor_datatypes = CCV_32F | CCV_32S;
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
}
238
239
// Fills in the registry entry for the NMS backward command on the CPU
// reference backend.
REGISTER_COMMAND_BACKEND(CCV_NNC_NMS_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  // Entry point and the number of algorithms it offers.
  registry->exec = _ccv_nnc_nms_back;
  registry->algorithms = 1;
  // Accepted tensors: 32-bit float and 32-bit integer data in CPU memory,
  // in either NHWC or NCHW format.
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  registry->tensor_datatypes = CCV_32F | CCV_32S;
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
}