Coverage Report

Created: 2024-08-18 16:21

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/nms/ccv_nnc_nms_cpu_ref.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
typedef struct {
14
  float v[5];
15
} float5;
16
5.08k
#define less_than(a, b, aux) ((a).v[0] > (b).v[0])
17
1.54k
#define swap_func(a, b, array, aux, t) do { \
18
1.53k
  (t) = (a); \
19
1.53k
  (a) = (b); \
20
1.53k
  (b) = (t); \
21
1.53k
  int _t = aux[&(a) - array]; \
22
1.53k
  aux[&(a) - array] = aux[&(b) - array]; \
23
1.53k
  aux[&(b) - array] = _t; \
24
1.53k
} while (0)
25
5.08k
CCV_IMPLEMENT_QSORT_EX(_ccv_nnc_nms_sortby_f5_32f, float5, less_than, 
swap_func1.53k
, int*)
26
#undef less_than
27
#undef swap_func
28
29
static int _ccv_nnc_nms_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
30
5
{
31
5
  assert(input_size == 1);
32
5
  const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
33
5
  assert(output_size == 2);
34
5
  ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
35
5
  ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[1];
36
5
  const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
37
5
  const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
38
5
  const int c_nd = ccv_nnc_tensor_nd(c->info.dim);
39
5
  assert(a_nd == b_nd);
40
5
  int i;
41
15
  for (i = 0; i < a_nd; 
i++10
)
42
10
    { assert(a->info.dim[i] == b->info.dim[i]); }
43
5
  const int n = a_nd >= 3 ? 
a->info.dim[0]0
: 1;
44
5
  const int aninc = CCV_IS_TENSOR_VIEW(a) ? 
(1
a_nd >= 31
?
a->stride[0]0
:
01
) :
(4
a_nd >= 34
?
a->info.dim[1] * a->info.dim[2]0
:
04
);
45
5
  const int bninc = CCV_IS_TENSOR_VIEW(b) ? 
(1
b_nd >= 31
?
b->stride[0]0
:
01
) :
(4
b_nd >= 34
?
b->info.dim[1] * b->info.dim[2]0
:
04
);
46
5
  const int cninc = CCV_IS_TENSOR_VIEW(c) ? 
(0
c_nd >= 20
?
c->stride[0]0
:
00
) : (c_nd >= 2 ?
c->info.dim[1]0
: 0);
47
5
  const int m = a_nd >= 3 ? 
a->info.dim[1]0
: a->info.dim[0];
48
5
  if (c_nd == 1)
49
5
    { assert(m == c->info.dim[0]); }
50
0
  else
51
0
    { assert(c_nd == 2 && n == c->info.dim[0] && m == c->info.dim[1]); }
52
5
  const int aminc = CCV_IS_TENSOR_VIEW(a) ? 
a->stride[a_nd - 2]1
:
a->info.dim[a_nd - 1]4
;
53
5
  const int bminc = CCV_IS_TENSOR_VIEW(b) ? 
b->stride[b_nd - 2]1
:
b->info.dim[b_nd - 1]4
;
54
5
  const int d = a_nd <= 1 ? 
10
: a->info.dim[a_nd - 1];
55
5
  const float iou_threshold = cmd.info.nms.iou_threshold;
56
5
  if (d == 5 && aminc == 5 && 
aminc == bminc4
) // If it is 5, we can use our quick sort implementation.
57
4
  {
58
4
    parallel_for(i, n)
59
4
    {
60
4
      int x, y;
61
4
      const float* const ap = a->data.f32 + i * aninc;
62
4
      float* const bp = b->data.f32 + i * bninc;
63
4
      int* const cp = c->data.i32 + i * cninc;
64
1.03k
      for (x = 0; x < m; 
x++1.03k
)
65
1.03k
        cp[x] = x;
66
5.15k
      for (x = 0; x < m * d; 
x++5.15k
)
67
5.15k
        bp[x] = ap[x];
68
4
      _ccv_nnc_nms_sortby_f5_32f((float5*)bp, m, cp);
69
1.03k
      for (x = 0; x < m; 
x++1.03k
)
70
1.03k
      {
71
1.03k
        float v = bp[x * 5];
72
1.03k
        if (v == -FLT_MAX) // Suppressed.
73
510
          continue;
74
520
        const float area1 = bp[x * 5 + 3] * bp[x * 5 + 4];
75
250k
        for (y = x + 1; y < m; 
y++250k
)
76
250k
        {
77
250k
          const float u = bp[y * 5];
78
250k
          if (u == -FLT_MAX) // Suppressed.
79
0
            continue;
80
250k
          const float area2 = bp[y * 5 + 3] * bp[y * 5 + 4];
81
250k
          const float xdiff = ccv_max(0, ccv_min(bp[x * 5 + 1] + bp[x * 5 + 3], bp[y * 5 + 1] + bp[y * 5 + 3]) - ccv_max(bp[x * 5 + 1], bp[y * 5 + 1]));
82
250k
          const float ydiff = ccv_max(0, ccv_min(bp[x * 5 + 2] + bp[x * 5 + 4], bp[y * 5 + 2] + bp[y * 5 + 4]) - ccv_max(bp[x * 5 + 2], bp[y * 5 + 2]));
83
250k
          const float intersection = xdiff * ydiff;
84
250k
          const float iou = intersection / (area1 + area2 - intersection);
85
250k
          if (iou >= iou_threshold)
86
510
            bp[y * 5] = -FLT_MAX;
87
250k
        }
88
520
      }
89
      // Move these values up and move suppressed to the end.
90
1.03k
      for (x = 0, y = 0; x < m; 
x++1.03k
)
91
1.03k
        if (bp[x * 5] != -FLT_MAX)
92
520
        {
93
520
          int j;
94
520
          if (x != y)
95
507
          {
96
3.04k
            for (j = 0; j < 5; 
j++2.53k
)
97
2.53k
              bp[y * 5 + j] = bp[x * 5 + j];
98
507
            cp[y] = cp[x];
99
507
          }
100
520
          ++y;
101
520
        }
102
514
      for (x = y; x < m; 
x++510
)
103
510
        cp[x] = -1, bp[x * 5] = -FLT_MAX;
104
4
    } parallel_endfor
105
4
  } else {
106
    // Otherwise, fall to use selection sort.
107
1
    parallel_for(i, n)
108
1
    {
109
1
      int x, y;
110
1
      const float* const ap = a->data.f32 + i * aninc;
111
1
      float* const bp = b->data.f32 + i * bninc;
112
1
      int* const cp = c->data.i32 + i * cninc;
113
11
      for (x = 0; x < m; 
x++10
)
114
10
        cp[x] = x;
115
11
      for (x = 0; x < m; 
x++10
)
116
60
        
for (y = 0; 10
y < d;
y++50
)
117
50
          bp[x * bminc + y] = ap[x * aminc + y];
118
11
      for (x = 0; x < m; 
x++10
)
119
10
      {
120
10
        float v = bp[x * bminc];
121
10
        int k = x;
122
55
        for (y = x + 1; y < m; 
y++45
)
123
45
        {
124
45
          const float u = bp[y * bminc];
125
45
          if (u > v)
126
25
            k = y, v = u;
127
45
        }
128
60
        for (y = 0; y < d; 
y++50
)
129
50
        {
130
50
          const float t = bp[k * bminc + y];
131
50
          bp[k * bminc + y] = bp[x * bminc + y];
132
50
          bp[x * bminc + y] = t;
133
50
          const int u = cp[k];
134
50
          cp[k] = cp[x];
135
50
          cp[x] = u;
136
50
        }
137
10
      }
138
11
      for (x = 0; x < m; 
x++10
)
139
10
      {
140
10
        float v = bp[x * bminc];
141
10
        if (v == -FLT_MAX) // Suppressed.
142
0
          continue;
143
10
        const float area1 = bp[x * bminc + 3] * bp[x * bminc + 4];
144
55
        for (y = x + 1; y < m; 
y++45
)
145
45
        {
146
45
          const float u = bp[y * bminc];
147
45
          if (u == -FLT_MAX) // Suppressed.
148
0
            continue;
149
45
          const float area2 = bp[y * bminc + 3] * bp[y * bminc + 4];
150
45
          const float xdiff = ccv_max(0, ccv_min(bp[x * bminc + 1] + bp[x * bminc + 3], bp[y * bminc + 1] + bp[y * bminc + 3]) - ccv_max(bp[x * bminc + 1], bp[y * bminc + 1]));
151
45
          const float ydiff = ccv_max(0, ccv_min(bp[x * bminc + 2] + bp[x * bminc + 4], bp[y * bminc + 2] + bp[y * bminc + 4]) - ccv_max(bp[x * bminc + 2], bp[y * bminc + 2]));
152
45
          const float intersection = xdiff * ydiff;
153
45
          const float iou = intersection / (area1 + area2 - intersection);
154
45
          if (iou >= iou_threshold)
155
0
            bp[y * bminc] = -FLT_MAX;
156
45
        }
157
10
      }
158
11
      for (x = 0, y = 0; x < m; 
x++10
)
159
10
        if (bp[x * bminc] != -FLT_MAX)
160
10
        {
161
10
          int j;
162
10
          if (x != y)
163
0
          {
164
0
            for (j = 0; j < 5; j++)
165
0
              bp[y * bminc + j] = bp[x * bminc + j];
166
0
            cp[y] = cp[x];
167
0
          }
168
10
          ++y;
169
10
        }
170
1
      for (x = y; x < m; 
x++0
)
171
0
        cp[x] = -1, bp[x * bminc] = -FLT_MAX;
172
1
    } parallel_endfor
173
1
  }
174
5
  return CCV_NNC_EXEC_SUCCESS;
175
5
}
176
177
static int _ccv_nnc_nms_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
178
1
{
179
1
  assert(input_size >= 5);
180
1
  const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
181
1
  const ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)inputs[4];
182
1
  assert(output_size == 1);
183
1
  ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
184
1
  const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
185
1
  const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
186
1
  const int c_nd = ccv_nnc_tensor_nd(c->info.dim);
187
1
  assert(a_nd == b_nd);
188
1
  int i;
189
3
  for (i = 0; i < a_nd; 
i++2
)
190
2
    { assert(a->info.dim[i] == b->info.dim[i]); }
191
1
  const int n = a_nd >= 3 ? 
a->info.dim[0]0
: 1;
192
1
  const int aninc = CCV_IS_TENSOR_VIEW(a) ? 
(0
a_nd >= 30
?
a->stride[0]0
:
00
) : (a_nd >= 3 ?
a->info.dim[1] * a->info.dim[2]0
: 0);
193
1
  const int bninc = CCV_IS_TENSOR_VIEW(b) ? 
(0
b_nd >= 30
?
b->stride[0]0
:
00
) : (b_nd >= 3 ?
b->info.dim[1] * b->info.dim[2]0
: 0);
194
1
  const int cninc = CCV_IS_TENSOR_VIEW(c) ? 
(0
c_nd >= 20
?
c->stride[0]0
:
00
) : (c_nd >= 2 ?
c->info.dim[1]0
: 0);
195
1
  const int m = a_nd >= 3 ? 
a->info.dim[1]0
: a->info.dim[0];
196
1
  if (c_nd == 1)
197
1
    { assert(m == c->info.dim[0]); }
198
0
  else
199
0
    { assert(c_nd == 2 && n == c->info.dim[0] && m == c->info.dim[1]); }
200
1
  const int aminc = CCV_IS_TENSOR_VIEW(a) ? 
a->stride[a_nd - 2]0
: a->info.dim[a_nd - 1];
201
1
  const int bminc = CCV_IS_TENSOR_VIEW(b) ? 
b->stride[b_nd - 2]0
: b->info.dim[b_nd - 1];
202
1
  const int d = a_nd <= 1 ? 
10
: a->info.dim[a_nd - 1];
203
1
  parallel_for(i, n)
204
1
  {
205
1
    int x, y;
206
1
    const float* const ap = a->data.f32 + i * aninc;
207
1
    const int* const cp = c->data.i32 + i * cninc;
208
1
    float* const bp = b->data.f32 + i * bninc;
209
11
    for (x = 0; x < m; 
x++10
)
210
60
      
for (y = 0; 10
y < d;
y++50
)
211
50
        bp[x * bminc + y] = 0;
212
6
    for (x = 0; x < m; 
x++5
)
213
6
    {
214
6
      const int k = cp[x];
215
6
      if (k < 0)
216
1
        break;
217
30
      
for (y = 0; 5
y < d;
y++25
)
218
25
        bp[k * bminc + y] = ap[x * aminc + y];
219
5
    }
220
1
  } parallel_endfor
221
1
  return CCV_NNC_EXEC_SUCCESS;
222
1
}
223
224
REGISTER_COMMAND_BACKEND(CCV_NNC_NMS_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
225
1
{
226
1
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
227
1
  registry->tensor_datatypes = CCV_32F | CCV_32S;
228
1
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
229
1
  registry->algorithms = 1;
230
1
  registry->exec = _ccv_nnc_nms_forw;
231
1
}
232
233
REGISTER_COMMAND_BACKEND(CCV_NNC_NMS_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
234
1
{
235
1
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
236
1
  registry->tensor_datatypes = CCV_32F | CCV_32S;
237
1
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
238
1
  registry->algorithms = 1;
239
1
  registry->exec = _ccv_nnc_nms_back;
240
1
}