Coverage Report

Created: 2021-04-07 21:56

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/roi/ccv_nnc_roi_align_cpu_ref.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
// Precomputed bilinear sampling data for one sample position along one axis.
typedef struct {
  int i0; // Lower neighbor index, clamped into the valid range of the axis.
  int i1; // Upper neighbor index, clamped into the valid range of the axis.
  int mute; // Non-zero when the sample lies outside of the input and must be skipped.
  float r; // Fractional distance of the sample from its (unclamped) lower neighbor.
} roi_align_coeffs_t;
17
18
// Compute the bilinear sampling coefficients along a single axis of the ROI.
//
// size:      extent of the input along this axis (h or w).
// roi_start: start of the ROI along this axis, in input coordinates.
// scale:     distance between two consecutive samples, in input coordinates.
// bin:       number of samples taken for each pooled cell.
// pool:      number of pooled cells along this axis.
// coeffs:    output, bin * pool entries.
// bin_at:    output, pool entries; how many samples of each cell are not muted.
// start_ref / end_ref: output, the half-open range [start, end) of pooled cells that have at
//            least one non-muted sample. When there is none, start == pool and end == 0.
static void _ccv_nnc_bilinear_coeffs_1d(const int size, const float roi_start, const float scale, const int bin, const int pool, roi_align_coeffs_t* const coeffs, int* const bin_at, int* const start_ref, int* const end_ref)
{
  int i, k;
  for (i = 0; i < pool; i++)
  {
    const int pi = i * bin;
    int count = 0;
    for (k = 0; k < bin; k++)
    {
      // Sample at the center of each sub-bin, then shift by half a pixel to index space.
      const float a = roi_start + (k + pi + 0.5) * scale - 0.5;
      const int ia = (int)floorf(a);
      // A sample is muted when both of its neighbors are outside of [0, size - 1].
      const int mute = (ia + 1 < 0 || ia > size - 1);
      coeffs[pi + k].i0 = ccv_clamp(ia, 0, size - 1);
      coeffs[pi + k].i1 = ccv_clamp(ia + 1, 0, size - 1);
      coeffs[pi + k].r = a - ia;
      coeffs[pi + k].mute = mute;
      if (!mute)
        ++count;
    }
    bin_at[i] = count;
  }
  // First pooled cell with at least one usable sample.
  int start = pool;
  for (i = 0; start == pool && i < pool; i++)
    if (bin_at[i] > 0)
      start = i;
  // One past the last pooled cell with at least one usable sample.
  int end = 0;
  for (i = pool - 1; end == 0 && i >= 0; i--)
    if (bin_at[i] > 0)
      end = i + 1;
  *start_ref = start;
  *end_ref = end;
}

// Precompute everything needed to bilinearly sample a ROI into a pool_h x pool_w grid.
//
// h, w:                 extent of the input.
// roi_y, roi_x:         top-left corner of the ROI, in input coordinates.
// roi_h, roi_w:         extent of the ROI, in input coordinates.
// pool_h, pool_w:       extent of the pooled output.
// bin_h_ref, bin_w_ref: receive the number of samples per pooled cell along each axis.
// y_coeffs_ref, x_coeffs_ref: receive the per-sample coefficients (bin * pool entries each).
// bin_h_at_y_ref, bin_w_at_x_ref: receive the count of non-muted samples for each pooled cell.
// start_*_ref, end_*_ref: receive the half-open range of pooled cells with usable samples.
//
// All returned arrays live in one workspace obtained from ccv_nnc_stream_context_get_workspace,
// so a later call to this function with the same stream context may hand out the same memory
// again (NOTE(review): confirm the workspace lifetime against the stream context implementation).
static void _ccv_nnc_bilinear_coeffs(ccv_nnc_stream_context_t* const stream_context, const int h, const int w, const float roi_y, const float roi_x, const float roi_h, const float roi_w, const int pool_h, const int pool_w, int* const bin_h_ref, int* const bin_w_ref, roi_align_coeffs_t** const y_coeffs_ref, roi_align_coeffs_t** const x_coeffs_ref, int** const bin_h_at_y_ref, int** const bin_w_at_x_ref, int* const start_h_ref, int* const start_w_ref, int* const end_h_ref, int* const end_w_ref)
{
  // How many samples per pooled cell. We slightly sample at a higher resolution (due to the
  // ceiling) with bilinear interpolation. A non-positive ROI extent yields no samples at all,
  // rather than a negative count that would corrupt the workspace size below.
  const int bin_h = ccv_max((int)ceilf(roi_h / pool_h), 0);
  const int bin_w = ccv_max((int)ceilf(roi_w / pool_w), 0);
  // Before averaging, the size of the sampled region in integral terms.
  const int bin_pool_h = bin_h * pool_h;
  const int bin_pool_w = bin_w * pool_w;
  // The scale to multiply back to get the original coordinate. Unused when there is no sample.
  const float scale_y = bin_pool_h > 0 ? roi_h / bin_pool_h : 0;
  const float scale_x = bin_pool_w > 0 ? roi_w / bin_pool_w : 0;
  // Workspace layout: y_coeffs | x_coeffs | bin_h_at_y | bin_w_at_x.
  roi_align_coeffs_t* const y_coeffs = (roi_align_coeffs_t*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(roi_align_coeffs_t) * (bin_pool_h + bin_pool_w) + sizeof(int) * (pool_h + pool_w), CCV_TENSOR_CPU_MEMORY);
  roi_align_coeffs_t* const x_coeffs = y_coeffs + bin_pool_h;
  int* const bin_h_at_y = (int*)(x_coeffs + bin_pool_w);
  int* const bin_w_at_x = bin_h_at_y + pool_h;
  _ccv_nnc_bilinear_coeffs_1d(h, roi_y, scale_y, bin_h, pool_h, y_coeffs, bin_h_at_y, start_h_ref, end_h_ref);
  _ccv_nnc_bilinear_coeffs_1d(w, roi_x, scale_x, bin_w, pool_w, x_coeffs, bin_w_at_x, start_w_ref, end_w_ref);
  *bin_h_ref = bin_h;
  *bin_w_ref = bin_w;
  *y_coeffs_ref = y_coeffs;
  *x_coeffs_ref = x_coeffs;
  *bin_h_at_y_ref = bin_h_at_y;
  *bin_w_at_x_ref = bin_w_at_x;
}
100
101
// Forward pass of ROI align (CPU reference implementation).
//
// inputs[0]:  the feature map a, laid out as height x width x channels with an optional
//             leading batch dimension.
// inputs[1]:  the ROIs b, 4 floats each in (x, y, w, h) order; either 1-d (a single ROI) or
//             2-d (one ROI per row).
// outputs[0]: the pooled result c, pool_h x pool_w x channels with an optional leading batch
//             dimension. Its batch size must be the larger of the batch sizes of a and b; the
//             smaller one is cycled through with a modulo.
//
// Every pooled cell is the average of the bilinearly interpolated samples that fall inside of
// the input. Cells without any such sample are left at zero.
// Always returns CCV_NNC_EXEC_SUCCESS.
static int _ccv_nnc_roi_align_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  assert(input_size == 2);
  const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
  assert(output_size == 1);
  const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1];
  ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0];
  const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
  assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2);
  // Skip the batch dimension, if there is one.
  const int* adim = (a_nd == CCV_NNC_MAX_DIM + 1) ? a->info.dim : a->info.dim + 1;
  const int h = adim[0];
  const int w = adim[1];
  const int c_nd = ccv_nnc_tensor_nd(c->info.dim);
  assert(c_nd == CCV_NNC_MAX_DIM + 1 || c_nd == CCV_NNC_MAX_DIM + 2);
  const int* cdim = (c_nd == CCV_NNC_MAX_DIM + 1) ? c->info.dim : c->info.dim + 1;
  const int pool_h = cdim[0];
  const int pool_w = cdim[1];
  assert(cdim[2] == adim[2]);
  const int ch = cdim[2];
  const float* const ap = a->data.f32;
  // For tensor views, step through memory with the view's inc rather than its own dimensions.
  const int* ainc = CCV_IS_TENSOR_VIEW(a) ? ((a_nd == CCV_NNC_MAX_DIM + 1) ? a->inc : a->inc + 1) : adim;
  const float* const bp = b->data.f32;
  float* cp = c->data.f32;
  const int* cinc = CCV_IS_TENSOR_VIEW(c) ? ((c_nd == CCV_NNC_MAX_DIM + 1) ? c->inc : c->inc + 1) : cdim;
  const int a_n = ccv_nnc_tensor_get_n(a->info);
  const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
  assert(b_nd == 1 || b_nd == 2);
  const int b_n = b_nd == 1 ? 1 : b->info.dim[0];
  const int c_n = ccv_nnc_tensor_get_n(c->info);
  assert(c_n == ccv_max(a_n, b_n));
  // Distance between two consecutive batch items; 0 when there is no batch dimension.
  const int aninc = a_nd == CCV_NNC_MAX_DIM + 1 ? 0 : ainc[0] * ainc[1] * ainc[2];
  const int* binc = CCV_IS_TENSOR_VIEW(b) ? b->inc : b->info.dim;
  const int bninc = b_nd == 1 ? 0 : binc[1];
  const int cninc = c_nd == CCV_NNC_MAX_DIM + 1 ? 0 : cinc[0] * cinc[1] * cinc[2];
  // Distance between two consecutive rows of the output.
  const int crinc = cinc[CCV_NNC_MAX_DIM - 1] * cinc[CCV_NNC_MAX_DIM];
  // The output is accumulated into, and cells without valid samples must stay zero.
  ccv_nnc_tensor_zero(c);
  int bin_h, bin_w;
  roi_align_coeffs_t* y_coeffs;
  roi_align_coeffs_t* x_coeffs;
  int* bin_h_at_y;
  int* bin_w_at_x;
  int start_h, start_w, end_h, end_w;
  int n;
  for (n = 0; n < c_n; n++)
  {
    const float* const apn = ap + (n % a_n) * aninc;
    float* const cpn = cp + n * cninc;
    // The ROI is stored as (x, y, w, h) and scaled by the input width / height here.
    const float roi_x = bp[(n % b_n) * bninc] * w;
    const float roi_y = bp[(n % b_n) * bninc + 1] * h;
    const float roi_w = bp[(n % b_n) * bninc + 2] * w;
    const float roi_h = bp[(n % b_n) * bninc + 3] * h;
    // Re-compute the offsets if b changes or it is the first time.
    if ((b_n == 1 && n == 0) || b_n > 1)
      _ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w);
    int i, j, x, y, k;
    // Only visit the pooled cells that have at least one sample inside of the input.
    for (i = start_h; i < end_h; i++)
    {
      const int pi = i * bin_h;
      const int bin_hz = bin_h_at_y[i];
      // Address the output row by i itself. Advancing a running pointer from row 0 would write
      // to the wrong rows whenever start_h > 0 (the ROI starts above the input).
      float* const cpi = cpn + i * crinc;
      for (j = start_w; j < end_w; j++)
      {
        const int pj = j * bin_w;
        const int bin_wz = bin_w_at_x[j];
        // Average over the samples that actually contributed.
        const float inv = 1.0 / (bin_hz * bin_wz);
        float* const cpz = cpi + j * cinc[CCV_NNC_MAX_DIM];
        for (y = 0; y < bin_h; y++)
        {
          if (y_coeffs[pi + y].mute)
            continue;
          const float ry = y_coeffs[pi + y].r;
          const int iy0 = y_coeffs[pi + y].i0;
          const int iy1 = y_coeffs[pi + y].i1;
          for (x = 0; x < bin_w; x++)
          {
            if (x_coeffs[pj + x].mute)
              continue;
            const float rx = x_coeffs[pj + x].r;
            const int ix0 = x_coeffs[pj + x].i0;
            const int ix1 = x_coeffs[pj + x].i1;
            // Bilinear weights of the 4 neighbors.
            const float c00 = (1 - ry) * (1 - rx);
            const float c01 = (1 - ry) * rx;
            const float c10 = ry * (1 - rx);
            const float c11 = ry * rx;
            const float* const ap00 = apn + (iy0 * ainc[CCV_NNC_MAX_DIM - 1] + ix0) * ainc[CCV_NNC_MAX_DIM];
            const float* const ap01 = apn + (iy0 * ainc[CCV_NNC_MAX_DIM - 1] + ix1) * ainc[CCV_NNC_MAX_DIM];
            const float* const ap10 = apn + (iy1 * ainc[CCV_NNC_MAX_DIM - 1] + ix0) * ainc[CCV_NNC_MAX_DIM];
            const float* const ap11 = apn + (iy1 * ainc[CCV_NNC_MAX_DIM - 1] + ix1) * ainc[CCV_NNC_MAX_DIM];
            for (k = 0; k < ch; k++)
              cpz[k] += ap00[k] * c00 + ap01[k] * c01 + ap10[k] * c10 + ap11[k] * c11;
          }
        }
        for (k = 0; k < ch; k++)
          cpz[k] *= inv;
      }
    }
  }
  return CCV_NNC_EXEC_SUCCESS;
}
199
200
// Backward pass of ROI align (CPU reference implementation).
//
// inputs[0]:  the incoming gradient g, pool_h x pool_w x channels with an optional leading
//             batch dimension.
// inputs[2]:  the ROIs b, 4 floats each in (x, y, w, h) order; either 1-d or 2-d.
// outputs[0]: the gradient o with respect to the feature map, height x width x channels with
//             an optional leading batch dimension. It is zeroed first and then accumulated into.
//
// Each gradient cell is scattered back to the 4 neighbors of every non-muted sample, weighted
// by the bilinear coefficients and divided by the number of non-muted samples of that cell.
// Always returns CCV_NNC_EXEC_SUCCESS.
static int _ccv_nnc_roi_align_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  assert(input_size >= 3);
  assert(output_size == 1);
  const ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
  const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
  ccv_nnc_tensor_view_t* const o = (ccv_nnc_tensor_view_t*)outputs[0];

  // Shape of the incoming gradient, with the batch dimension (if any) skipped.
  const int g_nd = ccv_nnc_tensor_nd(g->info.dim);
  assert(g_nd == CCV_NNC_MAX_DIM + 1 || g_nd == CCV_NNC_MAX_DIM + 2);
  const int* gdim = g->info.dim;
  if (g_nd != CCV_NNC_MAX_DIM + 1)
    gdim = g->info.dim + 1;
  const int pool_h = gdim[0];
  const int pool_w = gdim[1];

  // Shape of the outgoing gradient, with the batch dimension (if any) skipped.
  const int o_nd = ccv_nnc_tensor_nd(o->info.dim);
  assert(o_nd == CCV_NNC_MAX_DIM + 1 || o_nd == CCV_NNC_MAX_DIM + 2);
  const int* odim = o->info.dim;
  if (o_nd != CCV_NNC_MAX_DIM + 1)
    odim = o->info.dim + 1;
  const int h = odim[0];
  const int w = odim[1];
  assert(gdim[2] == odim[2]);
  const int ch = gdim[2];

  // Tensor views step through memory with their inc, plain tensors with their own dimensions.
  const int* ginc = gdim;
  if (CCV_IS_TENSOR_VIEW(g))
    ginc = (g_nd == CCV_NNC_MAX_DIM + 1) ? g->inc : g->inc + 1;
  const int* oinc = odim;
  if (CCV_IS_TENSOR_VIEW(o))
    oinc = (o_nd == CCV_NNC_MAX_DIM + 1) ? o->inc : o->inc + 1;
  const float* const gp = g->data.f32;
  float* const op = o->data.f32;
  const float* const bp = b->data.f32;

  // Batch sizes: g must have as many items as the larger of o and b.
  const int o_n = ccv_nnc_tensor_get_n(o->info);
  const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
  assert(b_nd == 1 || b_nd == 2);
  const int b_n = (b_nd == 1) ? 1 : b->info.dim[0];
  const int g_n = ccv_nnc_tensor_get_n(g->info);
  assert(g_n == ccv_max(o_n, b_n));

  // Distance between two consecutive batch items; 0 when there is no batch dimension.
  const int oninc = (o_nd == CCV_NNC_MAX_DIM + 1) ? 0 : oinc[0] * oinc[1] * oinc[2];
  const int* const binc = CCV_IS_TENSOR_VIEW(b) ? b->inc : b->info.dim;
  const int bninc = (b_nd == 1) ? 0 : binc[1];
  const int gninc = (g_nd == CCV_NNC_MAX_DIM + 1) ? 0 : ginc[0] * ginc[1] * ginc[2];
  // Distance between two consecutive rows of the incoming gradient.
  const int grinc = ginc[CCV_NNC_MAX_DIM - 1] * ginc[CCV_NNC_MAX_DIM];

  int bin_h, bin_w;
  roi_align_coeffs_t* y_coeffs;
  roi_align_coeffs_t* x_coeffs;
  int* bin_h_at_y;
  int* bin_w_at_x;
  int start_h, start_w, end_h, end_w;
  ccv_nnc_tensor_zero(o);
  int n;
  for (n = 0; n < g_n; n++)
  {
    // The ROI is stored as (x, y, w, h) and scaled by the output width / height here.
    const float* const roi = bp + (n % b_n) * bninc;
    const float roi_x = roi[0] * w;
    const float roi_y = roi[1] * h;
    const float roi_w = roi[2] * w;
    const float roi_h = roi[3] * h;
    // The coefficients only need refreshing when the ROI differs from the previous iteration:
    // always with several ROIs, only on the first pass with a single one.
    if (b_n > 1 || (b_n == 1 && n == 0))
      _ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w);
    const float* const gpn = gp + n * gninc;
    float* const opn = op + (n % o_n) * oninc;
    int row, col, sy, sx, k;
    for (row = 0; row < pool_h; row++)
    {
      const roi_align_coeffs_t* const yc = y_coeffs + row * bin_h;
      const int bin_hz = bin_h_at_y[row];
      const float* const gpr = gpn + row * grinc;
      for (col = 0; col < pool_w; col++)
      {
        const roi_align_coeffs_t* const xc = x_coeffs + col * bin_w;
        const int bin_wz = bin_w_at_x[col];
        // Only used when both counts are non-zero, as otherwise every sample below is muted.
        const float inv = 1.0 / (bin_hz * bin_wz);
        const float* const gpz = gpr + col * ginc[CCV_NNC_MAX_DIM];
        for (sy = 0; sy < bin_h; sy++)
        {
          if (!yc[sy].mute)
          {
            const float ry = yc[sy].r;
            // Offsets of the two neighboring rows, in pixels.
            const int oy0 = yc[sy].i0 * oinc[CCV_NNC_MAX_DIM - 1];
            const int oy1 = yc[sy].i1 * oinc[CCV_NNC_MAX_DIM - 1];
            for (sx = 0; sx < bin_w; sx++)
            {
              if (!xc[sx].mute)
              {
                const float rx = xc[sx].r;
                const int ix0 = xc[sx].i0;
                const int ix1 = xc[sx].i1;
                // Bilinear weights of the 4 neighbors.
                const float c00 = (1 - ry) * (1 - rx);
                const float c01 = (1 - ry) * rx;
                const float c10 = ry * (1 - rx);
                const float c11 = ry * rx;
                float* const op00 = opn + (oy0 + ix0) * oinc[CCV_NNC_MAX_DIM];
                float* const op01 = opn + (oy0 + ix1) * oinc[CCV_NNC_MAX_DIM];
                float* const op10 = opn + (oy1 + ix0) * oinc[CCV_NNC_MAX_DIM];
                float* const op11 = opn + (oy1 + ix1) * oinc[CCV_NNC_MAX_DIM];
                // The neighbors may coincide after clamping, hence keep the 4 updates separate
                // and in this order.
                for (k = 0; k < ch; k++)
                {
                  op00[k] += gpz[k] * c00 * inv;
                  op01[k] += gpz[k] * c01 * inv;
                  op10[k] += gpz[k] * c10 * inv;
                  op11[k] += gpz[k] * c11 * inv;
                }
              }
            }
          }
        }
      }
    }
  }
  return CCV_NNC_EXEC_SUCCESS;
}
301
302
// Describe the CPU reference backend of the ROI align forward command: it executes
// _ccv_nnc_roi_align_forw and declares NHWC format, CCV_32F data type and CPU memory.
REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  registry->exec = _ccv_nnc_roi_align_forw;
  registry->algorithms = 1;
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  registry->tensor_datatypes = CCV_32F;
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
}
310
311
// Describe the CPU reference backend of the ROI align backward command: it executes
// _ccv_nnc_roi_align_back and declares NHWC format, CCV_32F data type and CPU memory.
REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  registry->exec = _ccv_nnc_roi_align_back;
  registry->algorithms = 1;
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  registry->tensor_datatypes = CCV_32F;
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
}