Bug Summary

File: nnc/cmd/roi/ccv_nnc_roi_align_cpu_ref.c
Warning: line 158, column 10
Assigned value is garbage or undefined

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_roi_align_cpu_ref.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd -resource-dir /usr/local/lib/clang/18 -I ../../ -I .. 
-I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -D HAVE_CUDA_SM80 -I /usr/local/include -internal-isystem /usr/local/lib/clang/18/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2024-09-15-195407-363978-1 -x c roi/ccv_nnc_roi_align_cpu_ref.c
1#include "ccv.h"
2#include "ccv_internal.h"
3#include "nnc/ccv_nnc.h"
4#include "nnc/ccv_nnc_easy.h"
5#include "nnc/ccv_nnc_internal.h"
6#ifdef USE_OPENMP
7#include <omp.h>
8#endif
9#ifdef USE_DISPATCH
10#include <dispatch/dispatch.h>
11#endif
12
// Precomputed bilinear sampling coefficients for one sample position along one axis.
13typedef struct {
// i0, i1: the two neighboring source indices, already clamped to the valid range by the code that fills this in.
// mute: non-zero when the sample lies outside the source; such samples are skipped by the forward and backward loops.
14 int i0, i1, mute;
// r: fractional weight applied to i1; (1 - r) is applied to i0.
15 float r;
16} roi_align_coeffs_t;
17
// Precomputes the per-axis bilinear sampling tables for one region of interest.
//
// Inputs:
//   stream_context  - used only to obtain the workspace buffer that holds every table below.
//   h, w            - source extent; sample indices are clamped to [0, h - 1] and [0, w - 1].
//   roi_y, roi_x    - origin of the region, in the same units as the source indices.
//   roi_h, roi_w    - size of the region, in the same units.
//   pool_h, pool_w  - number of output cells along each axis.
// Outputs (all written unconditionally at the end of the function):
//   *bin_h_ref, *bin_w_ref           - samples taken per output cell along y and x.
//   *y_coeffs_ref, *x_coeffs_ref     - bin_h * pool_h and bin_w * pool_w coefficient entries.
//   *bin_h_at_y_ref, *bin_w_at_x_ref - per output cell, how many of its samples are not muted.
//   *start_h_ref, *end_h_ref         - first cell row with an un-muted sample and one past the last such row;
//                                      they are pool_h and 0 respectively when no row has one.
//   *start_w_ref, *end_w_ref         - the same for columns (pool_w and 0 when no column has one).
18static void _ccv_nnc_bilinear_coeffs(ccv_nnc_stream_context_t* const stream_context, const int h, const int w, const float roi_y, const float roi_x, const float roi_h, const float roi_w, const int pool_h, const int pool_w, int* const bin_h_ref, int* const bin_w_ref, roi_align_coeffs_t** const y_coeffs_ref, roi_align_coeffs_t** const x_coeffs_ref, int** const bin_h_at_y_ref, int** const bin_w_at_x_ref, int* const start_h_ref, int* const start_w_ref, int* const end_h_ref, int* const end_w_ref)
19{
20 const int bin_h = (int)ceilf(roi_h / pool_h); // How many bins in each point of the pool. We sample at a slightly higher resolution (due to the ceiling) with bilinear interpolation.
21 const int bin_w = (int)ceilf(roi_w / pool_w);
22 const int bin_pool_h = bin_h * pool_h; // Before averaging, what's the size of the region in integral term.
23 const int bin_pool_w = bin_w * pool_w;
24 const float scale_y = roi_h / bin_pool_h; // The scale to multiply back to get original coordinate.
25 const float scale_x = roi_w / bin_pool_w;
26 int x, y, i, j;
// One workspace buffer is carved into four consecutive arrays: y_coeffs, x_coeffs, bin_h_at_y, bin_w_at_x.
// NOTE(review): the returned pointer is used without a NULL check, and how long it stays valid is decided by the stream context - confirm.
27 roi_align_coeffs_t* const y_coeffs = (roi_align_coeffs_t*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(roi_align_coeffs_t) * (bin_pool_h + bin_pool_w) + sizeof(int) * (pool_h + pool_w), CCV_TENSOR_CPU_MEMORY);
28 roi_align_coeffs_t* const x_coeffs = y_coeffs + bin_pool_h;
29 int* const bin_h_at_y = (int*)(x_coeffs + bin_pool_w);
30 int* const bin_w_at_x = bin_h_at_y + pool_h;
// Vertical pass: fill y_coeffs and count the un-muted samples of every output row.
31 for (i = 0; i < pool_h; i++)
32 {
33 const int pi = i * bin_h;
34 int count = 0;
35 for (y = 0; y < bin_h; y++)
36 {
// Sample at the center of sub-bin (pi + y), then shift by half a unit to get a source index position.
37 const float ay = roi_y + (y + pi + 0.5) * scale_y - 0.5;
38 const int iy = (int)floorf(ay);
39 const float ry = ay - iy;
40 const int iy0 = ccv_clamp(iy, 0, h - 1)({ typeof (0) _a = (0); typeof (h - 1) _b = (h - 1); typeof (
iy) _x = (iy); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }
)
;
41 const int iy1 = ccv_clamp(iy + 1, 0, h - 1)({ typeof (0) _a = (0); typeof (h - 1) _b = (h - 1); typeof (
iy + 1) _x = (iy + 1); (_x < _a) ? _a : ((_x > _b) ? _b
: _x); })
;
42 y_coeffs[pi + y].i0 = iy0;
43 y_coeffs[pi + y].i1 = iy1;
44 y_coeffs[pi + y].r = ry;
// Muted when both neighbors (iy and iy + 1) fall outside [0, h - 1].
45 const int mute = (iy + 1 < 0 || iy > h - 1);
46 y_coeffs[pi + y].mute = mute;
47 if (!mute)
48 ++count;
49 }
50 bin_h_at_y[i] = count;
51 }
// start_h keeps the sentinel pool_h until the first row with a positive count is found.
52 int start_h = pool_h;
53 for (i = 0; start_h == pool_h && i < pool_h; i++)
54 if (bin_h_at_y[i] > 0)
55 start_h = i;
// end_h keeps the sentinel 0 until, scanning from the bottom, a row with a positive count is found.
56 int end_h = 0;
57 for (i = pool_h - 1; end_h == 0 && i >= 0; i--)
58 if (bin_h_at_y[i] > 0)
59 end_h = i + 1;
// Horizontal pass: identical to the vertical pass, using w, roi_x, scale_x and x_coeffs.
60 for (j = 0; j < pool_w; j++)
61 {
62 const int pj = j * bin_w;
63 int count = 0;
64 for (x = 0; x < bin_w; x++)
65 {
66 const float ax = roi_x + (x + pj + 0.5) * scale_x - 0.5;
67 const int ix = (int)floorf(ax);
68 const float rx = ax - ix;
69 const int ix0 = ccv_clamp(ix, 0, w - 1)({ typeof (0) _a = (0); typeof (w - 1) _b = (w - 1); typeof (
ix) _x = (ix); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }
)
;
70 const int ix1 = ccv_clamp(ix + 1, 0, w - 1)({ typeof (0) _a = (0); typeof (w - 1) _b = (w - 1); typeof (
ix + 1) _x = (ix + 1); (_x < _a) ? _a : ((_x > _b) ? _b
: _x); })
;
71 x_coeffs[pj + x].i0 = ix0;
72 x_coeffs[pj + x].i1 = ix1;
73 x_coeffs[pj + x].r = rx;
74 const int mute = (ix + 1 < 0 || ix > w - 1);
75 x_coeffs[pj + x].mute = mute;
76 if (!mute)
77 ++count;
78 }
79 bin_w_at_x[j] = count;
80 }
81 int start_w = pool_w;
82 for (j = 0; start_w == pool_w && j < pool_w; j++)
83 if (bin_w_at_x[j] > 0)
84 start_w = j;
85 int end_w = 0;
86 for (j = pool_w - 1; end_w == 0 && j >= 0; j--)
87 if (bin_w_at_x[j] > 0)
88 end_w = j + 1;
// Publish every result through the caller-supplied pointers.
89 *bin_h_ref = bin_h;
90 *bin_w_ref = bin_w;
91 *y_coeffs_ref = y_coeffs;
92 *x_coeffs_ref = x_coeffs;
93 *bin_h_at_y_ref = bin_h_at_y;
94 *bin_w_at_x_ref = bin_w_at_x;
95 *start_h_ref = start_h;
96 *start_w_ref = start_w;
97 *end_h_ref = end_h;
98 *end_w_ref = end_w;
99}
100
// Forward pass of ROI align for the CPU reference backend.
//
// inputs[0] is the source tensor a, inputs[1] is the region tensor b, outputs[0] is the pooled tensor c.
// a and c must have 3 or 4 dimensions and the same channel count; b must have 1 or 2 dimensions.
// For every n in [0, c_n) the function reads four floats from b (x, y, width, height, each multiplied
// by w or h), builds the bilinear tables and stores in each output cell the average of its un-muted
// bilinear samples. c is zeroed first, so cells outside [start, end) stay zero.
// Always returns CCV_NNC_EXEC_SUCCESS; invalid shapes are only caught by the asserts.
101static int _ccv_nnc_roi_align_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
102{
103 assert(input_size == 2)((void) sizeof ((input_size == 2) ? 1 : 0), __extension__ ({ if
(input_size == 2) ; else __assert_fail ("input_size == 2", "roi/ccv_nnc_roi_align_cpu_ref.c"
, 103, __extension__ __PRETTY_FUNCTION__); }))
;
1
Assuming 'input_size' is equal to 2
2
Taking true branch
104 const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
105 assert(output_size == 1)((void) sizeof ((output_size == 1) ? 1 : 0), __extension__ ({
if (output_size == 1) ; else __assert_fail ("output_size == 1"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 105, __extension__ __PRETTY_FUNCTION__
); }))
;
3
Assuming 'output_size' is equal to 1
4
Taking true branch
106 const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1];
107 ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0];
108 const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
109 assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((a_nd == (2) + 1 || a_nd == (2) + 2) ? 1 : 0)
, __extension__ ({ if (a_nd == (2) + 1 || a_nd == (2) + 2) ; else
__assert_fail ("a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 109, __extension__ __PRETTY_FUNCTION__
); }))
;
5
Assuming the condition is true
// With 4 dimensions the leading one is skipped, so adim[0], adim[1], adim[2] are read as height, width, channels.
110 const int* adim = (a_nd == CCV_NNC_MAX_DIM(2) + 1) ? a->info.dim : a->info.dim + 1;
6
'?' condition is true
111 const int h = adim[0];
112 const int w = adim[1];
113 const int c_nd = ccv_nnc_tensor_nd(c->info.dim);
114 assert(c_nd == CCV_NNC_MAX_DIM + 1 || c_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((c_nd == (2) + 1 || c_nd == (2) + 2) ? 1 : 0)
, __extension__ ({ if (c_nd == (2) + 1 || c_nd == (2) + 2) ; else
__assert_fail ("c_nd == CCV_NNC_MAX_DIM + 1 || c_nd == CCV_NNC_MAX_DIM + 2"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 114, __extension__ __PRETTY_FUNCTION__
); }))
;
7
Assuming the condition is true
115 const int* cdim = (c_nd == CCV_NNC_MAX_DIM(2) + 1) ? c->info.dim : c->info.dim + 1;
8
'?' condition is true
116 const int pool_h = cdim[0];
117 const int pool_w = cdim[1];
118 assert(cdim[2] == adim[2])((void) sizeof ((cdim[2] == adim[2]) ? 1 : 0), __extension__ (
{ if (cdim[2] == adim[2]) ; else __assert_fail ("cdim[2] == adim[2]"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 118, __extension__ __PRETTY_FUNCTION__
); }))
;
9
Assuming the condition is true
10
Taking true branch
119 const int ch = cdim[2];
120 const float* const ap = a->data.f32;
121 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
122 ccv_nnc_tensor_view_get_stride(a, astride);
123 const float* const bp = b->data.f32;
124 float* cp = c->data.f32;
125 int cstride[CCV_NNC_MAX_DIM_ALLOC(12)];
126 ccv_nnc_tensor_view_get_stride(c, cstride);
127 const int a_n = ccv_nnc_tensor_get_n(a->info);
128 const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
129 assert(b_nd == 1 || b_nd == 2)((void) sizeof ((b_nd == 1 || b_nd == 2) ? 1 : 0), __extension__
({ if (b_nd == 1 || b_nd == 2) ; else __assert_fail ("b_nd == 1 || b_nd == 2"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 129, __extension__ __PRETTY_FUNCTION__
); }))
;
11
Assuming 'b_nd' is not equal to 1
12
Assuming 'b_nd' is equal to 2
13
Taking true branch
// A 1-dimensional b is a single region shared by every n; otherwise the first dimension counts the regions.
130 const int b_n = b_nd
13.1
'b_nd' is not equal to 1
 == 1 ? 1 : b->info.dim[0];
14
'?' condition is false
131 const int c_n = ccv_nnc_tensor_get_n(c->info);
132 assert(c_n == ccv_max(a_n, b_n))((void) sizeof ((c_n == ({ typeof (a_n) _a = (a_n); typeof (b_n
) _b = (b_n); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
({ if (c_n == ({ typeof (a_n) _a = (a_n); typeof (b_n) _b = (
b_n); (_a > _b) ? _a : _b; })) ; else __assert_fail ("c_n == ccv_max(a_n, b_n)"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 132, __extension__ __PRETTY_FUNCTION__
); }))
;
15
Assuming '_a' is > '_b'
16
'?' condition is true
17
Assuming the condition is true
18
Taking true branch
// Per-n pointer increments; 0 means the same data is reused for every n.
133 const int aninc = a_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : astride[0];
19
'?' condition is true
134 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
135 ccv_nnc_tensor_view_get_stride(b, bstride);
136 const int bninc = b_nd
19.1
'b_nd' is not equal to 1
 == 1 ? 0 : bstride[CCV_NNC_MAX_DIM(2) + 2 - b_nd];
20
'?' condition is false
137 const int cninc = c_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : cstride[0];
21
'?' condition is true
138 ccv_nnc_tensor_zero(c);
// These locals are declared without initial values and are only assigned by the
// _ccv_nnc_bilinear_coeffs call inside the loop below.
139 int bin_h, bin_w;
140 roi_align_coeffs_t* y_coeffs;
141 roi_align_coeffs_t* x_coeffs;
142 int* bin_h_at_y;
143 int* bin_w_at_x;
144 int start_h, start_w, end_h, end_w;
22
'start_h' declared without an initial value
145 int n;
146 for (n = 0; n < c_n; n++)
23
Assuming 'n' is < 'c_n'
24
Loop condition is true. Entering loop body
147 {
148 const float* const apn = ap + (n % a_n) * aninc;
149 float* cpn = cp + n * cninc;
150 const float roi_x = bp[(n % b_n) * bninc] * w; // These are assumed to be real coordinates, ranging from 0 to w - 1.
151 const float roi_y = bp[(n % b_n) * bninc + 1] * h;
152 const float roi_w = bp[(n % b_n) * bninc + 2] * w;
153 const float roi_h = bp[(n % b_n) * bninc + 3] * h;
154 // Re-compute the offsets if b changes or it is the first time.
// This is the path behind the reported warning: when b_n is neither 1 nor greater than 1
// (that is, b_n <= 0) the call is never made, so start_h, end_h, bin_h and the table pointers
// are read below without ever being assigned.
// NOTE(review): consider asserting b_n >= 1 or initializing these locals; (n % b_n) above
// would also divide by zero when b_n is 0.
155 if ((b_n == 1 && n == 0) || b_n > 1)
25
Assuming 'b_n' is not equal to 1
26
Assuming 'b_n' is <= 1
27
Taking false branch
156 _ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w);
157 int i, j, x, y, k;
// Only cells in [start_h, end_h) x [start_w, end_w) are visited; those have at least one un-muted sample per axis.
// NOTE(review): cpn points at output row 0 on entry and advances one row per iteration, while i starts
// at start_h. When start_h > 0 the rows look like they are written start_h rows earlier than index i -
// confirm whether cpn should first be advanced by start_h * cstride[CCV_NNC_MAX_DIM - 1].
158 for (i = start_h; i < end_h; i++)
28
Assigned value is garbage or undefined
159 {
160 const int pi = i * bin_h;
161 const int bin_hz = bin_h_at_y[i];
162 for (j = start_w; j < end_w; j++)
163 {
164 const int pj = j * bin_w;
165 const int bin_wz = bin_w_at_x[j];
// Reciprocal of the number of un-muted samples contributing to this cell.
166 const float inv = 1.0 / (bin_hz * bin_wz);
167 float* const cpz = cpn + j * cstride[CCV_NNC_MAX_DIM(2)];
168 for (y = 0; y < bin_h; y++)
169 {
170 if (y_coeffs[pi + y].mute)
171 continue;
172 const float ry = y_coeffs[pi + y].r;
173 const int iy0 = y_coeffs[pi + y].i0;
174 const int iy1 = y_coeffs[pi + y].i1;
175 for (x = 0; x < bin_w; x++)
176 {
177 if (x_coeffs[pj + x].mute)
178 continue;
179 const float rx = x_coeffs[pj + x].r;
180 const int ix0 = x_coeffs[pj + x].i0;
181 const int ix1 = x_coeffs[pj + x].i1;
// Bilinear weights of the four neighbors; they sum to 1.
182 const float c00 = (1 - ry) * (1 - rx);
183 const float c01 = (1 - ry) * rx;
184 const float c10 = ry * (1 - rx);
185 const float c11 = ry * rx;
186 const float* const ap00 = apn + iy0 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * astride[CCV_NNC_MAX_DIM(2)];
187 const float* const ap01 = apn + iy0 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * astride[CCV_NNC_MAX_DIM(2)];
188 const float* const ap10 = apn + iy1 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * astride[CCV_NNC_MAX_DIM(2)];
189 const float* const ap11 = apn + iy1 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * astride[CCV_NNC_MAX_DIM(2)];
190 for (k = 0; k < ch; k++)
191 cpz[k] += ap00[k] * c00 + ap01[k] * c01 + ap10[k] * c10 + ap11[k] * c11;
192 }
193 }
// Turn the accumulated sum into an average over the un-muted samples.
194 for (k = 0; k < ch; k++)
195 cpz[k] *= inv;
196 }
197 cpn += cstride[CCV_NNC_MAX_DIM(2) - 1];
198 }
199 }
200 return CCV_NNC_EXEC_SUCCESS;
201}
202
// Backward pass of ROI align for the CPU reference backend.
//
// inputs[0] is the incoming gradient g (pooled shape), inputs[2] is the region tensor b, and
// outputs[0] is o, which receives the gradient scattered back over the source extent.
// inputs[1] is not read. g and o must have 3 or 4 dimensions and the same channel count;
// b must have 1 or 2 dimensions. o is zeroed once and then accumulated into for every n.
// Always returns CCV_NNC_EXEC_SUCCESS; invalid shapes are only caught by the asserts.
203static int _ccv_nnc_roi_align_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
204{
205 assert(input_size >= 3)((void) sizeof ((input_size >= 3) ? 1 : 0), __extension__ (
{ if (input_size >= 3) ; else __assert_fail ("input_size >= 3"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 205, __extension__ __PRETTY_FUNCTION__
); }))
;
206 const ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
207 assert(output_size == 1)((void) sizeof ((output_size == 1) ? 1 : 0), __extension__ ({
if (output_size == 1) ; else __assert_fail ("output_size == 1"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 207, __extension__ __PRETTY_FUNCTION__
); }))
;
208 ccv_nnc_tensor_view_t* o = (ccv_nnc_tensor_view_t*)outputs[0];
209 const int g_nd = ccv_nnc_tensor_nd(g->info.dim);
210 assert(g_nd == CCV_NNC_MAX_DIM + 1 || g_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((g_nd == (2) + 1 || g_nd == (2) + 2) ? 1 : 0)
, __extension__ ({ if (g_nd == (2) + 1 || g_nd == (2) + 2) ; else
__assert_fail ("g_nd == CCV_NNC_MAX_DIM + 1 || g_nd == CCV_NNC_MAX_DIM + 2"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 210, __extension__ __PRETTY_FUNCTION__
); }))
;
// With 4 dimensions the leading one is skipped, so gdim[0], gdim[1], gdim[2] are read as pooled height, pooled width, channels.
211 const int* gdim = (g_nd == CCV_NNC_MAX_DIM(2) + 1) ? g->info.dim : g->info.dim + 1;
212 const int pool_h = gdim[0];
213 const int pool_w = gdim[1];
214 const int o_nd = ccv_nnc_tensor_nd(o->info.dim);
215 assert(o_nd == CCV_NNC_MAX_DIM + 1 || o_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((o_nd == (2) + 1 || o_nd == (2) + 2) ? 1 : 0)
, __extension__ ({ if (o_nd == (2) + 1 || o_nd == (2) + 2) ; else
__assert_fail ("o_nd == CCV_NNC_MAX_DIM + 1 || o_nd == CCV_NNC_MAX_DIM + 2"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 215, __extension__ __PRETTY_FUNCTION__
); }))
;
216 const int* odim = (o_nd == CCV_NNC_MAX_DIM(2) + 1) ? o->info.dim : o->info.dim + 1;
217 const int h = odim[0];
218 const int w = odim[1];
219 assert(gdim[2] == odim[2])((void) sizeof ((gdim[2] == odim[2]) ? 1 : 0), __extension__ (
{ if (gdim[2] == odim[2]) ; else __assert_fail ("gdim[2] == odim[2]"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 219, __extension__ __PRETTY_FUNCTION__
); }))
;
220 const int ch = gdim[2];
221 float* gp = g->data.f32;
222 int gstride[CCV_NNC_MAX_DIM_ALLOC(12)];
223 ccv_nnc_tensor_view_get_stride(g, gstride);
224 float* op = o->data.f32;
225 int ostride[CCV_NNC_MAX_DIM_ALLOC(12)];
226 ccv_nnc_tensor_view_get_stride(o, ostride);
227 const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[2];
228 const float* const bp = b->data.f32;
229 const int o_n = ccv_nnc_tensor_get_n(o->info);
230 const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
231 assert(b_nd == 1 || b_nd == 2)((void) sizeof ((b_nd == 1 || b_nd == 2) ? 1 : 0), __extension__
({ if (b_nd == 1 || b_nd == 2) ; else __assert_fail ("b_nd == 1 || b_nd == 2"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 231, __extension__ __PRETTY_FUNCTION__
); }))
;
// A 1-dimensional b is a single region shared by every n; otherwise the first dimension counts the regions.
232 const int b_n = b_nd == 1 ? 1 : b->info.dim[0];
233 const int g_n = ccv_nnc_tensor_get_n(g->info);
234 assert(g_n == ccv_max(o_n, b_n))((void) sizeof ((g_n == ({ typeof (o_n) _a = (o_n); typeof (b_n
) _b = (b_n); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
({ if (g_n == ({ typeof (o_n) _a = (o_n); typeof (b_n) _b = (
b_n); (_a > _b) ? _a : _b; })) ; else __assert_fail ("g_n == ccv_max(o_n, b_n)"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 234, __extension__ __PRETTY_FUNCTION__
); }))
;
// Per-n pointer increments; 0 means the same data is reused for every n.
235 const int oninc = o_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : ostride[0];
236 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
237 ccv_nnc_tensor_view_get_stride(b, bstride);
238 const int bninc = b_nd == 1 ? 0 : bstride[CCV_NNC_MAX_DIM(2) + 2 - b_nd];
239 const int gninc = g_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : gstride[0];
// These locals are declared without initial values and are only assigned by the
// _ccv_nnc_bilinear_coeffs call inside the loop below.
// NOTE(review): as in the forward pass, a b_n <= 0 would skip that call and leave them unassigned.
240 int bin_h, bin_w;
241 roi_align_coeffs_t* y_coeffs;
242 roi_align_coeffs_t* x_coeffs;
243 int* bin_h_at_y;
244 int* bin_w_at_x;
// start_h, start_w, end_h and end_w are filled in by the call below but never read in this function.
245 int start_h, start_w, end_h, end_w;
246 int n;
247 ccv_nnc_tensor_zero(o);
248 for (n = 0; n < g_n; n++)
249 {
250 const float roi_x = bp[(n % b_n) * bninc] * w; // These are assumed to be real coordinates, ranging from 0 to w - 1.
251 const float roi_y = bp[(n % b_n) * bninc + 1] * h;
252 const float roi_w = bp[(n % b_n) * bninc + 2] * w;
253 const float roi_h = bp[(n % b_n) * bninc + 3] * h;
254 // Re-compute the offsets if b changes or it is the first time.
255 if ((b_n == 1 && n == 0) || b_n > 1)
256 _ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w);
257 const float* gpn = gp + n * gninc;
258 float* const opn = op + (n % o_n) * oninc;
259 int x, y, i, j, k;
// Unlike the forward pass, every pool cell is visited (0 to pool_h, 0 to pool_w), so gpn stays
// aligned with row i.
260 for (i = 0; i < pool_h; i++)
261 {
262 const int pi = i * bin_h;
263 const int bin_hz = bin_h_at_y[i];
264 for (j = 0; j < pool_w; j++)
265 {
266 const int pj = j * bin_w;
267 const int bin_wz = bin_w_at_x[j];
// Reciprocal of the number of un-muted samples of this cell. When that count is 0 the value is
// infinite, but it is never applied: a zero count on an axis means every sample on that axis is
// muted, so the loops below skip all of them.
268 const float inv = 1.0 / (bin_hz * bin_wz);
269 const float* const gpz = gpn + j * gstride[CCV_NNC_MAX_DIM(2)];
270 for (y = 0; y < bin_h; y++)
271 {
272 if (y_coeffs[pi + y].mute)
273 continue;
274 const float ry = y_coeffs[pi + y].r;
275 const int iy0 = y_coeffs[pi + y].i0;
276 const int iy1 = y_coeffs[pi + y].i1;
277 for (x = 0; x < bin_w; x++)
278 {
279 if (x_coeffs[pj + x].mute)
280 continue;
281 const float rx = x_coeffs[pj + x].r;
282 const int ix0 = x_coeffs[pj + x].i0;
283 const int ix1 = x_coeffs[pj + x].i1;
// Bilinear weights of the four neighbors; they sum to 1.
284 const float c00 = (1 - ry) * (1 - rx);
285 const float c01 = (1 - ry) * rx;
286 const float c10 = ry * (1 - rx);
287 const float c11 = ry * rx;
288 float* const op00 = opn + iy0 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * ostride[CCV_NNC_MAX_DIM(2)];
289 float* const op01 = opn + iy0 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * ostride[CCV_NNC_MAX_DIM(2)];
290 float* const op10 = opn + iy1 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * ostride[CCV_NNC_MAX_DIM(2)];
291 float* const op11 = opn + iy1 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * ostride[CCV_NNC_MAX_DIM(2)];
// Scatter the cell gradient to the four neighbors, scaled by the bilinear weight and the averaging factor.
292 for (k = 0; k < ch; k++)
293 {
294 op00[k] += gpz[k] * c00 * inv;
295 op01[k] += gpz[k] * c01 * inv;
296 op10[k] += gpz[k] * c10 * inv;
297 op11[k] += gpz[k] * c11 * inv;
298 }
299 }
300 }
301 }
302 gpn += gstride[CCV_NNC_MAX_DIM(2) - 1];
303 }
304 }
305 return CCV_NNC_EXEC_SUCCESS;
306}
307
// Fills in the registry entry for the CPU reference backend of the ROI align forward command:
// NHWC tensor format, 32-bit float data, CPU memory, one algorithm, executed by _ccv_nnc_roi_align_forw.
308REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_ROI_ALIGN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
309{
310 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
311 registry->tensor_datatypes = CCV_32F;
312 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
313 registry->algorithms = 1;
314 registry->exec = _ccv_nnc_roi_align_forw;
315}
316
// Fills in the registry entry for the CPU reference backend of the ROI align backward command:
// NHWC tensor format, 32-bit float data, CPU memory, one algorithm, executed by _ccv_nnc_roi_align_back.
317REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_ROI_ALIGN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
318{
319 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
320 registry->tensor_datatypes = CCV_32F;
321 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
322 registry->algorithms = 1;
323 registry->exec = _ccv_nnc_roi_align_back;
324}