Bug Summary

File: nnc/cmd/roi/ccv_nnc_roi_align_cpu_ref.c
Warning: line 155, column 10
Assigned value is garbage or undefined
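In context, the warning concerns the locals that `_ccv_nnc_bilinear_coeffs` is expected to fill in: `start_h` and its companions are declared at line 141 without initial values and are only assigned when the guard at line 152 is taken. On the analyzer's path (steps 34-36 in the listing below) that guard is assumed false, so the read of `start_h` in the loop header at line 155 uses an indeterminate value.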

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_roi_align_cpu_ref.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd -resource-dir /usr/local/lib/clang/13.0.0 -I ../../ -I .. -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D USE_DISPATCH -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/9/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -fdebug-compilation-dir=/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd -ferror-limit 19 -fblocks -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/buildslave/public_html/analyze/2021-10-28-200245-469756-1 -x c roi/ccv_nnc_roi_align_cpu_ref.c
1#include "ccv.h"
2#include "ccv_internal.h"
3#include "nnc/ccv_nnc.h"
4#include "nnc/ccv_nnc_easy.h"
5#include "nnc/ccv_nnc_internal.h"
6#ifdef USE_OPENMP
7#include <omp.h>
8#endif
9#ifdef USE_DISPATCH
10#include <dispatch/dispatch.h>
11#endif
12
13typedef struct {
14 int i0, i1, mute;
15 float r;
16} roi_align_coeffs_t;
17
18static void _ccv_nnc_bilinear_coeffs(ccv_nnc_stream_context_t* const stream_context, const int h, const int w, const float roi_y, const float roi_x, const float roi_h, const float roi_w, const int pool_h, const int pool_w, int* const bin_h_ref, int* const bin_w_ref, roi_align_coeffs_t** const y_coeffs_ref, roi_align_coeffs_t** const x_coeffs_ref, int** const bin_h_at_y_ref, int** const bin_w_at_x_ref, int* const start_h_ref, int* const start_w_ref, int* const end_h_ref, int* const end_w_ref)
19{
20 const int bin_h = (int)ceilf(roi_h / pool_h); // How many bins in each point of the pool. We sample at a slightly higher resolution (due to the ceiling) with bilinear interpolation.
21 const int bin_w = (int)ceilf(roi_w / pool_w);
22 const int bin_pool_h = bin_h * pool_h; // Before averaging, the size of the region in integral terms.
23 const int bin_pool_w = bin_w * pool_w;
24 const float scale_y = roi_h / bin_pool_h; // The scale to multiply back to get original coordinate.
25 const float scale_x = roi_w / bin_pool_w;
26 int x, y, i, j;
27 roi_align_coeffs_t* const y_coeffs = (roi_align_coeffs_t*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(roi_align_coeffs_t) * (bin_pool_h + bin_pool_w) + sizeof(int) * (pool_h + pool_w), CCV_TENSOR_CPU_MEMORY);
28 roi_align_coeffs_t* const x_coeffs = y_coeffs + bin_pool_h;
29 int* const bin_h_at_y = (int*)(x_coeffs + bin_pool_w);
30 int* const bin_w_at_x = bin_h_at_y + pool_h;
31 for (i = 0; i < pool_h; i++)
32 {
33 const int pi = i * bin_h;
34 int count = 0;
35 for (y = 0; y < bin_h; y++)
36 {
37 const float ay = roi_y + (y + pi + 0.5) * scale_y - 0.5;
38 const int iy = (int)floorf(ay);
39 const float ry = ay - iy;
40 const int iy0 = ccv_clamp(iy, 0, h - 1);
41 const int iy1 = ccv_clamp(iy + 1, 0, h - 1);
42 y_coeffs[pi + y].i0 = iy0;
43 y_coeffs[pi + y].i1 = iy1;
44 y_coeffs[pi + y].r = ry;
45 const int mute = (iy + 1 < 0 || iy > h - 1);
46 y_coeffs[pi + y].mute = mute;
47 if (!mute)
48 ++count;
49 }
50 bin_h_at_y[i] = count;
51 }
52 int start_h = pool_h;
53 for (i = 0; start_h == pool_h && i < pool_h; i++)
54 if (bin_h_at_y[i] > 0)
55 start_h = i;
56 int end_h = 0;
57 for (i = pool_h - 1; end_h == 0 && i >= 0; i--)
58 if (bin_h_at_y[i] > 0)
59 end_h = i + 1;
60 for (j = 0; j < pool_w; j++)
61 {
62 const int pj = j * bin_w;
63 int count = 0;
64 for (x = 0; x < bin_w; x++)
65 {
66 const float ax = roi_x + (x + pj + 0.5) * scale_x - 0.5;
67 const int ix = (int)floorf(ax);
68 const float rx = ax - ix;
69 const int ix0 = ccv_clamp(ix, 0, w - 1);
70 const int ix1 = ccv_clamp(ix + 1, 0, w - 1);
71 x_coeffs[pj + x].i0 = ix0;
72 x_coeffs[pj + x].i1 = ix1;
73 x_coeffs[pj + x].r = rx;
74 const int mute = (ix + 1 < 0 || ix > w - 1);
75 x_coeffs[pj + x].mute = mute;
76 if (!mute)
77 ++count;
78 }
79 bin_w_at_x[j] = count;
80 }
81 int start_w = pool_w;
82 for (j = 0; start_w == pool_w && j < pool_w; j++)
83 if (bin_w_at_x[j] > 0)
84 start_w = j;
85 int end_w = 0;
86 for (j = pool_w - 1; end_w == 0 && j >= 0; j--)
87 if (bin_w_at_x[j] > 0)
88 end_w = j + 1;
89 *bin_h_ref = bin_h;
90 *bin_w_ref = bin_w;
91 *y_coeffs_ref = y_coeffs;
92 *x_coeffs_ref = x_coeffs;
93 *bin_h_at_y_ref = bin_h_at_y;
94 *bin_w_at_x_ref = bin_w_at_x;
95 *start_h_ref = start_h;
96 *start_w_ref = start_w;
97 *end_h_ref = end_h;
98 *end_w_ref = end_w;
99}
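To make the bin arithmetic at lines 20-25 concrete, here is a tiny standalone illustration with hypothetical numbers (roi_h = 7 and pool_h = 3 are made up for the example; they do not come from the report):

    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        const float roi_h = 7.f;                      /* hypothetical ROI height in input pixels */
        const int pool_h = 3;                         /* hypothetical pooled output height */
        const int bin_h = (int)ceilf(roi_h / pool_h); /* = 3 bilinear samples per pooled row */
        const int bin_pool_h = bin_h * pool_h;        /* = 9 sample rows before averaging */
        const float scale_y = roi_h / bin_pool_h;     /* = 7/9 ~ 0.778, maps a sample index back to ROI coordinates */
        printf("bin_h=%d bin_pool_h=%d scale_y=%f\n", bin_h, bin_pool_h, scale_y);
        return 0;
    }

So each pooled output cell averages a bin_h x bin_w grid of bilinear samples mapped back into the ROI, which is the role of the y_coeffs/x_coeffs tables built above.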
100
101static int _ccv_nnc_roi_align_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
102{
103 assert(input_size == 2);
1
Assuming 'input_size' is equal to 2
2
Taking true branch
104 const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
105 assert(output_size == 1);
3
Assuming 'output_size' is equal to 1
4
Taking true branch
106 const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1];
107 ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0];
108 const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
109 assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2);
5
Assuming the condition is false
6
Assuming the condition is true
7
Taking true branch
110 const int* adim = (a_nd == CCV_NNC_MAX_DIM + 1) ? a->info.dim : a->info.dim + 1;
8
'?' condition is false
111 const int h = adim[0];
112 const int w = adim[1];
113 const int c_nd = ccv_nnc_tensor_nd(c->info.dim);
114 assert(c_nd == CCV_NNC_MAX_DIM + 1 || c_nd == CCV_NNC_MAX_DIM + 2);
9
Assuming the condition is false
10
Assuming the condition is true
11
Taking true branch
115 const int* cdim = (c_nd == CCV_NNC_MAX_DIM + 1) ? c->info.dim : c->info.dim + 1;
12
'?' condition is false
116 const int pool_h = cdim[0];
117 const int pool_w = cdim[1];
118 assert(cdim[2] == adim[2]);
13
Assuming the condition is true
14
Taking true branch
119 const int ch = cdim[2];
120 const float* const ap = a->data.f32;
121 const int* ainc = CCV_IS_TENSOR_VIEW(a) ? ((a_nd == CCV_NNC_MAX_DIM + 1) ? a->inc : a->inc + 1) : adim;
15
Assuming the condition is false
16
'?' condition is false
122 const float* const bp = b->data.f32;
123 float* cp = c->data.f32;
124 const int* cinc = CCV_IS_TENSOR_VIEW(c) ? ((c_nd == CCV_NNC_MAX_DIM + 1) ? c->inc : c->inc + 1) : cdim;
17
Assuming the condition is false
18
'?' condition is false
125 const int a_n = ccv_nnc_tensor_get_n(a->info);
126 const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
127 assert(b_nd == 1 || b_nd == 2);
19
Assuming 'b_nd' is not equal to 1
20
Assuming 'b_nd' is equal to 2
21
Taking true branch
128 const int b_n = b_nd == 1 ? 1 : b->info.dim[0];
21.1
'b_nd' is not equal to 1
22
'?' condition is false
129 const int c_n = ccv_nnc_tensor_get_n(c->info);
130 assert(c_n == ccv_max(a_n, b_n));
23
Assuming '_a' is > '_b'
24
'?' condition is true
25
Taking true branch
131 const int aninc = a_nd == CCV_NNC_MAX_DIM + 1 ? 0 : ainc[0] * ainc[1] * ainc[2];
26
'?' condition is false
132 const int* binc = CCV_IS_TENSOR_VIEW(b) ? b->inc : b->info.dim;
27
Assuming the condition is true
28
'?' condition is true
133 const int bninc = b_nd == 1 ? 0 : binc[1];
28.1
'b_nd' is not equal to 1
29
'?' condition is false
134 const int cninc = c_nd == CCV_NNC_MAX_DIM + 1 ? 0 : cinc[0] * cinc[1] * cinc[2];
30
'?' condition is false
135 ccv_nnc_tensor_zero(c);
136 int bin_h, bin_w;
137 roi_align_coeffs_t* y_coeffs;
138 roi_align_coeffs_t* x_coeffs;
139 int* bin_h_at_y;
140 int* bin_w_at_x;
141 int start_h, start_w, end_h, end_w;
31
'start_h' declared without an initial value
142 int n;
143 for (n = 0; n < c_n; n++)
32
Assuming 'n' is < 'c_n'
33
Loop condition is true. Entering loop body
144 {
145 const float* const apn = ap + (n % a_n) * aninc;
146 float* cpn = cp + n * cninc;
147 const float roi_x = bp[(n % b_n) * bninc] * w; // These are assumed to be real coordinates, in the range 0 to w - 1.
148 const float roi_y = bp[(n % b_n) * bninc + 1] * h;
149 const float roi_w = bp[(n % b_n) * bninc + 2] * w;
150 const float roi_h = bp[(n % b_n) * bninc + 3] * h;
151 // Re-compute the offsets if b changes or it is the first time.
152 if ((b_n == 1 && n == 0) || b_n > 1)
34
Assuming 'b_n' is not equal to 1
35
Assuming 'b_n' is <= 1
36
Taking false branch
153 _ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w);
154 int i, j, x, y, k;
155 for (i = start_h; i < end_h; i++)
37
Assigned value is garbage or undefined
156 {
157 const int pi = i * bin_h;
158 const int bin_hz = bin_h_at_y[i];
159 for (j = start_w; j < end_w; j++)
160 {
161 const int pj = j * bin_w;
162 const int bin_wz = bin_w_at_x[j];
163 const float inv = 1.0 / (bin_hz * bin_wz);
164 float* const cpz = cpn + j * cinc[CCV_NNC_MAX_DIM];
165 for (y = 0; y < bin_h; y++)
166 {
167 if (y_coeffs[pi + y].mute)
168 continue;
169 const float ry = y_coeffs[pi + y].r;
170 const int iy0 = y_coeffs[pi + y].i0;
171 const int iy1 = y_coeffs[pi + y].i1;
172 for (x = 0; x < bin_w; x++)
173 {
174 if (x_coeffs[pj + x].mute)
175 continue;
176 const float rx = x_coeffs[pj + x].r;
177 const int ix0 = x_coeffs[pj + x].i0;
178 const int ix1 = x_coeffs[pj + x].i1;
179 const float c00 = (1 - ry) * (1 - rx);
180 const float c01 = (1 - ry) * rx;
181 const float c10 = ry * (1 - rx);
182 const float c11 = ry * rx;
183 const float* const ap00 = apn + (iy0 * ainc[CCV_NNC_MAX_DIM - 1] + ix0) * ainc[CCV_NNC_MAX_DIM];
184 const float* const ap01 = apn + (iy0 * ainc[CCV_NNC_MAX_DIM - 1] + ix1) * ainc[CCV_NNC_MAX_DIM];
185 const float* const ap10 = apn + (iy1 * ainc[CCV_NNC_MAX_DIM - 1] + ix0) * ainc[CCV_NNC_MAX_DIM];
186 const float* const ap11 = apn + (iy1 * ainc[CCV_NNC_MAX_DIM - 1] + ix1) * ainc[CCV_NNC_MAX_DIM];
187 for (k = 0; k < ch; k++)
188 cpz[k] += ap00[k] * c00 + ap01[k] * c01 + ap10[k] * c10 + ap11[k] * c11;
189 }
190 }
191 for (k = 0; k < ch; k++)
192 cpz[k] *= inv;
193 }
194 cpn += cinc[CCV_NNC_MAX_DIM - 1] * cinc[CCV_NNC_MAX_DIM];
195 }
196 }
197 return CCV_NNC_EXEC_SUCCESS;
198}
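The path above requires b_n <= 0 at line 152 (steps 34-35), which cannot happen for a well-formed ROI tensor whose first dimension is at least 1, but the analyzer has no way to know that. A minimal sketch of one way to address it, assuming the maintainers are willing either to give the conditionally-assigned locals defined defaults or to state the invariant explicitly (this is an illustration, not the upstream fix):

    /* At the top of _ccv_nnc_roi_align_forw, replace the bare declarations with: */
    int bin_h = 0, bin_w = 0;
    roi_align_coeffs_t* y_coeffs = 0;
    roi_align_coeffs_t* x_coeffs = 0;
    int* bin_h_at_y = 0;
    int* bin_w_at_x = 0;
    int start_h = 0, start_w = 0, end_h = 0, end_w = 0;
    /* Alternatively, after computing b_n, document the invariant the analyzer is missing
     * so it can prune the b_n <= 0 path (assert is already used throughout this file): */
    assert(b_n >= 1);

With zero initialization the loop at line 155 simply does not execute on the suspect path; with the assert, the path where the guard at line 152 is skipped on the first iteration becomes infeasible. Either change should make the report go away.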
199
200static int _ccv_nnc_roi_align_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
201{
202 assert(input_size >= 3);
203 const ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
204 assert(output_size == 1);
205 ccv_nnc_tensor_view_t* o = (ccv_nnc_tensor_view_t*)outputs[0];
206 const int g_nd = ccv_nnc_tensor_nd(g->info.dim);
207 assert(g_nd == CCV_NNC_MAX_DIM + 1 || g_nd == CCV_NNC_MAX_DIM + 2);
208 const int* gdim = (g_nd == CCV_NNC_MAX_DIM + 1) ? g->info.dim : g->info.dim + 1;
209 const int pool_h = gdim[0];
210 const int pool_w = gdim[1];
211 const int o_nd = ccv_nnc_tensor_nd(o->info.dim);
212 assert(o_nd == CCV_NNC_MAX_DIM + 1 || o_nd == CCV_NNC_MAX_DIM + 2);
213 const int* odim = (o_nd == CCV_NNC_MAX_DIM + 1) ? o->info.dim : o->info.dim + 1;
214 const int h = odim[0];
215 const int w = odim[1];
216 assert(gdim[2] == odim[2]);
217 const int ch = gdim[2];
218 float* gp = g->data.f32;
219 const int* ginc = CCV_IS_TENSOR_VIEW(g) ? ((g_nd == CCV_NNC_MAX_DIM + 1) ? g->inc : g->inc + 1) : gdim;
220 float* op = o->data.f32;
221 const int* oinc = CCV_IS_TENSOR_VIEW(o) ? ((o_nd == CCV_NNC_MAX_DIM + 1) ? o->inc : o->inc + 1) : odim;
222 const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[2];
223 const float* const bp = b->data.f32;
224 const int o_n = ccv_nnc_tensor_get_n(o->info);
225 const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
226 assert(b_nd == 1 || b_nd == 2);
227 const int b_n = b_nd == 1 ? 1 : b->info.dim[0];
228 const int g_n = ccv_nnc_tensor_get_n(g->info);
229 assert(g_n == ccv_max(o_n, b_n));
230 const int oninc = o_nd == CCV_NNC_MAX_DIM + 1 ? 0 : oinc[0] * oinc[1] * oinc[2];
231 const int* binc = CCV_IS_TENSOR_VIEW(b) ? b->inc : b->info.dim;
232 const int bninc = b_nd == 1 ? 0 : binc[1];
233 const int gninc = g_nd == CCV_NNC_MAX_DIM + 1 ? 0 : ginc[0] * ginc[1] * ginc[2];
234 int bin_h, bin_w;
235 roi_align_coeffs_t* y_coeffs;
236 roi_align_coeffs_t* x_coeffs;
237 int* bin_h_at_y;
238 int* bin_w_at_x;
239 int start_h, start_w, end_h, end_w;
240 int n;
241 ccv_nnc_tensor_zero(o);
242 for (n = 0; n < g_n; n++)
243 {
244 const float roi_x = bp[(n % b_n) * bninc] * w; // These are assumed to be real coordinates, in the range 0 to w - 1.
245 const float roi_y = bp[(n % b_n) * bninc + 1] * h;
246 const float roi_w = bp[(n % b_n) * bninc + 2] * w;
247 const float roi_h = bp[(n % b_n) * bninc + 3] * h;
248 // Re-compute the offsets if b changes or it is the first time.
249 if ((b_n == 1 && n == 0) || b_n > 1)
250 _ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w);
251 const float* gpn = gp + n * gninc;
252 float* const opn = op + (n % o_n) * oninc;
253 int x, y, i, j, k;
254 for (i = 0; i < pool_h; i++)
255 {
256 const int pi = i * bin_h;
257 const int bin_hz = bin_h_at_y[i];
258 for (j = 0; j < pool_w; j++)
259 {
260 const int pj = j * bin_w;
261 const int bin_wz = bin_w_at_x[j];
262 const float inv = 1.0 / (bin_hz * bin_wz);
263 const float* const gpz = gpn + j * ginc[CCV_NNC_MAX_DIM];
264 for (y = 0; y < bin_h; y++)
265 {
266 if (y_coeffs[pi + y].mute)
267 continue;
268 const float ry = y_coeffs[pi + y].r;
269 const int iy0 = y_coeffs[pi + y].i0;
270 const int iy1 = y_coeffs[pi + y].i1;
271 for (x = 0; x < bin_w; x++)
272 {
273 if (x_coeffs[pj + x].mute)
274 continue;
275 const float rx = x_coeffs[pj + x].r;
276 const int ix0 = x_coeffs[pj + x].i0;
277 const int ix1 = x_coeffs[pj + x].i1;
278 const float c00 = (1 - ry) * (1 - rx);
279 const float c01 = (1 - ry) * rx;
280 const float c10 = ry * (1 - rx);
281 const float c11 = ry * rx;
282 float* const op00 = opn + (iy0 * oinc[CCV_NNC_MAX_DIM - 1] + ix0) * oinc[CCV_NNC_MAX_DIM];
283 float* const op01 = opn + (iy0 * oinc[CCV_NNC_MAX_DIM - 1] + ix1) * oinc[CCV_NNC_MAX_DIM];
284 float* const op10 = opn + (iy1 * oinc[CCV_NNC_MAX_DIM - 1] + ix0) * oinc[CCV_NNC_MAX_DIM];
285 float* const op11 = opn + (iy1 * oinc[CCV_NNC_MAX_DIM - 1] + ix1) * oinc[CCV_NNC_MAX_DIM];
286 for (k = 0; k < ch; k++)
287 {
288 op00[k] += gpz[k] * c00 * inv;
289 op01[k] += gpz[k] * c01 * inv;
290 op10[k] += gpz[k] * c10 * inv;
291 op11[k] += gpz[k] * c11 * inv;
292 }
293 }
294 }
295 }
296 gpn += ginc[CCV_NNC_MAX_DIM - 1] * ginc[CCV_NNC_MAX_DIM];
297 }
298 }
299 return CCV_NNC_EXEC_SUCCESS;
300}
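Note that _ccv_nnc_roi_align_back above follows the same shape: bin_h, y_coeffs, bin_h_at_y and the rest (lines 234-239) are only assigned when the guard at line 249 is taken, and are first read at lines 256-257. The report flags only the forward kernel, but if the initialization sketch above were adopted it would presumably be worth applying it to both functions.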
301
302REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_FORWARD, CCV_NNC_BACKEND_CPU_REF)
303{
304 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
305 registry->tensor_datatypes = CCV_32F;
306 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
307 registry->algorithms = 1;
308 registry->exec = _ccv_nnc_roi_align_forw;
309}
310
311REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_BACKWARD, CCV_NNC_BACKEND_CPU_REF)
312{
313 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
314 registry->tensor_datatypes = CCV_32F;
315 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
316 registry->algorithms = 1;
317 registry->exec = _ccv_nnc_roi_align_back;
318}