// File: nnc/cmd/roi/ccv_nnc_roi_align_cpu_ref.c
// Static analyzer report to address: line 256, column 21 —
// "The right operand of '*' is a garbage value" (bin_h potentially read uninitialized).
#include "ccv.h"
#include "ccv_internal.h"
#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_easy.h"
#include "nnc/ccv_nnc_internal.h"
#ifdef USE_OPENMP
#include <omp.h>
#endif
#ifdef USE_DISPATCH
#include <dispatch/dispatch.h>
#endif
12 | |||||
// Per-sample bilinear interpolation entry, precomputed once per ROI for one axis.
typedef struct {
	int i0, i1, mute; // i0 / i1: clamped lower / upper source indices; mute: nonzero when the sample lies entirely outside the feature map and must be skipped.
	float r; // Fractional interpolation weight toward i1 (weight toward i0 is 1 - r).
} roi_align_coeffs_t;
17 | |||||
// Precompute, for a single ROI, the bilinear sampling tables shared by the RoIAlign
// forward and backward passes:
//   - y_coeffs / x_coeffs: per-sample source indices + fractional weights (bin_pool_h / bin_pool_w entries),
//   - bin_h_at_y / bin_w_at_x: count of valid (non-muted) samples per pool row / column,
//   - start/end_h, start/end_w: the sub-rectangle of pool cells that has any valid sample.
// All tables live in a single stream-context workspace allocation; callers must not free them.
// NOTE(review): assumes pool_h > 0, pool_w > 0 and roi_h, roi_w > 0 (bin_pool_* used as divisor) — confirm with callers.
static void _ccv_nnc_bilinear_coeffs(ccv_nnc_stream_context_t* const stream_context, const int h, const int w, const float roi_y, const float roi_x, const float roi_h, const float roi_w, const int pool_h, const int pool_w, int* const bin_h_ref, int* const bin_w_ref, roi_align_coeffs_t** const y_coeffs_ref, roi_align_coeffs_t** const x_coeffs_ref, int** const bin_h_at_y_ref, int** const bin_w_at_x_ref, int* const start_h_ref, int* const start_w_ref, int* const end_h_ref, int* const end_w_ref)
{
	const int bin_h = (int)ceilf(roi_h / pool_h); // How many bins in each point of the pool. We slightly sampling at higher resolution (due to ceiling) with bilinear interpolation.
	const int bin_w = (int)ceilf(roi_w / pool_w);
	const int bin_pool_h = bin_h * pool_h; // Before averaging, what's the size of the region in integral term.
	const int bin_pool_w = bin_w * pool_w;
	const float scale_y = roi_h / bin_pool_h; // The scale to multiply back to get original coordinate.
	const float scale_x = roi_w / bin_pool_w;
	int x, y, i, j;
	// One workspace allocation carved into: y_coeffs | x_coeffs | bin_h_at_y | bin_w_at_x.
	roi_align_coeffs_t* const y_coeffs = (roi_align_coeffs_t*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(roi_align_coeffs_t) * (bin_pool_h + bin_pool_w) + sizeof(int) * (pool_h + pool_w), CCV_TENSOR_CPU_MEMORY);
	roi_align_coeffs_t* const x_coeffs = y_coeffs + bin_pool_h;
	int* const bin_h_at_y = (int*)(x_coeffs + bin_pool_w);
	int* const bin_w_at_x = bin_h_at_y + pool_h;
	for (i = 0; i < pool_h; i++)
	{
		const int pi = i * bin_h;
		int count = 0;
		for (y = 0; y < bin_h; y++)
		{
			const float ay = roi_y + (y + pi + 0.5) * scale_y - 0.5; // Sample center mapped back into input coordinates.
			const int iy = (int)floorf(ay);
			const float ry = ay - iy; // Fractional part = weight toward iy + 1.
			const int iy0 = ccv_clamp(iy, 0, h - 1);
			const int iy1 = ccv_clamp(iy + 1, 0, h - 1);
			y_coeffs[pi + y].i0 = iy0;
			y_coeffs[pi + y].i1 = iy1;
			y_coeffs[pi + y].r = ry;
			const int mute = (iy + 1 < 0 || iy > h - 1); // Both neighbors off the map: skip this sample entirely.
			y_coeffs[pi + y].mute = mute;
			if (!mute)
				++count;
		}
		bin_h_at_y[i] = count; // Valid sample rows contributing to pool row i.
	}
	// First / last pool rows that have at least one valid sample.
	int start_h = pool_h;
	for (i = 0; start_h == pool_h && i < pool_h; i++)
		if (bin_h_at_y[i] > 0)
			start_h = i;
	int end_h = 0;
	for (i = pool_h - 1; end_h == 0 && i >= 0; i--)
		if (bin_h_at_y[i] > 0)
			end_h = i + 1;
	for (j = 0; j < pool_w; j++)
	{
		const int pj = j * bin_w;
		int count = 0;
		for (x = 0; x < bin_w; x++)
		{
			const float ax = roi_x + (x + pj + 0.5) * scale_x - 0.5;
			const int ix = (int)floorf(ax);
			const float rx = ax - ix;
			const int ix0 = ccv_clamp(ix, 0, w - 1);
			const int ix1 = ccv_clamp(ix + 1, 0, w - 1);
			x_coeffs[pj + x].i0 = ix0;
			x_coeffs[pj + x].i1 = ix1;
			x_coeffs[pj + x].r = rx;
			const int mute = (ix + 1 < 0 || ix > w - 1);
			x_coeffs[pj + x].mute = mute;
			if (!mute)
				++count;
		}
		bin_w_at_x[j] = count; // Valid sample columns contributing to pool column j.
	}
	// First / last pool columns that have at least one valid sample.
	int start_w = pool_w;
	for (j = 0; start_w == pool_w && j < pool_w; j++)
		if (bin_w_at_x[j] > 0)
			start_w = j;
	int end_w = 0;
	for (j = pool_w - 1; end_w == 0 && j >= 0; j--)
		if (bin_w_at_x[j] > 0)
			end_w = j + 1;
	*bin_h_ref = bin_h;
	*bin_w_ref = bin_w;
	*y_coeffs_ref = y_coeffs;
	*x_coeffs_ref = x_coeffs;
	*bin_h_at_y_ref = bin_h_at_y;
	*bin_w_at_x_ref = bin_w_at_x;
	*start_h_ref = start_h;
	*start_w_ref = start_w;
	*end_h_ref = end_h;
	*end_w_ref = end_w;
}
100 | |||||
101 | static int _ccv_nnc_roi_align_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) | ||||
102 | { | ||||
103 | assert(input_size == 2)((void) sizeof ((input_size == 2) ? 1 : 0), __extension__ ({ if (input_size == 2) ; else __assert_fail ("input_size == 2", "roi/ccv_nnc_roi_align_cpu_ref.c" , 103, __extension__ __PRETTY_FUNCTION__); })); | ||||
104 | const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0]; | ||||
105 | assert(output_size == 1)((void) sizeof ((output_size == 1) ? 1 : 0), __extension__ ({ if (output_size == 1) ; else __assert_fail ("output_size == 1" , "roi/ccv_nnc_roi_align_cpu_ref.c", 105, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
106 | const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1]; | ||||
107 | ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0]; | ||||
108 | const int a_nd = ccv_nnc_tensor_nd(a->info.dim); | ||||
109 | assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((a_nd == (2) + 1 || a_nd == (2) + 2) ? 1 : 0) , __extension__ ({ if (a_nd == (2) + 1 || a_nd == (2) + 2) ; else __assert_fail ("a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 109, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
110 | const int* adim = (a_nd == CCV_NNC_MAX_DIM(2) + 1) ? a->info.dim : a->info.dim + 1; | ||||
111 | const int h = adim[0]; | ||||
112 | const int w = adim[1]; | ||||
113 | const int c_nd = ccv_nnc_tensor_nd(c->info.dim); | ||||
114 | assert(c_nd == CCV_NNC_MAX_DIM + 1 || c_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((c_nd == (2) + 1 || c_nd == (2) + 2) ? 1 : 0) , __extension__ ({ if (c_nd == (2) + 1 || c_nd == (2) + 2) ; else __assert_fail ("c_nd == CCV_NNC_MAX_DIM + 1 || c_nd == CCV_NNC_MAX_DIM + 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 114, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
115 | const int* cdim = (c_nd == CCV_NNC_MAX_DIM(2) + 1) ? c->info.dim : c->info.dim + 1; | ||||
116 | const int pool_h = cdim[0]; | ||||
117 | const int pool_w = cdim[1]; | ||||
118 | assert(cdim[2] == adim[2])((void) sizeof ((cdim[2] == adim[2]) ? 1 : 0), __extension__ ( { if (cdim[2] == adim[2]) ; else __assert_fail ("cdim[2] == adim[2]" , "roi/ccv_nnc_roi_align_cpu_ref.c", 118, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
119 | const int ch = cdim[2]; | ||||
120 | const float* const ap = a->data.f32; | ||||
121 | const int* ainc = CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) ? ((a_nd == CCV_NNC_MAX_DIM(2) + 1) ? a->inc : a->inc + 1) : adim; | ||||
122 | const float* const bp = b->data.f32; | ||||
123 | float* cp = c->data.f32; | ||||
124 | const int* cinc = CCV_IS_TENSOR_VIEW(c)((*(int*)(c)) & CCV_TENSOR_VIEW) ? ((c_nd == CCV_NNC_MAX_DIM(2) + 1) ? c->inc : c->inc + 1) : cdim; | ||||
125 | const int a_n = ccv_nnc_tensor_get_n(a->info); | ||||
126 | const int b_nd = ccv_nnc_tensor_nd(b->info.dim); | ||||
127 | assert(b_nd == 1 || b_nd == 2)((void) sizeof ((b_nd == 1 || b_nd == 2) ? 1 : 0), __extension__ ({ if (b_nd == 1 || b_nd == 2) ; else __assert_fail ("b_nd == 1 || b_nd == 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 127, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
128 | const int b_n = b_nd == 1 ? 1 : b->info.dim[0]; | ||||
129 | const int c_n = ccv_nnc_tensor_get_n(c->info); | ||||
130 | assert(c_n == ccv_max(a_n, b_n))((void) sizeof ((c_n == ({ typeof (a_n) _a = (a_n); typeof (b_n ) _b = (b_n); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__ ({ if (c_n == ({ typeof (a_n) _a = (a_n); typeof (b_n) _b = ( b_n); (_a > _b) ? _a : _b; })) ; else __assert_fail ("c_n == ccv_max(a_n, b_n)" , "roi/ccv_nnc_roi_align_cpu_ref.c", 130, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
131 | const int aninc = a_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : ainc[0] * ainc[1] * ainc[2]; | ||||
132 | const int* binc = CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW) ? b->inc : b->info.dim; | ||||
133 | const int bninc = b_nd == 1 ? 0 : binc[1]; | ||||
134 | const int cninc = c_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : cinc[0] * cinc[1] * cinc[2]; | ||||
135 | ccv_nnc_tensor_zero(c); | ||||
136 | int bin_h, bin_w; | ||||
137 | roi_align_coeffs_t* y_coeffs; | ||||
138 | roi_align_coeffs_t* x_coeffs; | ||||
139 | int* bin_h_at_y; | ||||
140 | int* bin_w_at_x; | ||||
141 | int start_h, start_w, end_h, end_w; | ||||
142 | int n; | ||||
143 | for (n = 0; n < c_n; n++) | ||||
144 | { | ||||
145 | const float* const apn = ap + (n % a_n) * aninc; | ||||
146 | float* cpn = cp + n * cninc; | ||||
147 | const float roi_x = bp[(n % b_n) * bninc] * w; // These assumed it is real-coordinate, with range between 0 to w - 1. | ||||
148 | const float roi_y = bp[(n % b_n) * bninc + 1] * h; | ||||
149 | const float roi_w = bp[(n % b_n) * bninc + 2] * w; | ||||
150 | const float roi_h = bp[(n % b_n) * bninc + 3] * h; | ||||
151 | // Re-compute the offsets if b changes or it is the first time. | ||||
152 | if ((b_n == 1 && n == 0) || b_n > 1) | ||||
153 | _ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w); | ||||
154 | int i, j, x, y, k; | ||||
155 | for (i = start_h; i < end_h; i++) | ||||
156 | { | ||||
157 | const int pi = i * bin_h; | ||||
158 | const int bin_hz = bin_h_at_y[i]; | ||||
159 | for (j = start_w; j < end_w; j++) | ||||
160 | { | ||||
161 | const int pj = j * bin_w; | ||||
162 | const int bin_wz = bin_w_at_x[j]; | ||||
163 | const float inv = 1.0 / (bin_hz * bin_wz); | ||||
164 | float* const cpz = cpn + j * cinc[CCV_NNC_MAX_DIM(2)]; | ||||
165 | for (y = 0; y < bin_h; y++) | ||||
166 | { | ||||
167 | if (y_coeffs[pi + y].mute) | ||||
168 | continue; | ||||
169 | const float ry = y_coeffs[pi + y].r; | ||||
170 | const int iy0 = y_coeffs[pi + y].i0; | ||||
171 | const int iy1 = y_coeffs[pi + y].i1; | ||||
172 | for (x = 0; x < bin_w; x++) | ||||
173 | { | ||||
174 | if (x_coeffs[pj + x].mute) | ||||
175 | continue; | ||||
176 | const float rx = x_coeffs[pj + x].r; | ||||
177 | const int ix0 = x_coeffs[pj + x].i0; | ||||
178 | const int ix1 = x_coeffs[pj + x].i1; | ||||
179 | const float c00 = (1 - ry) * (1 - rx); | ||||
180 | const float c01 = (1 - ry) * rx; | ||||
181 | const float c10 = ry * (1 - rx); | ||||
182 | const float c11 = ry * rx; | ||||
183 | const float* const ap00 = apn + (iy0 * ainc[CCV_NNC_MAX_DIM(2) - 1] + ix0) * ainc[CCV_NNC_MAX_DIM(2)]; | ||||
184 | const float* const ap01 = apn + (iy0 * ainc[CCV_NNC_MAX_DIM(2) - 1] + ix1) * ainc[CCV_NNC_MAX_DIM(2)]; | ||||
185 | const float* const ap10 = apn + (iy1 * ainc[CCV_NNC_MAX_DIM(2) - 1] + ix0) * ainc[CCV_NNC_MAX_DIM(2)]; | ||||
186 | const float* const ap11 = apn + (iy1 * ainc[CCV_NNC_MAX_DIM(2) - 1] + ix1) * ainc[CCV_NNC_MAX_DIM(2)]; | ||||
187 | for (k = 0; k < ch; k++) | ||||
188 | cpz[k] += ap00[k] * c00 + ap01[k] * c01 + ap10[k] * c10 + ap11[k] * c11; | ||||
189 | } | ||||
190 | } | ||||
191 | for (k = 0; k < ch; k++) | ||||
192 | cpz[k] *= inv; | ||||
193 | } | ||||
194 | cpn += cinc[CCV_NNC_MAX_DIM(2) - 1] * cinc[CCV_NNC_MAX_DIM(2)]; | ||||
195 | } | ||||
196 | } | ||||
197 | return CCV_NNC_EXEC_SUCCESS; | ||||
198 | } | ||||
199 | |||||
200 | static int _ccv_nnc_roi_align_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) | ||||
201 | { | ||||
202 | assert(input_size >= 3)((void) sizeof ((input_size >= 3) ? 1 : 0), __extension__ ( { if (input_size >= 3) ; else __assert_fail ("input_size >= 3" , "roi/ccv_nnc_roi_align_cpu_ref.c", 202, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
| |||||
203 | const ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0]; | ||||
204 | assert(output_size == 1)((void) sizeof ((output_size == 1) ? 1 : 0), __extension__ ({ if (output_size == 1) ; else __assert_fail ("output_size == 1" , "roi/ccv_nnc_roi_align_cpu_ref.c", 204, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
205 | ccv_nnc_tensor_view_t* o = (ccv_nnc_tensor_view_t*)outputs[0]; | ||||
206 | const int g_nd = ccv_nnc_tensor_nd(g->info.dim); | ||||
207 | assert(g_nd == CCV_NNC_MAX_DIM + 1 || g_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((g_nd == (2) + 1 || g_nd == (2) + 2) ? 1 : 0) , __extension__ ({ if (g_nd == (2) + 1 || g_nd == (2) + 2) ; else __assert_fail ("g_nd == CCV_NNC_MAX_DIM + 1 || g_nd == CCV_NNC_MAX_DIM + 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 207, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
208 | const int* gdim = (g_nd == CCV_NNC_MAX_DIM(2) + 1) ? g->info.dim : g->info.dim + 1; | ||||
209 | const int pool_h = gdim[0]; | ||||
210 | const int pool_w = gdim[1]; | ||||
211 | const int o_nd = ccv_nnc_tensor_nd(o->info.dim); | ||||
212 | assert(o_nd == CCV_NNC_MAX_DIM + 1 || o_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((o_nd == (2) + 1 || o_nd == (2) + 2) ? 1 : 0) , __extension__ ({ if (o_nd == (2) + 1 || o_nd == (2) + 2) ; else __assert_fail ("o_nd == CCV_NNC_MAX_DIM + 1 || o_nd == CCV_NNC_MAX_DIM + 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 212, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
213 | const int* odim = (o_nd == CCV_NNC_MAX_DIM(2) + 1) ? o->info.dim : o->info.dim + 1; | ||||
214 | const int h = odim[0]; | ||||
215 | const int w = odim[1]; | ||||
216 | assert(gdim[2] == odim[2])((void) sizeof ((gdim[2] == odim[2]) ? 1 : 0), __extension__ ( { if (gdim[2] == odim[2]) ; else __assert_fail ("gdim[2] == odim[2]" , "roi/ccv_nnc_roi_align_cpu_ref.c", 216, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
217 | const int ch = gdim[2]; | ||||
218 | float* gp = g->data.f32; | ||||
219 | const int* ginc = CCV_IS_TENSOR_VIEW(g)((*(int*)(g)) & CCV_TENSOR_VIEW) ? ((g_nd == CCV_NNC_MAX_DIM(2) + 1) ? g->inc : g->inc + 1) : gdim; | ||||
220 | float* op = o->data.f32; | ||||
221 | const int* oinc = CCV_IS_TENSOR_VIEW(o)((*(int*)(o)) & CCV_TENSOR_VIEW) ? ((o_nd == CCV_NNC_MAX_DIM(2) + 1) ? o->inc : o->inc + 1) : odim; | ||||
222 | const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[2]; | ||||
223 | const float* const bp = b->data.f32; | ||||
224 | const int o_n = ccv_nnc_tensor_get_n(o->info); | ||||
225 | const int b_nd = ccv_nnc_tensor_nd(b->info.dim); | ||||
226 | assert(b_nd == 1 || b_nd == 2)((void) sizeof ((b_nd == 1 || b_nd == 2) ? 1 : 0), __extension__ ({ if (b_nd == 1 || b_nd == 2) ; else __assert_fail ("b_nd == 1 || b_nd == 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 226, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
227 | const int b_n = b_nd
| ||||
228 | const int g_n = ccv_nnc_tensor_get_n(g->info); | ||||
229 | assert(g_n == ccv_max(o_n, b_n))((void) sizeof ((g_n == ({ typeof (o_n) _a = (o_n); typeof (b_n ) _b = (b_n); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__ ({ if (g_n == ({ typeof (o_n) _a = (o_n); typeof (b_n) _b = ( b_n); (_a > _b) ? _a : _b; })) ; else __assert_fail ("g_n == ccv_max(o_n, b_n)" , "roi/ccv_nnc_roi_align_cpu_ref.c", 229, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
230 | const int oninc = o_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : oinc[0] * oinc[1] * oinc[2]; | ||||
231 | const int* binc = CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW) ? b->inc : b->info.dim; | ||||
232 | const int bninc = b_nd
| ||||
233 | const int gninc = g_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : ginc[0] * ginc[1] * ginc[2]; | ||||
234 | int bin_h, bin_w; | ||||
235 | roi_align_coeffs_t* y_coeffs; | ||||
236 | roi_align_coeffs_t* x_coeffs; | ||||
237 | int* bin_h_at_y; | ||||
238 | int* bin_w_at_x; | ||||
239 | int start_h, start_w, end_h, end_w; | ||||
240 | int n; | ||||
241 | ccv_nnc_tensor_zero(o); | ||||
242 | for (n = 0; n < g_n; n++) | ||||
243 | { | ||||
244 | const float roi_x = bp[(n % b_n) * bninc] * w; // These assumed it is real-coordinate, with range between 0 to w - 1. | ||||
245 | const float roi_y = bp[(n % b_n) * bninc + 1] * h; | ||||
246 | const float roi_w = bp[(n % b_n) * bninc + 2] * w; | ||||
247 | const float roi_h = bp[(n % b_n) * bninc + 3] * h; | ||||
248 | // Re-compute the offsets if b changes or it is the first time. | ||||
249 | if ((b_n
| ||||
250 | _ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w); | ||||
251 | const float* gpn = gp + n * gninc; | ||||
252 | float* const opn = op + (n % o_n) * oninc; | ||||
253 | int x, y, i, j, k; | ||||
254 | for (i = 0; i < pool_h; i++) | ||||
255 | { | ||||
256 | const int pi = i * bin_h; | ||||
| |||||
257 | const int bin_hz = bin_h_at_y[i]; | ||||
258 | for (j = 0; j < pool_w; j++) | ||||
259 | { | ||||
260 | const int pj = j * bin_w; | ||||
261 | const int bin_wz = bin_w_at_x[j]; | ||||
262 | const float inv = 1.0 / (bin_hz * bin_wz); | ||||
263 | const float* const gpz = gpn + j * ginc[CCV_NNC_MAX_DIM(2)]; | ||||
264 | for (y = 0; y < bin_h; y++) | ||||
265 | { | ||||
266 | if (y_coeffs[pi + y].mute) | ||||
267 | continue; | ||||
268 | const float ry = y_coeffs[pi + y].r; | ||||
269 | const int iy0 = y_coeffs[pi + y].i0; | ||||
270 | const int iy1 = y_coeffs[pi + y].i1; | ||||
271 | for (x = 0; x < bin_w; x++) | ||||
272 | { | ||||
273 | if (x_coeffs[pj + x].mute) | ||||
274 | continue; | ||||
275 | const float rx = x_coeffs[pj + x].r; | ||||
276 | const int ix0 = x_coeffs[pj + x].i0; | ||||
277 | const int ix1 = x_coeffs[pj + x].i1; | ||||
278 | const float c00 = (1 - ry) * (1 - rx); | ||||
279 | const float c01 = (1 - ry) * rx; | ||||
280 | const float c10 = ry * (1 - rx); | ||||
281 | const float c11 = ry * rx; | ||||
282 | float* const op00 = opn + (iy0 * oinc[CCV_NNC_MAX_DIM(2) - 1] + ix0) * oinc[CCV_NNC_MAX_DIM(2)]; | ||||
283 | float* const op01 = opn + (iy0 * oinc[CCV_NNC_MAX_DIM(2) - 1] + ix1) * oinc[CCV_NNC_MAX_DIM(2)]; | ||||
284 | float* const op10 = opn + (iy1 * oinc[CCV_NNC_MAX_DIM(2) - 1] + ix0) * oinc[CCV_NNC_MAX_DIM(2)]; | ||||
285 | float* const op11 = opn + (iy1 * oinc[CCV_NNC_MAX_DIM(2) - 1] + ix1) * oinc[CCV_NNC_MAX_DIM(2)]; | ||||
286 | for (k = 0; k < ch; k++) | ||||
287 | { | ||||
288 | op00[k] += gpz[k] * c00 * inv; | ||||
289 | op01[k] += gpz[k] * c01 * inv; | ||||
290 | op10[k] += gpz[k] * c10 * inv; | ||||
291 | op11[k] += gpz[k] * c11 * inv; | ||||
292 | } | ||||
293 | } | ||||
294 | } | ||||
295 | } | ||||
296 | gpn += ginc[CCV_NNC_MAX_DIM(2) - 1] * ginc[CCV_NNC_MAX_DIM(2)]; | ||||
297 | } | ||||
298 | } | ||||
299 | return CCV_NNC_EXEC_SUCCESS; | ||||
300 | } | ||||
301 | |||||
// Register the CPU reference implementation of RoIAlign forward:
// NHWC layout, 32-bit float, CPU memory, a single algorithm.
REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->algorithms = 1;
	registry->exec = _ccv_nnc_roi_align_forw;
}
310 | |||||
// Register the CPU reference implementation of RoIAlign backward:
// NHWC layout, 32-bit float, CPU memory, a single algorithm.
REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->algorithms = 1;
	registry->exec = _ccv_nnc_roi_align_back;
}