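/* File: nnc/cmd/roi/ccv_nnc_roi_align_cpu_ref.c
 * Static analyzer warning: line 158, column 10 -- assigned value is garbage or undefined
 * (the `for (i = start_h; ...)` loop in _ccv_nnc_roi_align_forw may read start_h before it is assigned). */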
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | #include "ccv.h" | |||
2 | #include "ccv_internal.h" | |||
3 | #include "nnc/ccv_nnc.h" | |||
4 | #include "nnc/ccv_nnc_easy.h" | |||
5 | #include "nnc/ccv_nnc_internal.h" | |||
6 | #ifdef USE_OPENMP | |||
7 | #include <omp.h> | |||
8 | #endif | |||
9 | #ifdef USE_DISPATCH | |||
10 | #include <dispatch/dispatch.h> | |||
11 | #endif | |||
12 | ||||
// Bilinear interpolation coefficients of a single sample point along one axis.
typedef struct {
	int i0; // Index of the lower neighbor (floor of the sample coordinate), clamped to the axis range.
	int i1; // Index of the upper neighbor (floor + 1), clamped to the axis range.
	int mute; // Non-zero if both neighbors fall outside of the axis, in which case the sample is skipped.
	float r; // Sample coordinate minus its floor: the weight of i1, while i0 is weighted by (1 - r).
} roi_align_coeffs_t;
17 | ||||
18 | static void _ccv_nnc_bilinear_coeffs(ccv_nnc_stream_context_t* const stream_context, const int h, const int w, const float roi_y, const float roi_x, const float roi_h, const float roi_w, const int pool_h, const int pool_w, int* const bin_h_ref, int* const bin_w_ref, roi_align_coeffs_t** const y_coeffs_ref, roi_align_coeffs_t** const x_coeffs_ref, int** const bin_h_at_y_ref, int** const bin_w_at_x_ref, int* const start_h_ref, int* const start_w_ref, int* const end_h_ref, int* const end_w_ref) | |||
19 | { | |||
20 | const int bin_h = (int)ceilf(roi_h / pool_h); // How many bins in each point of the pool. We slightly sampling at higher resolution (due to ceiling) with bilinear interpolation. | |||
21 | const int bin_w = (int)ceilf(roi_w / pool_w); | |||
22 | const int bin_pool_h = bin_h * pool_h; // Before averaging, what's the size of the region in integral term. | |||
23 | const int bin_pool_w = bin_w * pool_w; | |||
24 | const float scale_y = roi_h / bin_pool_h; // The scale to multiply back to get original coordinate. | |||
25 | const float scale_x = roi_w / bin_pool_w; | |||
26 | int x, y, i, j; | |||
27 | roi_align_coeffs_t* const y_coeffs = (roi_align_coeffs_t*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(roi_align_coeffs_t) * (bin_pool_h + bin_pool_w) + sizeof(int) * (pool_h + pool_w), CCV_TENSOR_CPU_MEMORY); | |||
28 | roi_align_coeffs_t* const x_coeffs = y_coeffs + bin_pool_h; | |||
29 | int* const bin_h_at_y = (int*)(x_coeffs + bin_pool_w); | |||
30 | int* const bin_w_at_x = bin_h_at_y + pool_h; | |||
31 | for (i = 0; i < pool_h; i++) | |||
32 | { | |||
33 | const int pi = i * bin_h; | |||
34 | int count = 0; | |||
35 | for (y = 0; y < bin_h; y++) | |||
36 | { | |||
37 | const float ay = roi_y + (y + pi + 0.5) * scale_y - 0.5; | |||
38 | const int iy = (int)floorf(ay); | |||
39 | const float ry = ay - iy; | |||
40 | const int iy0 = ccv_clamp(iy, 0, h - 1)({ typeof (0) _a = (0); typeof (h - 1) _b = (h - 1); typeof ( iy) _x = (iy); (_x < _a) ? _a : ((_x > _b) ? _b : _x); } ); | |||
41 | const int iy1 = ccv_clamp(iy + 1, 0, h - 1)({ typeof (0) _a = (0); typeof (h - 1) _b = (h - 1); typeof ( iy + 1) _x = (iy + 1); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }); | |||
42 | y_coeffs[pi + y].i0 = iy0; | |||
43 | y_coeffs[pi + y].i1 = iy1; | |||
44 | y_coeffs[pi + y].r = ry; | |||
45 | const int mute = (iy + 1 < 0 || iy > h - 1); | |||
46 | y_coeffs[pi + y].mute = mute; | |||
47 | if (!mute) | |||
48 | ++count; | |||
49 | } | |||
50 | bin_h_at_y[i] = count; | |||
51 | } | |||
52 | int start_h = pool_h; | |||
53 | for (i = 0; start_h == pool_h && i < pool_h; i++) | |||
54 | if (bin_h_at_y[i] > 0) | |||
55 | start_h = i; | |||
56 | int end_h = 0; | |||
57 | for (i = pool_h - 1; end_h == 0 && i >= 0; i--) | |||
58 | if (bin_h_at_y[i] > 0) | |||
59 | end_h = i + 1; | |||
60 | for (j = 0; j < pool_w; j++) | |||
61 | { | |||
62 | const int pj = j * bin_w; | |||
63 | int count = 0; | |||
64 | for (x = 0; x < bin_w; x++) | |||
65 | { | |||
66 | const float ax = roi_x + (x + pj + 0.5) * scale_x - 0.5; | |||
67 | const int ix = (int)floorf(ax); | |||
68 | const float rx = ax - ix; | |||
69 | const int ix0 = ccv_clamp(ix, 0, w - 1)({ typeof (0) _a = (0); typeof (w - 1) _b = (w - 1); typeof ( ix) _x = (ix); (_x < _a) ? _a : ((_x > _b) ? _b : _x); } ); | |||
70 | const int ix1 = ccv_clamp(ix + 1, 0, w - 1)({ typeof (0) _a = (0); typeof (w - 1) _b = (w - 1); typeof ( ix + 1) _x = (ix + 1); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }); | |||
71 | x_coeffs[pj + x].i0 = ix0; | |||
72 | x_coeffs[pj + x].i1 = ix1; | |||
73 | x_coeffs[pj + x].r = rx; | |||
74 | const int mute = (ix + 1 < 0 || ix > w - 1); | |||
75 | x_coeffs[pj + x].mute = mute; | |||
76 | if (!mute) | |||
77 | ++count; | |||
78 | } | |||
79 | bin_w_at_x[j] = count; | |||
80 | } | |||
81 | int start_w = pool_w; | |||
82 | for (j = 0; start_w == pool_w && j < pool_w; j++) | |||
83 | if (bin_w_at_x[j] > 0) | |||
84 | start_w = j; | |||
85 | int end_w = 0; | |||
86 | for (j = pool_w - 1; end_w == 0 && j >= 0; j--) | |||
87 | if (bin_w_at_x[j] > 0) | |||
88 | end_w = j + 1; | |||
89 | *bin_h_ref = bin_h; | |||
90 | *bin_w_ref = bin_w; | |||
91 | *y_coeffs_ref = y_coeffs; | |||
92 | *x_coeffs_ref = x_coeffs; | |||
93 | *bin_h_at_y_ref = bin_h_at_y; | |||
94 | *bin_w_at_x_ref = bin_w_at_x; | |||
95 | *start_h_ref = start_h; | |||
96 | *start_w_ref = start_w; | |||
97 | *end_h_ref = end_h; | |||
98 | *end_w_ref = end_w; | |||
99 | } | |||
100 | ||||
101 | static int _ccv_nnc_roi_align_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) | |||
102 | { | |||
103 | assert(input_size == 2)((void) sizeof ((input_size == 2) ? 1 : 0), __extension__ ({ if (input_size == 2) ; else __assert_fail ("input_size == 2", "roi/ccv_nnc_roi_align_cpu_ref.c" , 103, __extension__ __PRETTY_FUNCTION__); })); | |||
| ||||
104 | const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0]; | |||
105 | assert(output_size == 1)((void) sizeof ((output_size == 1) ? 1 : 0), __extension__ ({ if (output_size == 1) ; else __assert_fail ("output_size == 1" , "roi/ccv_nnc_roi_align_cpu_ref.c", 105, __extension__ __PRETTY_FUNCTION__ ); })); | |||
106 | const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1]; | |||
107 | ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0]; | |||
108 | const int a_nd = ccv_nnc_tensor_nd(a->info.dim); | |||
109 | assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((a_nd == (2) + 1 || a_nd == (2) + 2) ? 1 : 0) , __extension__ ({ if (a_nd == (2) + 1 || a_nd == (2) + 2) ; else __assert_fail ("a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 109, __extension__ __PRETTY_FUNCTION__ ); })); | |||
110 | const int* adim = (a_nd == CCV_NNC_MAX_DIM(2) + 1) ? a->info.dim : a->info.dim + 1; | |||
111 | const int h = adim[0]; | |||
112 | const int w = adim[1]; | |||
113 | const int c_nd = ccv_nnc_tensor_nd(c->info.dim); | |||
114 | assert(c_nd == CCV_NNC_MAX_DIM + 1 || c_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((c_nd == (2) + 1 || c_nd == (2) + 2) ? 1 : 0) , __extension__ ({ if (c_nd == (2) + 1 || c_nd == (2) + 2) ; else __assert_fail ("c_nd == CCV_NNC_MAX_DIM + 1 || c_nd == CCV_NNC_MAX_DIM + 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 114, __extension__ __PRETTY_FUNCTION__ ); })); | |||
115 | const int* cdim = (c_nd == CCV_NNC_MAX_DIM(2) + 1) ? c->info.dim : c->info.dim + 1; | |||
116 | const int pool_h = cdim[0]; | |||
117 | const int pool_w = cdim[1]; | |||
118 | assert(cdim[2] == adim[2])((void) sizeof ((cdim[2] == adim[2]) ? 1 : 0), __extension__ ( { if (cdim[2] == adim[2]) ; else __assert_fail ("cdim[2] == adim[2]" , "roi/ccv_nnc_roi_align_cpu_ref.c", 118, __extension__ __PRETTY_FUNCTION__ ); })); | |||
119 | const int ch = cdim[2]; | |||
120 | const float* const ap = a->data.f32; | |||
121 | int astride[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
122 | ccv_nnc_tensor_view_get_stride(a, astride); | |||
123 | const float* const bp = b->data.f32; | |||
124 | float* cp = c->data.f32; | |||
125 | int cstride[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
126 | ccv_nnc_tensor_view_get_stride(c, cstride); | |||
127 | const int a_n = ccv_nnc_tensor_get_n(a->info); | |||
128 | const int b_nd = ccv_nnc_tensor_nd(b->info.dim); | |||
129 | assert(b_nd == 1 || b_nd == 2)((void) sizeof ((b_nd == 1 || b_nd == 2) ? 1 : 0), __extension__ ({ if (b_nd == 1 || b_nd == 2) ; else __assert_fail ("b_nd == 1 || b_nd == 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 129, __extension__ __PRETTY_FUNCTION__ ); })); | |||
130 | const int b_n = b_nd
| |||
131 | const int c_n = ccv_nnc_tensor_get_n(c->info); | |||
132 | assert(c_n == ccv_max(a_n, b_n))((void) sizeof ((c_n == ({ typeof (a_n) _a = (a_n); typeof (b_n ) _b = (b_n); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__ ({ if (c_n == ({ typeof (a_n) _a = (a_n); typeof (b_n) _b = ( b_n); (_a > _b) ? _a : _b; })) ; else __assert_fail ("c_n == ccv_max(a_n, b_n)" , "roi/ccv_nnc_roi_align_cpu_ref.c", 132, __extension__ __PRETTY_FUNCTION__ ); })); | |||
133 | const int aninc = a_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : astride[0]; | |||
134 | int bstride[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
135 | ccv_nnc_tensor_view_get_stride(b, bstride); | |||
136 | const int bninc = b_nd
| |||
137 | const int cninc = c_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : cstride[0]; | |||
138 | ccv_nnc_tensor_zero(c); | |||
139 | int bin_h, bin_w; | |||
140 | roi_align_coeffs_t* y_coeffs; | |||
141 | roi_align_coeffs_t* x_coeffs; | |||
142 | int* bin_h_at_y; | |||
143 | int* bin_w_at_x; | |||
144 | int start_h, start_w, end_h, end_w; | |||
145 | int n; | |||
146 | for (n = 0; n < c_n; n++) | |||
147 | { | |||
148 | const float* const apn = ap + (n % a_n) * aninc; | |||
149 | float* cpn = cp + n * cninc; | |||
150 | const float roi_x = bp[(n % b_n) * bninc] * w; // These assumed it is real-coordinate, with range between 0 to w - 1. | |||
151 | const float roi_y = bp[(n % b_n) * bninc + 1] * h; | |||
152 | const float roi_w = bp[(n % b_n) * bninc + 2] * w; | |||
153 | const float roi_h = bp[(n % b_n) * bninc + 3] * h; | |||
154 | // Re-compute the offsets if b changes or it is the first time. | |||
155 | if ((b_n == 1 && n == 0) || b_n > 1) | |||
156 | _ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w); | |||
157 | int i, j, x, y, k; | |||
158 | for (i = start_h; i < end_h; i++) | |||
| ||||
159 | { | |||
160 | const int pi = i * bin_h; | |||
161 | const int bin_hz = bin_h_at_y[i]; | |||
162 | for (j = start_w; j < end_w; j++) | |||
163 | { | |||
164 | const int pj = j * bin_w; | |||
165 | const int bin_wz = bin_w_at_x[j]; | |||
166 | const float inv = 1.0 / (bin_hz * bin_wz); | |||
167 | float* const cpz = cpn + j * cstride[CCV_NNC_MAX_DIM(2)]; | |||
168 | for (y = 0; y < bin_h; y++) | |||
169 | { | |||
170 | if (y_coeffs[pi + y].mute) | |||
171 | continue; | |||
172 | const float ry = y_coeffs[pi + y].r; | |||
173 | const int iy0 = y_coeffs[pi + y].i0; | |||
174 | const int iy1 = y_coeffs[pi + y].i1; | |||
175 | for (x = 0; x < bin_w; x++) | |||
176 | { | |||
177 | if (x_coeffs[pj + x].mute) | |||
178 | continue; | |||
179 | const float rx = x_coeffs[pj + x].r; | |||
180 | const int ix0 = x_coeffs[pj + x].i0; | |||
181 | const int ix1 = x_coeffs[pj + x].i1; | |||
182 | const float c00 = (1 - ry) * (1 - rx); | |||
183 | const float c01 = (1 - ry) * rx; | |||
184 | const float c10 = ry * (1 - rx); | |||
185 | const float c11 = ry * rx; | |||
186 | const float* const ap00 = apn + iy0 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * astride[CCV_NNC_MAX_DIM(2)]; | |||
187 | const float* const ap01 = apn + iy0 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * astride[CCV_NNC_MAX_DIM(2)]; | |||
188 | const float* const ap10 = apn + iy1 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * astride[CCV_NNC_MAX_DIM(2)]; | |||
189 | const float* const ap11 = apn + iy1 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * astride[CCV_NNC_MAX_DIM(2)]; | |||
190 | for (k = 0; k < ch; k++) | |||
191 | cpz[k] += ap00[k] * c00 + ap01[k] * c01 + ap10[k] * c10 + ap11[k] * c11; | |||
192 | } | |||
193 | } | |||
194 | for (k = 0; k < ch; k++) | |||
195 | cpz[k] *= inv; | |||
196 | } | |||
197 | cpn += cstride[CCV_NNC_MAX_DIM(2) - 1]; | |||
198 | } | |||
199 | } | |||
200 | return CCV_NNC_EXEC_SUCCESS; | |||
201 | } | |||
202 | ||||
203 | static int _ccv_nnc_roi_align_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) | |||
204 | { | |||
205 | assert(input_size >= 3)((void) sizeof ((input_size >= 3) ? 1 : 0), __extension__ ( { if (input_size >= 3) ; else __assert_fail ("input_size >= 3" , "roi/ccv_nnc_roi_align_cpu_ref.c", 205, __extension__ __PRETTY_FUNCTION__ ); })); | |||
206 | const ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0]; | |||
207 | assert(output_size == 1)((void) sizeof ((output_size == 1) ? 1 : 0), __extension__ ({ if (output_size == 1) ; else __assert_fail ("output_size == 1" , "roi/ccv_nnc_roi_align_cpu_ref.c", 207, __extension__ __PRETTY_FUNCTION__ ); })); | |||
208 | ccv_nnc_tensor_view_t* o = (ccv_nnc_tensor_view_t*)outputs[0]; | |||
209 | const int g_nd = ccv_nnc_tensor_nd(g->info.dim); | |||
210 | assert(g_nd == CCV_NNC_MAX_DIM + 1 || g_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((g_nd == (2) + 1 || g_nd == (2) + 2) ? 1 : 0) , __extension__ ({ if (g_nd == (2) + 1 || g_nd == (2) + 2) ; else __assert_fail ("g_nd == CCV_NNC_MAX_DIM + 1 || g_nd == CCV_NNC_MAX_DIM + 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 210, __extension__ __PRETTY_FUNCTION__ ); })); | |||
211 | const int* gdim = (g_nd == CCV_NNC_MAX_DIM(2) + 1) ? g->info.dim : g->info.dim + 1; | |||
212 | const int pool_h = gdim[0]; | |||
213 | const int pool_w = gdim[1]; | |||
214 | const int o_nd = ccv_nnc_tensor_nd(o->info.dim); | |||
215 | assert(o_nd == CCV_NNC_MAX_DIM + 1 || o_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((o_nd == (2) + 1 || o_nd == (2) + 2) ? 1 : 0) , __extension__ ({ if (o_nd == (2) + 1 || o_nd == (2) + 2) ; else __assert_fail ("o_nd == CCV_NNC_MAX_DIM + 1 || o_nd == CCV_NNC_MAX_DIM + 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 215, __extension__ __PRETTY_FUNCTION__ ); })); | |||
216 | const int* odim = (o_nd == CCV_NNC_MAX_DIM(2) + 1) ? o->info.dim : o->info.dim + 1; | |||
217 | const int h = odim[0]; | |||
218 | const int w = odim[1]; | |||
219 | assert(gdim[2] == odim[2])((void) sizeof ((gdim[2] == odim[2]) ? 1 : 0), __extension__ ( { if (gdim[2] == odim[2]) ; else __assert_fail ("gdim[2] == odim[2]" , "roi/ccv_nnc_roi_align_cpu_ref.c", 219, __extension__ __PRETTY_FUNCTION__ ); })); | |||
220 | const int ch = gdim[2]; | |||
221 | float* gp = g->data.f32; | |||
222 | int gstride[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
223 | ccv_nnc_tensor_view_get_stride(g, gstride); | |||
224 | float* op = o->data.f32; | |||
225 | int ostride[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
226 | ccv_nnc_tensor_view_get_stride(o, ostride); | |||
227 | const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[2]; | |||
228 | const float* const bp = b->data.f32; | |||
229 | const int o_n = ccv_nnc_tensor_get_n(o->info); | |||
230 | const int b_nd = ccv_nnc_tensor_nd(b->info.dim); | |||
231 | assert(b_nd == 1 || b_nd == 2)((void) sizeof ((b_nd == 1 || b_nd == 2) ? 1 : 0), __extension__ ({ if (b_nd == 1 || b_nd == 2) ; else __assert_fail ("b_nd == 1 || b_nd == 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 231, __extension__ __PRETTY_FUNCTION__ ); })); | |||
232 | const int b_n = b_nd == 1 ? 1 : b->info.dim[0]; | |||
233 | const int g_n = ccv_nnc_tensor_get_n(g->info); | |||
234 | assert(g_n == ccv_max(o_n, b_n))((void) sizeof ((g_n == ({ typeof (o_n) _a = (o_n); typeof (b_n ) _b = (b_n); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__ ({ if (g_n == ({ typeof (o_n) _a = (o_n); typeof (b_n) _b = ( b_n); (_a > _b) ? _a : _b; })) ; else __assert_fail ("g_n == ccv_max(o_n, b_n)" , "roi/ccv_nnc_roi_align_cpu_ref.c", 234, __extension__ __PRETTY_FUNCTION__ ); })); | |||
235 | const int oninc = o_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : ostride[0]; | |||
236 | int bstride[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
237 | ccv_nnc_tensor_view_get_stride(b, bstride); | |||
238 | const int bninc = b_nd == 1 ? 0 : bstride[CCV_NNC_MAX_DIM(2) + 2 - b_nd]; | |||
239 | const int gninc = g_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : gstride[0]; | |||
240 | int bin_h, bin_w; | |||
241 | roi_align_coeffs_t* y_coeffs; | |||
242 | roi_align_coeffs_t* x_coeffs; | |||
243 | int* bin_h_at_y; | |||
244 | int* bin_w_at_x; | |||
245 | int start_h, start_w, end_h, end_w; | |||
246 | int n; | |||
247 | ccv_nnc_tensor_zero(o); | |||
248 | for (n = 0; n < g_n; n++) | |||
249 | { | |||
250 | const float roi_x = bp[(n % b_n) * bninc] * w; // These assumed it is real-coordinate, with range between 0 to w - 1. | |||
251 | const float roi_y = bp[(n % b_n) * bninc + 1] * h; | |||
252 | const float roi_w = bp[(n % b_n) * bninc + 2] * w; | |||
253 | const float roi_h = bp[(n % b_n) * bninc + 3] * h; | |||
254 | // Re-compute the offsets if b changes or it is the first time. | |||
255 | if ((b_n == 1 && n == 0) || b_n > 1) | |||
256 | _ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w); | |||
257 | const float* gpn = gp + n * gninc; | |||
258 | float* const opn = op + (n % o_n) * oninc; | |||
259 | int x, y, i, j, k; | |||
260 | for (i = 0; i < pool_h; i++) | |||
261 | { | |||
262 | const int pi = i * bin_h; | |||
263 | const int bin_hz = bin_h_at_y[i]; | |||
264 | for (j = 0; j < pool_w; j++) | |||
265 | { | |||
266 | const int pj = j * bin_w; | |||
267 | const int bin_wz = bin_w_at_x[j]; | |||
268 | const float inv = 1.0 / (bin_hz * bin_wz); | |||
269 | const float* const gpz = gpn + j * gstride[CCV_NNC_MAX_DIM(2)]; | |||
270 | for (y = 0; y < bin_h; y++) | |||
271 | { | |||
272 | if (y_coeffs[pi + y].mute) | |||
273 | continue; | |||
274 | const float ry = y_coeffs[pi + y].r; | |||
275 | const int iy0 = y_coeffs[pi + y].i0; | |||
276 | const int iy1 = y_coeffs[pi + y].i1; | |||
277 | for (x = 0; x < bin_w; x++) | |||
278 | { | |||
279 | if (x_coeffs[pj + x].mute) | |||
280 | continue; | |||
281 | const float rx = x_coeffs[pj + x].r; | |||
282 | const int ix0 = x_coeffs[pj + x].i0; | |||
283 | const int ix1 = x_coeffs[pj + x].i1; | |||
284 | const float c00 = (1 - ry) * (1 - rx); | |||
285 | const float c01 = (1 - ry) * rx; | |||
286 | const float c10 = ry * (1 - rx); | |||
287 | const float c11 = ry * rx; | |||
288 | float* const op00 = opn + iy0 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * ostride[CCV_NNC_MAX_DIM(2)]; | |||
289 | float* const op01 = opn + iy0 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * ostride[CCV_NNC_MAX_DIM(2)]; | |||
290 | float* const op10 = opn + iy1 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * ostride[CCV_NNC_MAX_DIM(2)]; | |||
291 | float* const op11 = opn + iy1 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * ostride[CCV_NNC_MAX_DIM(2)]; | |||
292 | for (k = 0; k < ch; k++) | |||
293 | { | |||
294 | op00[k] += gpz[k] * c00 * inv; | |||
295 | op01[k] += gpz[k] * c01 * inv; | |||
296 | op10[k] += gpz[k] * c10 * inv; | |||
297 | op11[k] += gpz[k] * c11 * inv; | |||
298 | } | |||
299 | } | |||
300 | } | |||
301 | } | |||
302 | gpn += gstride[CCV_NNC_MAX_DIM(2) - 1]; | |||
303 | } | |||
304 | } | |||
305 | return CCV_NNC_EXEC_SUCCESS; | |||
306 | } | |||
307 | ||||
308 | REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_ROI_ALIGN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry) | |||
309 | { | |||
310 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC; | |||
311 | registry->tensor_datatypes = CCV_32F; | |||
312 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; | |||
313 | registry->algorithms = 1; | |||
314 | registry->exec = _ccv_nnc_roi_align_forw; | |||
315 | } | |||
316 | ||||
317 | REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_ROI_ALIGN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry) | |||
318 | { | |||
319 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC; | |||
320 | registry->tensor_datatypes = CCV_32F; | |||
321 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; | |||
322 | registry->algorithms = 1; | |||
323 | registry->exec = _ccv_nnc_roi_align_back; | |||
324 | } |