| File: | nnc/cmd/roi/ccv_nnc_roi_align_cpu_ref.c |
| Warning: | line 158, column 10 Assigned value is garbage or undefined |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | #include "ccv.h" | |||
| 2 | #include "ccv_internal.h" | |||
| 3 | #include "nnc/ccv_nnc.h" | |||
| 4 | #include "nnc/ccv_nnc_easy.h" | |||
| 5 | #include "nnc/ccv_nnc_internal.h" | |||
| 6 | #ifdef USE_OPENMP | |||
| 7 | #include <omp.h> | |||
| 8 | #endif | |||
| 9 | #ifdef USE_DISPATCH | |||
| 10 | #include <dispatch/dispatch.h> | |||
| 11 | #endif | |||
| 12 | ||||
// Precomputed bilinear interpolation data for one sample position along one axis.
typedef struct {
	int i0; // floor of the sample coordinate, clamped into the valid index range of the axis
	int i1; // floor of the sample coordinate plus one, clamped into the same range
	int mute; // non-zero when both neighbours fall outside the axis; such samples are skipped
	float r; // fractional part of the sample coordinate, i.e. the weight given to i1
} roi_align_coeffs_t;
| 17 | ||||
| 18 | static void _ccv_nnc_bilinear_coeffs(ccv_nnc_stream_context_t* const stream_context, const int h, const int w, const float roi_y, const float roi_x, const float roi_h, const float roi_w, const int pool_h, const int pool_w, int* const bin_h_ref, int* const bin_w_ref, roi_align_coeffs_t** const y_coeffs_ref, roi_align_coeffs_t** const x_coeffs_ref, int** const bin_h_at_y_ref, int** const bin_w_at_x_ref, int* const start_h_ref, int* const start_w_ref, int* const end_h_ref, int* const end_w_ref) | |||
| 19 | { | |||
| 20 | const int bin_h = (int)ceilf(roi_h / pool_h); // How many bins in each point of the pool. We slightly sampling at higher resolution (due to ceiling) with bilinear interpolation. | |||
| 21 | const int bin_w = (int)ceilf(roi_w / pool_w); | |||
| 22 | const int bin_pool_h = bin_h * pool_h; // Before averaging, what's the size of the region in integral term. | |||
| 23 | const int bin_pool_w = bin_w * pool_w; | |||
| 24 | const float scale_y = roi_h / bin_pool_h; // The scale to multiply back to get original coordinate. | |||
| 25 | const float scale_x = roi_w / bin_pool_w; | |||
| 26 | int x, y, i, j; | |||
| 27 | roi_align_coeffs_t* const y_coeffs = (roi_align_coeffs_t*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(roi_align_coeffs_t) * (bin_pool_h + bin_pool_w) + sizeof(int) * (pool_h + pool_w), CCV_TENSOR_CPU_MEMORY); | |||
| 28 | roi_align_coeffs_t* const x_coeffs = y_coeffs + bin_pool_h; | |||
| 29 | int* const bin_h_at_y = (int*)(x_coeffs + bin_pool_w); | |||
| 30 | int* const bin_w_at_x = bin_h_at_y + pool_h; | |||
| 31 | for (i = 0; i < pool_h; i++) | |||
| 32 | { | |||
| 33 | const int pi = i * bin_h; | |||
| 34 | int count = 0; | |||
| 35 | for (y = 0; y < bin_h; y++) | |||
| 36 | { | |||
| 37 | const float ay = roi_y + (y + pi + 0.5) * scale_y - 0.5; | |||
| 38 | const int iy = (int)floorf(ay); | |||
| 39 | const float ry = ay - iy; | |||
| 40 | const int iy0 = ccv_clamp(iy, 0, h - 1)({ typeof (0) _a = (0); typeof (h - 1) _b = (h - 1); typeof ( iy) _x = (iy); (_x < _a) ? _a : ((_x > _b) ? _b : _x); } ); | |||
| 41 | const int iy1 = ccv_clamp(iy + 1, 0, h - 1)({ typeof (0) _a = (0); typeof (h - 1) _b = (h - 1); typeof ( iy + 1) _x = (iy + 1); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }); | |||
| 42 | y_coeffs[pi + y].i0 = iy0; | |||
| 43 | y_coeffs[pi + y].i1 = iy1; | |||
| 44 | y_coeffs[pi + y].r = ry; | |||
| 45 | const int mute = (iy + 1 < 0 || iy > h - 1); | |||
| 46 | y_coeffs[pi + y].mute = mute; | |||
| 47 | if (!mute) | |||
| 48 | ++count; | |||
| 49 | } | |||
| 50 | bin_h_at_y[i] = count; | |||
| 51 | } | |||
| 52 | int start_h = pool_h; | |||
| 53 | for (i = 0; start_h == pool_h && i < pool_h; i++) | |||
| 54 | if (bin_h_at_y[i] > 0) | |||
| 55 | start_h = i; | |||
| 56 | int end_h = 0; | |||
| 57 | for (i = pool_h - 1; end_h == 0 && i >= 0; i--) | |||
| 58 | if (bin_h_at_y[i] > 0) | |||
| 59 | end_h = i + 1; | |||
| 60 | for (j = 0; j < pool_w; j++) | |||
| 61 | { | |||
| 62 | const int pj = j * bin_w; | |||
| 63 | int count = 0; | |||
| 64 | for (x = 0; x < bin_w; x++) | |||
| 65 | { | |||
| 66 | const float ax = roi_x + (x + pj + 0.5) * scale_x - 0.5; | |||
| 67 | const int ix = (int)floorf(ax); | |||
| 68 | const float rx = ax - ix; | |||
| 69 | const int ix0 = ccv_clamp(ix, 0, w - 1)({ typeof (0) _a = (0); typeof (w - 1) _b = (w - 1); typeof ( ix) _x = (ix); (_x < _a) ? _a : ((_x > _b) ? _b : _x); } ); | |||
| 70 | const int ix1 = ccv_clamp(ix + 1, 0, w - 1)({ typeof (0) _a = (0); typeof (w - 1) _b = (w - 1); typeof ( ix + 1) _x = (ix + 1); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }); | |||
| 71 | x_coeffs[pj + x].i0 = ix0; | |||
| 72 | x_coeffs[pj + x].i1 = ix1; | |||
| 73 | x_coeffs[pj + x].r = rx; | |||
| 74 | const int mute = (ix + 1 < 0 || ix > w - 1); | |||
| 75 | x_coeffs[pj + x].mute = mute; | |||
| 76 | if (!mute) | |||
| 77 | ++count; | |||
| 78 | } | |||
| 79 | bin_w_at_x[j] = count; | |||
| 80 | } | |||
| 81 | int start_w = pool_w; | |||
| 82 | for (j = 0; start_w == pool_w && j < pool_w; j++) | |||
| 83 | if (bin_w_at_x[j] > 0) | |||
| 84 | start_w = j; | |||
| 85 | int end_w = 0; | |||
| 86 | for (j = pool_w - 1; end_w == 0 && j >= 0; j--) | |||
| 87 | if (bin_w_at_x[j] > 0) | |||
| 88 | end_w = j + 1; | |||
| 89 | *bin_h_ref = bin_h; | |||
| 90 | *bin_w_ref = bin_w; | |||
| 91 | *y_coeffs_ref = y_coeffs; | |||
| 92 | *x_coeffs_ref = x_coeffs; | |||
| 93 | *bin_h_at_y_ref = bin_h_at_y; | |||
| 94 | *bin_w_at_x_ref = bin_w_at_x; | |||
| 95 | *start_h_ref = start_h; | |||
| 96 | *start_w_ref = start_w; | |||
| 97 | *end_h_ref = end_h; | |||
| 98 | *end_w_ref = end_w; | |||
| 99 | } | |||
| 100 | ||||
| 101 | static int _ccv_nnc_roi_align_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) | |||
| 102 | { | |||
| 103 | assert(input_size == 2)((void) sizeof ((input_size == 2) ? 1 : 0), __extension__ ({ if (input_size == 2) ; else __assert_fail ("input_size == 2", "roi/ccv_nnc_roi_align_cpu_ref.c" , 103, __extension__ __PRETTY_FUNCTION__); })); | |||
| ||||
| 104 | const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0]; | |||
| 105 | assert(output_size == 1)((void) sizeof ((output_size == 1) ? 1 : 0), __extension__ ({ if (output_size == 1) ; else __assert_fail ("output_size == 1" , "roi/ccv_nnc_roi_align_cpu_ref.c", 105, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 106 | const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1]; | |||
| 107 | ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0]; | |||
| 108 | const int a_nd = ccv_nnc_tensor_nd(a->info.dim); | |||
| 109 | assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((a_nd == (2) + 1 || a_nd == (2) + 2) ? 1 : 0) , __extension__ ({ if (a_nd == (2) + 1 || a_nd == (2) + 2) ; else __assert_fail ("a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 109, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 110 | const int* adim = (a_nd == CCV_NNC_MAX_DIM(2) + 1) ? a->info.dim : a->info.dim + 1; | |||
| 111 | const int h = adim[0]; | |||
| 112 | const int w = adim[1]; | |||
| 113 | const int c_nd = ccv_nnc_tensor_nd(c->info.dim); | |||
| 114 | assert(c_nd == CCV_NNC_MAX_DIM + 1 || c_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((c_nd == (2) + 1 || c_nd == (2) + 2) ? 1 : 0) , __extension__ ({ if (c_nd == (2) + 1 || c_nd == (2) + 2) ; else __assert_fail ("c_nd == CCV_NNC_MAX_DIM + 1 || c_nd == CCV_NNC_MAX_DIM + 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 114, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 115 | const int* cdim = (c_nd == CCV_NNC_MAX_DIM(2) + 1) ? c->info.dim : c->info.dim + 1; | |||
| 116 | const int pool_h = cdim[0]; | |||
| 117 | const int pool_w = cdim[1]; | |||
| 118 | assert(cdim[2] == adim[2])((void) sizeof ((cdim[2] == adim[2]) ? 1 : 0), __extension__ ( { if (cdim[2] == adim[2]) ; else __assert_fail ("cdim[2] == adim[2]" , "roi/ccv_nnc_roi_align_cpu_ref.c", 118, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 119 | const int ch = cdim[2]; | |||
| 120 | const float* const ap = a->data.f32; | |||
| 121 | int astride[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 122 | ccv_nnc_tensor_view_get_stride(a, astride); | |||
| 123 | const float* const bp = b->data.f32; | |||
| 124 | float* cp = c->data.f32; | |||
| 125 | int cstride[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 126 | ccv_nnc_tensor_view_get_stride(c, cstride); | |||
| 127 | const int a_n = ccv_nnc_tensor_get_n(a->info); | |||
| 128 | const int b_nd = ccv_nnc_tensor_nd(b->info.dim); | |||
| 129 | assert(b_nd == 1 || b_nd == 2)((void) sizeof ((b_nd == 1 || b_nd == 2) ? 1 : 0), __extension__ ({ if (b_nd == 1 || b_nd == 2) ; else __assert_fail ("b_nd == 1 || b_nd == 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 129, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 130 | const int b_n = b_nd
| |||
| 131 | const int c_n = ccv_nnc_tensor_get_n(c->info); | |||
| 132 | assert(c_n == ccv_max(a_n, b_n))((void) sizeof ((c_n == ({ typeof (a_n) _a = (a_n); typeof (b_n ) _b = (b_n); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__ ({ if (c_n == ({ typeof (a_n) _a = (a_n); typeof (b_n) _b = ( b_n); (_a > _b) ? _a : _b; })) ; else __assert_fail ("c_n == ccv_max(a_n, b_n)" , "roi/ccv_nnc_roi_align_cpu_ref.c", 132, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 133 | const int aninc = a_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : astride[0]; | |||
| 134 | int bstride[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 135 | ccv_nnc_tensor_view_get_stride(b, bstride); | |||
| 136 | const int bninc = b_nd
| |||
| 137 | const int cninc = c_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : cstride[0]; | |||
| 138 | ccv_nnc_tensor_zero(c); | |||
| 139 | int bin_h, bin_w; | |||
| 140 | roi_align_coeffs_t* y_coeffs; | |||
| 141 | roi_align_coeffs_t* x_coeffs; | |||
| 142 | int* bin_h_at_y; | |||
| 143 | int* bin_w_at_x; | |||
| 144 | int start_h, start_w, end_h, end_w; | |||
| 145 | int n; | |||
| 146 | for (n = 0; n < c_n; n++) | |||
| 147 | { | |||
| 148 | const float* const apn = ap + (n % a_n) * aninc; | |||
| 149 | float* cpn = cp + n * cninc; | |||
| 150 | const float roi_x = bp[(n % b_n) * bninc] * w; // These assumed it is real-coordinate, with range between 0 to w - 1. | |||
| 151 | const float roi_y = bp[(n % b_n) * bninc + 1] * h; | |||
| 152 | const float roi_w = bp[(n % b_n) * bninc + 2] * w; | |||
| 153 | const float roi_h = bp[(n % b_n) * bninc + 3] * h; | |||
| 154 | // Re-compute the offsets if b changes or it is the first time. | |||
| 155 | if ((b_n == 1 && n == 0) || b_n > 1) | |||
| 156 | _ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w); | |||
| 157 | int i, j, x, y, k; | |||
| 158 | for (i = start_h; i < end_h; i++) | |||
| ||||
| 159 | { | |||
| 160 | const int pi = i * bin_h; | |||
| 161 | const int bin_hz = bin_h_at_y[i]; | |||
| 162 | for (j = start_w; j < end_w; j++) | |||
| 163 | { | |||
| 164 | const int pj = j * bin_w; | |||
| 165 | const int bin_wz = bin_w_at_x[j]; | |||
| 166 | const float inv = 1.0 / (bin_hz * bin_wz); | |||
| 167 | float* const cpz = cpn + j * cstride[CCV_NNC_MAX_DIM(2)]; | |||
| 168 | for (y = 0; y < bin_h; y++) | |||
| 169 | { | |||
| 170 | if (y_coeffs[pi + y].mute) | |||
| 171 | continue; | |||
| 172 | const float ry = y_coeffs[pi + y].r; | |||
| 173 | const int iy0 = y_coeffs[pi + y].i0; | |||
| 174 | const int iy1 = y_coeffs[pi + y].i1; | |||
| 175 | for (x = 0; x < bin_w; x++) | |||
| 176 | { | |||
| 177 | if (x_coeffs[pj + x].mute) | |||
| 178 | continue; | |||
| 179 | const float rx = x_coeffs[pj + x].r; | |||
| 180 | const int ix0 = x_coeffs[pj + x].i0; | |||
| 181 | const int ix1 = x_coeffs[pj + x].i1; | |||
| 182 | const float c00 = (1 - ry) * (1 - rx); | |||
| 183 | const float c01 = (1 - ry) * rx; | |||
| 184 | const float c10 = ry * (1 - rx); | |||
| 185 | const float c11 = ry * rx; | |||
| 186 | const float* const ap00 = apn + iy0 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * astride[CCV_NNC_MAX_DIM(2)]; | |||
| 187 | const float* const ap01 = apn + iy0 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * astride[CCV_NNC_MAX_DIM(2)]; | |||
| 188 | const float* const ap10 = apn + iy1 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * astride[CCV_NNC_MAX_DIM(2)]; | |||
| 189 | const float* const ap11 = apn + iy1 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * astride[CCV_NNC_MAX_DIM(2)]; | |||
| 190 | for (k = 0; k < ch; k++) | |||
| 191 | cpz[k] += ap00[k] * c00 + ap01[k] * c01 + ap10[k] * c10 + ap11[k] * c11; | |||
| 192 | } | |||
| 193 | } | |||
| 194 | for (k = 0; k < ch; k++) | |||
| 195 | cpz[k] *= inv; | |||
| 196 | } | |||
| 197 | cpn += cstride[CCV_NNC_MAX_DIM(2) - 1]; | |||
| 198 | } | |||
| 199 | } | |||
| 200 | return CCV_NNC_EXEC_SUCCESS; | |||
| 201 | } | |||
| 202 | ||||
| 203 | static int _ccv_nnc_roi_align_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) | |||
| 204 | { | |||
| 205 | assert(input_size >= 3)((void) sizeof ((input_size >= 3) ? 1 : 0), __extension__ ( { if (input_size >= 3) ; else __assert_fail ("input_size >= 3" , "roi/ccv_nnc_roi_align_cpu_ref.c", 205, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 206 | const ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0]; | |||
| 207 | assert(output_size == 1)((void) sizeof ((output_size == 1) ? 1 : 0), __extension__ ({ if (output_size == 1) ; else __assert_fail ("output_size == 1" , "roi/ccv_nnc_roi_align_cpu_ref.c", 207, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 208 | ccv_nnc_tensor_view_t* o = (ccv_nnc_tensor_view_t*)outputs[0]; | |||
| 209 | const int g_nd = ccv_nnc_tensor_nd(g->info.dim); | |||
| 210 | assert(g_nd == CCV_NNC_MAX_DIM + 1 || g_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((g_nd == (2) + 1 || g_nd == (2) + 2) ? 1 : 0) , __extension__ ({ if (g_nd == (2) + 1 || g_nd == (2) + 2) ; else __assert_fail ("g_nd == CCV_NNC_MAX_DIM + 1 || g_nd == CCV_NNC_MAX_DIM + 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 210, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 211 | const int* gdim = (g_nd == CCV_NNC_MAX_DIM(2) + 1) ? g->info.dim : g->info.dim + 1; | |||
| 212 | const int pool_h = gdim[0]; | |||
| 213 | const int pool_w = gdim[1]; | |||
| 214 | const int o_nd = ccv_nnc_tensor_nd(o->info.dim); | |||
| 215 | assert(o_nd == CCV_NNC_MAX_DIM + 1 || o_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((o_nd == (2) + 1 || o_nd == (2) + 2) ? 1 : 0) , __extension__ ({ if (o_nd == (2) + 1 || o_nd == (2) + 2) ; else __assert_fail ("o_nd == CCV_NNC_MAX_DIM + 1 || o_nd == CCV_NNC_MAX_DIM + 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 215, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 216 | const int* odim = (o_nd == CCV_NNC_MAX_DIM(2) + 1) ? o->info.dim : o->info.dim + 1; | |||
| 217 | const int h = odim[0]; | |||
| 218 | const int w = odim[1]; | |||
| 219 | assert(gdim[2] == odim[2])((void) sizeof ((gdim[2] == odim[2]) ? 1 : 0), __extension__ ( { if (gdim[2] == odim[2]) ; else __assert_fail ("gdim[2] == odim[2]" , "roi/ccv_nnc_roi_align_cpu_ref.c", 219, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 220 | const int ch = gdim[2]; | |||
| 221 | float* gp = g->data.f32; | |||
| 222 | int gstride[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 223 | ccv_nnc_tensor_view_get_stride(g, gstride); | |||
| 224 | float* op = o->data.f32; | |||
| 225 | int ostride[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 226 | ccv_nnc_tensor_view_get_stride(o, ostride); | |||
| 227 | const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[2]; | |||
| 228 | const float* const bp = b->data.f32; | |||
| 229 | const int o_n = ccv_nnc_tensor_get_n(o->info); | |||
| 230 | const int b_nd = ccv_nnc_tensor_nd(b->info.dim); | |||
| 231 | assert(b_nd == 1 || b_nd == 2)((void) sizeof ((b_nd == 1 || b_nd == 2) ? 1 : 0), __extension__ ({ if (b_nd == 1 || b_nd == 2) ; else __assert_fail ("b_nd == 1 || b_nd == 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 231, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 232 | const int b_n = b_nd == 1 ? 1 : b->info.dim[0]; | |||
| 233 | const int g_n = ccv_nnc_tensor_get_n(g->info); | |||
| 234 | assert(g_n == ccv_max(o_n, b_n))((void) sizeof ((g_n == ({ typeof (o_n) _a = (o_n); typeof (b_n ) _b = (b_n); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__ ({ if (g_n == ({ typeof (o_n) _a = (o_n); typeof (b_n) _b = ( b_n); (_a > _b) ? _a : _b; })) ; else __assert_fail ("g_n == ccv_max(o_n, b_n)" , "roi/ccv_nnc_roi_align_cpu_ref.c", 234, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 235 | const int oninc = o_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : ostride[0]; | |||
| 236 | int bstride[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 237 | ccv_nnc_tensor_view_get_stride(b, bstride); | |||
| 238 | const int bninc = b_nd == 1 ? 0 : bstride[CCV_NNC_MAX_DIM(2) + 2 - b_nd]; | |||
| 239 | const int gninc = g_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : gstride[0]; | |||
| 240 | int bin_h, bin_w; | |||
| 241 | roi_align_coeffs_t* y_coeffs; | |||
| 242 | roi_align_coeffs_t* x_coeffs; | |||
| 243 | int* bin_h_at_y; | |||
| 244 | int* bin_w_at_x; | |||
| 245 | int start_h, start_w, end_h, end_w; | |||
| 246 | int n; | |||
| 247 | ccv_nnc_tensor_zero(o); | |||
| 248 | for (n = 0; n < g_n; n++) | |||
| 249 | { | |||
| 250 | const float roi_x = bp[(n % b_n) * bninc] * w; // These assumed it is real-coordinate, with range between 0 to w - 1. | |||
| 251 | const float roi_y = bp[(n % b_n) * bninc + 1] * h; | |||
| 252 | const float roi_w = bp[(n % b_n) * bninc + 2] * w; | |||
| 253 | const float roi_h = bp[(n % b_n) * bninc + 3] * h; | |||
| 254 | // Re-compute the offsets if b changes or it is the first time. | |||
| 255 | if ((b_n == 1 && n == 0) || b_n > 1) | |||
| 256 | _ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w); | |||
| 257 | const float* gpn = gp + n * gninc; | |||
| 258 | float* const opn = op + (n % o_n) * oninc; | |||
| 259 | int x, y, i, j, k; | |||
| 260 | for (i = 0; i < pool_h; i++) | |||
| 261 | { | |||
| 262 | const int pi = i * bin_h; | |||
| 263 | const int bin_hz = bin_h_at_y[i]; | |||
| 264 | for (j = 0; j < pool_w; j++) | |||
| 265 | { | |||
| 266 | const int pj = j * bin_w; | |||
| 267 | const int bin_wz = bin_w_at_x[j]; | |||
| 268 | const float inv = 1.0 / (bin_hz * bin_wz); | |||
| 269 | const float* const gpz = gpn + j * gstride[CCV_NNC_MAX_DIM(2)]; | |||
| 270 | for (y = 0; y < bin_h; y++) | |||
| 271 | { | |||
| 272 | if (y_coeffs[pi + y].mute) | |||
| 273 | continue; | |||
| 274 | const float ry = y_coeffs[pi + y].r; | |||
| 275 | const int iy0 = y_coeffs[pi + y].i0; | |||
| 276 | const int iy1 = y_coeffs[pi + y].i1; | |||
| 277 | for (x = 0; x < bin_w; x++) | |||
| 278 | { | |||
| 279 | if (x_coeffs[pj + x].mute) | |||
| 280 | continue; | |||
| 281 | const float rx = x_coeffs[pj + x].r; | |||
| 282 | const int ix0 = x_coeffs[pj + x].i0; | |||
| 283 | const int ix1 = x_coeffs[pj + x].i1; | |||
| 284 | const float c00 = (1 - ry) * (1 - rx); | |||
| 285 | const float c01 = (1 - ry) * rx; | |||
| 286 | const float c10 = ry * (1 - rx); | |||
| 287 | const float c11 = ry * rx; | |||
| 288 | float* const op00 = opn + iy0 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * ostride[CCV_NNC_MAX_DIM(2)]; | |||
| 289 | float* const op01 = opn + iy0 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * ostride[CCV_NNC_MAX_DIM(2)]; | |||
| 290 | float* const op10 = opn + iy1 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * ostride[CCV_NNC_MAX_DIM(2)]; | |||
| 291 | float* const op11 = opn + iy1 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * ostride[CCV_NNC_MAX_DIM(2)]; | |||
| 292 | for (k = 0; k < ch; k++) | |||
| 293 | { | |||
| 294 | op00[k] += gpz[k] * c00 * inv; | |||
| 295 | op01[k] += gpz[k] * c01 * inv; | |||
| 296 | op10[k] += gpz[k] * c10 * inv; | |||
| 297 | op11[k] += gpz[k] * c11 * inv; | |||
| 298 | } | |||
| 299 | } | |||
| 300 | } | |||
| 301 | } | |||
| 302 | gpn += gstride[CCV_NNC_MAX_DIM(2) - 1]; | |||
| 303 | } | |||
| 304 | } | |||
| 305 | return CCV_NNC_EXEC_SUCCESS; | |||
| 306 | } | |||
| 307 | ||||
| 308 | REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_ROI_ALIGN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry) | |||
| 309 | { | |||
| 310 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC; | |||
| 311 | registry->tensor_datatypes = CCV_32F; | |||
| 312 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; | |||
| 313 | registry->algorithms = 1; | |||
| 314 | registry->exec = _ccv_nnc_roi_align_forw; | |||
| 315 | } | |||
| 316 | ||||
| 317 | REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_ROI_ALIGN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry) | |||
| 318 | { | |||
| 319 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC; | |||
| 320 | registry->tensor_datatypes = CCV_32F; | |||
| 321 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; | |||
| 322 | registry->algorithms = 1; | |||
| 323 | registry->exec = _ccv_nnc_roi_align_back; | |||
| 324 | } |