| File: | nnc/cmd/roi/ccv_nnc_roi_align_cpu_ref.c |
| Warning: | line 258, column 30: Division by zero |
| 1 | #include "ccv.h"
| 2 | #include "ccv_internal.h"
| 3 | #include "nnc/ccv_nnc.h"
| 4 | #include "nnc/ccv_nnc_easy.h"
| 5 | #include "nnc/ccv_nnc_internal.h"
| 6 | #ifdef USE_OPENMP
| 7 | #include <omp.h>
| 8 | #endif
| 9 | #ifdef USE_DISPATCH
| 10 | #include <dispatch/dispatch.h>
| 11 | #endif
| 12 |
| 13 | typedef struct {
| 14 | 	int i0, i1, mute;
| 15 | 	float r;
| 16 | } roi_align_coeffs_t;
| 17 |
| 18 | static void _ccv_nnc_bilinear_coeffs(ccv_nnc_stream_context_t* const stream_context, const int h, const int w, const float roi_y, const float roi_x, const float roi_h, const float roi_w, const int pool_h, const int pool_w, int* const bin_h_ref, int* const bin_w_ref, roi_align_coeffs_t** const y_coeffs_ref, roi_align_coeffs_t** const x_coeffs_ref, int** const bin_h_at_y_ref, int** const bin_w_at_x_ref, int* const start_h_ref, int* const start_w_ref, int* const end_h_ref, int* const end_w_ref)
| 19 | {
| 20 | 	const int bin_h = (int)ceilf(roi_h / pool_h); // How many sample bins per pooled output point. We sample at a slightly higher resolution (due to the ceiling) with bilinear interpolation.
| 21 | 	const int bin_w = (int)ceilf(roi_w / pool_w);
| 22 | 	const int bin_pool_h = bin_h * pool_h; // Before averaging, the size of the region in integer terms.
| 23 | 	const int bin_pool_w = bin_w * pool_w;
| 24 | 	const float scale_y = roi_h / bin_pool_h; // The scale to multiply back to get the original coordinate.
| 25 | 	const float scale_x = roi_w / bin_pool_w;
| 26 | 	int x, y, i, j;
| 27 | 	roi_align_coeffs_t* const y_coeffs = (roi_align_coeffs_t*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(roi_align_coeffs_t) * (bin_pool_h + bin_pool_w) + sizeof(int) * (pool_h + pool_w), CCV_TENSOR_CPU_MEMORY);
| 28 | 	roi_align_coeffs_t* const x_coeffs = y_coeffs + bin_pool_h;
| 29 | 	int* const bin_h_at_y = (int*)(x_coeffs + bin_pool_w);
| 30 | 	int* const bin_w_at_x = bin_h_at_y + pool_h;
| 31 | 	for (i = 0; i < pool_h; i++)
| 32 | 	{
| 33 | 		const int pi = i * bin_h;
| 34 | 		int count = 0;
| 35 | 		for (y = 0; y < bin_h; y++)
| 36 | 		{
| 37 | 			const float ay = roi_y + (y + pi + 0.5) * scale_y - 0.5;
| 38 | 			const int iy = (int)floorf(ay);
| 39 | 			const float ry = ay - iy;
| 40 | 			const int iy0 = ccv_clamp(iy, 0, h - 1);
| 41 | 			const int iy1 = ccv_clamp(iy + 1, 0, h - 1);
| 42 | 			y_coeffs[pi + y].i0 = iy0;
| 43 | 			y_coeffs[pi + y].i1 = iy1;
| 44 | 			y_coeffs[pi + y].r = ry;
| 45 | 			const int mute = (iy + 1 < 0 || iy > h - 1);
| 46 | 			y_coeffs[pi + y].mute = mute;
| 47 | 			if (!mute)
| 48 | 				++count;
| 49 | 		}
| 50 | 		bin_h_at_y[i] = count;
| 51 | 	}
| 52 | 	int start_h = pool_h;
| 53 | 	for (i = 0; start_h == pool_h && i < pool_h; i++)
| 54 | 		if (bin_h_at_y[i] > 0)
| 55 | 			start_h = i;
| 56 | 	int end_h = 0;
| 57 | 	for (i = pool_h - 1; end_h == 0 && i >= 0; i--)
| 58 | 		if (bin_h_at_y[i] > 0)
| 59 | 			end_h = i + 1;
| 60 | 	for (j = 0; j < pool_w; j++)
| 61 | 	{
| 62 | 		const int pj = j * bin_w;
| 63 | 		int count = 0;
| 64 | 		for (x = 0; x < bin_w; x++)
| 65 | 		{
| 66 | 			const float ax = roi_x + (x + pj + 0.5) * scale_x - 0.5;
| 67 | 			const int ix = (int)floorf(ax);
| 68 | 			const float rx = ax - ix;
| 69 | 			const int ix0 = ccv_clamp(ix, 0, w - 1);
| 70 | 			const int ix1 = ccv_clamp(ix + 1, 0, w - 1);
| 71 | 			x_coeffs[pj + x].i0 = ix0;
| 72 | 			x_coeffs[pj + x].i1 = ix1;
| 73 | 			x_coeffs[pj + x].r = rx;
| 74 | 			const int mute = (ix + 1 < 0 || ix > w - 1);
| 75 | 			x_coeffs[pj + x].mute = mute;
| 76 | 			if (!mute)
| 77 | 				++count;
| 78 | 		}
| 79 | 		bin_w_at_x[j] = count;
| 80 | 	}
| 81 | 	int start_w = pool_w;
| 82 | 	for (j = 0; start_w == pool_w && j < pool_w; j++)
| 83 | 		if (bin_w_at_x[j] > 0)
| 84 | 			start_w = j;
| 85 | 	int end_w = 0;
| 86 | 	for (j = pool_w - 1; end_w == 0 && j >= 0; j--)
| 87 | 		if (bin_w_at_x[j] > 0)
| 88 | 			end_w = j + 1;
| 89 | 	*bin_h_ref = bin_h;
| 90 | 	*bin_w_ref = bin_w;
| 91 | 	*y_coeffs_ref = y_coeffs;
| 92 | 	*x_coeffs_ref = x_coeffs;
| 93 | 	*bin_h_at_y_ref = bin_h_at_y;
| 94 | 	*bin_w_at_x_ref = bin_w_at_x;
| 95 | 	*start_h_ref = start_h;
| 96 | 	*start_w_ref = start_w;
| 97 | 	*end_h_ref = end_h;
| 98 | 	*end_w_ref = end_w;
| 99 | }
| 100 |
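The function above precomputes, per axis, the two clamped source indices (i0, i1), the interpolation remainder r, and a mute flag for samples that fall entirely outside the feature map; bin_h_at_y/bin_w_at_x count the unmuted samples per pooled bin, and start/end bracket the bins that received any. A minimal sketch of the bilinear blend those coefficients encode (a hypothetical helper for illustration; the forward pass below inlines the same arithmetic per channel):

static float bilinear_blend(float v00, float v01, float v10, float v11, float ry, float rx)
{
	const float c00 = (1 - ry) * (1 - rx); // weight of (iy0, ix0)
	const float c01 = (1 - ry) * rx;       // weight of (iy0, ix1)
	const float c10 = ry * (1 - rx);       // weight of (iy1, ix0)
	const float c11 = ry * rx;             // weight of (iy1, ix1)
	return v00 * c00 + v01 * c01 + v10 * c10 + v11 * c11;
}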
| 101 | static int _ccv_nnc_roi_align_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
| 102 | {
| 103 | 	assert(input_size == 2);
| 104 | 	const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
| 105 | 	assert(output_size == 1);
| 106 | 	const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1];
| 107 | 	ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0];
| 108 | 	const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
| 109 | 	assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2);
| 110 | 	const int* adim = (a_nd == CCV_NNC_MAX_DIM + 1) ? a->info.dim : a->info.dim + 1;
| 111 | 	const int h = adim[0];
| 112 | 	const int w = adim[1];
| 113 | 	const int c_nd = ccv_nnc_tensor_nd(c->info.dim);
| 114 | 	assert(c_nd == CCV_NNC_MAX_DIM + 1 || c_nd == CCV_NNC_MAX_DIM + 2);
| 115 | 	const int* cdim = (c_nd == CCV_NNC_MAX_DIM + 1) ? c->info.dim : c->info.dim + 1;
| 116 | 	const int pool_h = cdim[0];
| 117 | 	const int pool_w = cdim[1];
| 118 | 	assert(cdim[2] == adim[2]);
| 119 | 	const int ch = cdim[2];
| 120 | 	const float* const ap = a->data.f32;
| 121 | 	int astride[CCV_NNC_MAX_DIM_ALLOC];
| 122 | 	ccv_nnc_tensor_view_get_stride(a, astride);
| 123 | 	const float* const bp = b->data.f32;
| 124 | 	float* cp = c->data.f32;
| 125 | 	int cstride[CCV_NNC_MAX_DIM_ALLOC];
| 126 | 	ccv_nnc_tensor_view_get_stride(c, cstride);
| 127 | 	const int a_n = ccv_nnc_tensor_get_n(a->info);
| 128 | 	const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
| 129 | 	assert(b_nd == 1 || b_nd == 2);
| 130 | 	const int b_n = b_nd == 1 ? 1 : b->info.dim[0];
| 131 | 	const int c_n = ccv_nnc_tensor_get_n(c->info);
| 132 | 	assert(c_n == ccv_max(a_n, b_n));
| 133 | 	const int aninc = a_nd == CCV_NNC_MAX_DIM + 1 ? 0 : astride[0];
| 134 | 	int bstride[CCV_NNC_MAX_DIM_ALLOC];
| 135 | 	ccv_nnc_tensor_view_get_stride(b, bstride);
| 136 | 	const int bninc = b_nd == 1 ? 0 : bstride[CCV_NNC_MAX_DIM + 2 - b_nd];
| 137 | 	const int cninc = c_nd == CCV_NNC_MAX_DIM + 1 ? 0 : cstride[0];
| 138 | 	ccv_nnc_tensor_zero(c);
| 139 | 	int bin_h, bin_w;
| 140 | 	roi_align_coeffs_t* y_coeffs;
| 141 | 	roi_align_coeffs_t* x_coeffs;
| 142 | 	int* bin_h_at_y;
| 143 | 	int* bin_w_at_x;
| 144 | 	int start_h, start_w, end_h, end_w;
| 145 | 	int n;
| 146 | 	for (n = 0; n < c_n; n++)
| 147 | 	{
| 148 | 		const float* const apn = ap + (n % a_n) * aninc;
| 149 | 		float* cpn = cp + n * cninc;
| 150 | 		const float roi_x = bp[(n % b_n) * bninc] * w; // These assume real coordinates, in the range 0 to w - 1.
| 151 | 		const float roi_y = bp[(n % b_n) * bninc + 1] * h;
| 152 | 		const float roi_w = bp[(n % b_n) * bninc + 2] * w;
| 153 | 		const float roi_h = bp[(n % b_n) * bninc + 3] * h;
| 154 | 		// Re-compute the offsets if b changes or it is the first time.
| 155 | 		if ((b_n == 1 && n == 0) || b_n > 1)
| 156 | 			_ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w);
| 157 | 		int i, j, x, y, k;
| 158 | 		for (i = start_h; i < end_h; i++)
| 159 | 		{
| 160 | 			const int pi = i * bin_h;
| 161 | 			const int bin_hz = bin_h_at_y[i];
| 162 | 			for (j = start_w; j < end_w; j++)
| 163 | 			{
| 164 | 				const int pj = j * bin_w;
| 165 | 				const int bin_wz = bin_w_at_x[j];
| 166 | 				const float inv = 1.0 / (bin_hz * bin_wz);
| 167 | 				float* const cpz = cpn + j * cstride[CCV_NNC_MAX_DIM];
| 168 | 				for (y = 0; y < bin_h; y++)
| 169 | 				{
| 170 | 					if (y_coeffs[pi + y].mute)
| 171 | 						continue;
| 172 | 					const float ry = y_coeffs[pi + y].r;
| 173 | 					const int iy0 = y_coeffs[pi + y].i0;
| 174 | 					const int iy1 = y_coeffs[pi + y].i1;
| 175 | 					for (x = 0; x < bin_w; x++)
| 176 | 					{
| 177 | 						if (x_coeffs[pj + x].mute)
| 178 | 							continue;
| 179 | 						const float rx = x_coeffs[pj + x].r;
| 180 | 						const int ix0 = x_coeffs[pj + x].i0;
| 181 | 						const int ix1 = x_coeffs[pj + x].i1;
| 182 | 						const float c00 = (1 - ry) * (1 - rx);
| 183 | 						const float c01 = (1 - ry) * rx;
| 184 | 						const float c10 = ry * (1 - rx);
| 185 | 						const float c11 = ry * rx;
| 186 | 						const float* const ap00 = apn + iy0 * astride[CCV_NNC_MAX_DIM - 1] + ix0 * astride[CCV_NNC_MAX_DIM];
| 187 | 						const float* const ap01 = apn + iy0 * astride[CCV_NNC_MAX_DIM - 1] + ix1 * astride[CCV_NNC_MAX_DIM];
| 188 | 						const float* const ap10 = apn + iy1 * astride[CCV_NNC_MAX_DIM - 1] + ix0 * astride[CCV_NNC_MAX_DIM];
| 189 | 						const float* const ap11 = apn + iy1 * astride[CCV_NNC_MAX_DIM - 1] + ix1 * astride[CCV_NNC_MAX_DIM];
| 190 | 						for (k = 0; k < ch; k++)
| 191 | 							cpz[k] += ap00[k] * c00 + ap01[k] * c01 + ap10[k] * c10 + ap11[k] * c11;
| 192 | 					}
| 193 | 				}
| 194 | 				for (k = 0; k < ch; k++)
| 195 | 					cpz[k] *= inv;
| 196 | 			}
| 197 | 			cpn += cstride[CCV_NNC_MAX_DIM - 1];
| 198 | 		}
| 199 | 	}
| 200 | 	return CCV_NNC_EXEC_SUCCESS;
| 201 | }
| 202 |
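A minimal invocation sketch for the forward kernel above (illustrative only: the command macro CMD_ROI_ALIGN_FORWARD and its (pool_h, pool_w) parameters are assumptions based on the usual nnc conventions; TENSOR_LIST and CPU_TENSOR_NHWC are the convenience macros from ccv_nnc_easy.h listed further below):

// Pool one normalized ROI from a 32x32x3 NHWC feature map into 4x4x3.
ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 32, 32, 3), 0);
ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 4, 3), 0);
b->data.f32[0] = 0.25; // x, scaled to [0, 1]; the kernel multiplies it by w
b->data.f32[1] = 0.25; // y
b->data.f32[2] = 0.5;  // roi width
b->data.f32[3] = 0.5;  // roi height
ccv_nnc_cmd_exec(CMD_ROI_ALIGN_FORWARD(4, 4), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);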
| 203 | static int _ccv_nnc_roi_align_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
| 204 | {
| 205 | 	assert(input_size >= 3);
| 206 | 	const ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
| 207 | 	assert(output_size == 1);
| 208 | 	ccv_nnc_tensor_view_t* o = (ccv_nnc_tensor_view_t*)outputs[0];
| 209 | 	const int g_nd = ccv_nnc_tensor_nd(g->info.dim);
| 210 | 	assert(g_nd == CCV_NNC_MAX_DIM + 1 || g_nd == CCV_NNC_MAX_DIM + 2);
| 211 | 	const int* gdim = (g_nd == CCV_NNC_MAX_DIM + 1) ? g->info.dim : g->info.dim + 1;
| 212 | 	const int pool_h = gdim[0];
| 213 | 	const int pool_w = gdim[1];
| 214 | 	const int o_nd = ccv_nnc_tensor_nd(o->info.dim);
| 215 | 	assert(o_nd == CCV_NNC_MAX_DIM + 1 || o_nd == CCV_NNC_MAX_DIM + 2);
| 216 | 	const int* odim = (o_nd == CCV_NNC_MAX_DIM + 1) ? o->info.dim : o->info.dim + 1;
| 217 | 	const int h = odim[0];
| 218 | 	const int w = odim[1];
| 219 | 	assert(gdim[2] == odim[2]);
| 220 | 	const int ch = gdim[2];
| 221 | 	float* gp = g->data.f32;
| 222 | 	int gstride[CCV_NNC_MAX_DIM_ALLOC];
| 223 | 	ccv_nnc_tensor_view_get_stride(g, gstride);
| 224 | 	float* op = o->data.f32;
| 225 | 	int ostride[CCV_NNC_MAX_DIM_ALLOC];
| 226 | 	ccv_nnc_tensor_view_get_stride(o, ostride);
| 227 | 	const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[2];
| 228 | 	const float* const bp = b->data.f32;
| 229 | 	const int o_n = ccv_nnc_tensor_get_n(o->info);
| 230 | 	const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
| 231 | 	assert(b_nd == 1 || b_nd == 2);
| 232 | 	const int b_n = b_nd == 1 ? 1 : b->info.dim[0];
| 233 | 	const int g_n = ccv_nnc_tensor_get_n(g->info);
| 234 | 	assert(g_n == ccv_max(o_n, b_n));
| 235 | 	const int oninc = o_nd == CCV_NNC_MAX_DIM + 1 ? 0 : ostride[0];
| 236 | 	int bstride[CCV_NNC_MAX_DIM_ALLOC];
| 237 | 	ccv_nnc_tensor_view_get_stride(b, bstride);
| 238 | 	const int bninc = b_nd == 1 ? 0 : bstride[CCV_NNC_MAX_DIM + 2 - b_nd];
| 239 | 	const int gninc = g_nd == CCV_NNC_MAX_DIM + 1 ? 0 : gstride[0];
| 240 | 	int bin_h, bin_w;
| 241 | 	roi_align_coeffs_t* y_coeffs;
| 242 | 	roi_align_coeffs_t* x_coeffs;
| 243 | 	int* bin_h_at_y;
| 244 | 	int* bin_w_at_x;
| 245 | 	int start_h, start_w, end_h, end_w;
| 246 | 	int n;
| 247 | 	ccv_nnc_tensor_zero(o);
| 248 | 	for (n = 0; n < g_n; n++)
| 249 | 	{
| 250 | 		const float roi_x = bp[(n % b_n) * bninc] * w; // These assume real coordinates, in the range 0 to w - 1.
| 251 | 		const float roi_y = bp[(n % b_n) * bninc + 1] * h;
| 252 | 		const float roi_w = bp[(n % b_n) * bninc + 2] * w;
| 253 | 		const float roi_h = bp[(n % b_n) * bninc + 3] * h;
| 254 | 		// Re-compute the offsets if b changes or it is the first time.
| 255 | 		if ((b_n == 1 && n == 0) || b_n > 1)
| 256 | 			_ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w);
| 257 | 		const float* gpn = gp + n * gninc;
| 258 | 		float* const opn = op + (n % o_n) * oninc;
| 259 | 		int x, y, i, j, k;
| 260 | 		for (i = 0; i < pool_h; i++)
| 261 | 		{
| 262 | 			const int pi = i * bin_h;
| 263 | 			const int bin_hz = bin_h_at_y[i];
| 264 | 			for (j = 0; j < pool_w; j++)
| 265 | 			{
| 266 | 				const int pj = j * bin_w;
| 267 | 				const int bin_wz = bin_w_at_x[j];
| 268 | 				const float inv = 1.0 / (bin_hz * bin_wz);
| 269 | 				const float* const gpz = gpn + j * gstride[CCV_NNC_MAX_DIM];
| 270 | 				for (y = 0; y < bin_h; y++)
| 271 | 				{
| 272 | 					if (y_coeffs[pi + y].mute)
| 273 | 						continue;
| 274 | 					const float ry = y_coeffs[pi + y].r;
| 275 | 					const int iy0 = y_coeffs[pi + y].i0;
| 276 | 					const int iy1 = y_coeffs[pi + y].i1;
| 277 | 					for (x = 0; x < bin_w; x++)
| 278 | 					{
| 279 | 						if (x_coeffs[pj + x].mute)
| 280 | 							continue;
| 281 | 						const float rx = x_coeffs[pj + x].r;
| 282 | 						const int ix0 = x_coeffs[pj + x].i0;
| 283 | 						const int ix1 = x_coeffs[pj + x].i1;
| 284 | 						const float c00 = (1 - ry) * (1 - rx);
| 285 | 						const float c01 = (1 - ry) * rx;
| 286 | 						const float c10 = ry * (1 - rx);
| 287 | 						const float c11 = ry * rx;
| 288 | 						float* const op00 = opn + iy0 * ostride[CCV_NNC_MAX_DIM - 1] + ix0 * ostride[CCV_NNC_MAX_DIM];
| 289 | 						float* const op01 = opn + iy0 * ostride[CCV_NNC_MAX_DIM - 1] + ix1 * ostride[CCV_NNC_MAX_DIM];
| 290 | 						float* const op10 = opn + iy1 * ostride[CCV_NNC_MAX_DIM - 1] + ix0 * ostride[CCV_NNC_MAX_DIM];
| 291 | 						float* const op11 = opn + iy1 * ostride[CCV_NNC_MAX_DIM - 1] + ix1 * ostride[CCV_NNC_MAX_DIM];
| 292 | 						for (k = 0; k < ch; k++)
| 293 | 						{
| 294 | 							op00[k] += gpz[k] * c00 * inv;
| 295 | 							op01[k] += gpz[k] * c01 * inv;
| 296 | 							op10[k] += gpz[k] * c10 * inv;
| 297 | 							op11[k] += gpz[k] * c11 * inv;
| 298 | 						}
| 299 | 					}
| 300 | 				}
| 301 | 			}
| 302 | 			gpn += gstride[CCV_NNC_MAX_DIM - 1];
| 303 | 		}
| 304 | 	}
| 305 | 	return CCV_NNC_EXEC_SUCCESS;
| 306 | }
| 307 |
| 308 | REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
| 309 | {
| 310 | 	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
| 311 | 	registry->tensor_datatypes = CCV_32F;
| 312 | 	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
| 313 | 	registry->algorithms = 1;
| 314 | 	registry->exec = _ccv_nnc_roi_align_forw;
| 315 | }
| 316 |
| 317 | REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
| 318 | {
| 319 | 	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
| 320 | 	registry->tensor_datatypes = CCV_32F;
| 321 | 	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
| 322 | 	registry->algorithms = 1;
| 323 | 	registry->exec = _ccv_nnc_roi_align_back;
| 324 | }
| File: | ../../nnc/ccv_nnc_easy.h |

| 1 | /**********************************************************
| 2 | * C-based/Cached/Core Computer Vision Library |
| 3 | * Liu Liu, 2010-02-01 |
| 4 | **********************************************************/ |
| 5 | |
| 6 | /********************************************************** |
| 7 | * CCV - Neural Network Collection |
| 8 | **********************************************************/ |
| 9 | |
| 10 | #ifndef GUARD_ccv_nnc_easy_h |
| 11 | #define GUARD_ccv_nnc_easy_h |
| 12 | |
| 13 | #include "ccv.h" |
| 14 | #include "ccv_internal.h" |
| 15 | #include "nnc/ccv_nnc.h" |
| 16 | #ifdef HAVE_MPS |
| 17 | #ifdef __APPLE__ |
| 18 | #include "TargetConditionals.h" |
| 19 | #if !TARGET_OS_IPHONE && !TARGET_IPHONE_SIMULATOR |
| 20 | #include <mach/mach_vm.h> |
| 21 | #else |
| 22 | #define PAGE_SIZE (16384) |
| 23 | #endif |
| 24 | #endif |
| 25 | #endif |
| 26 | |
| 27 | /**
| 28 |  * Convenience API
| 29 |  *
| 30 |  * This header provides convenience APIs for nnc usage. Being a convenience API,
| 31 |  * it is optimized for shorthand coding, and may collide with names in other
| 32 |  * namespaces.
| 33 |  *
| 34 |  */
| 35 | // C99 only; make sure your compiler supports it.
| 36 | |
| 37 | #define NOOP_GRAPH_WHILE_EXPR (ccv_nnc_graph_while_f)(1)
| 38 | #define NOOP_GRAPH_CASE_OF_EXPR (ccv_nnc_graph_case_of_f)(1)
| 39 |
| 40 | // This is a better LIST_COUNT macro: it generates a sum of 1+1+0+0+0 where each term is 1 if the parameter is present, and 0 otherwise.
| 41 | // This works better for cases such as LIST_COUNT(1, 2, 3,) where the previous macro would get 4 and this one
| 42 | // computes the correct result.
| 43 | #define LIST_COUNT_01(_0,_1,_2,...) _2
| 44 | #define LIST_COUNT_E(...) LIST_COUNT_01(_0,##__VA_ARGS__,1,0)
| 45 | #define LIST_COUNT_N(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12,_13,_14,_15,_16,_17,_18,_19,_20,_21,_22,_23,_24,_25,_26,_27,_28,_29,_30,_31,_32,_33,_34,_35,_36,_37,_38,_39,_40,_41,_42,_43,_44,_45,_46,_47,_48,_49,_50,_51,_52,_53,_54,_55,_56,_57,_58,_59,_60,_61,_62,_63,...) (LIST_COUNT_E(_0)+LIST_COUNT_E(_1)+LIST_COUNT_E(_2)+LIST_COUNT_E(_3)+LIST_COUNT_E(_4)+LIST_COUNT_E(_5)+LIST_COUNT_E(_6)+LIST_COUNT_E(_7)+LIST_COUNT_E(_8)+LIST_COUNT_E(_9)+LIST_COUNT_E(_10)+LIST_COUNT_E(_11)+LIST_COUNT_E(_12)+LIST_COUNT_E(_13)+LIST_COUNT_E(_14)+LIST_COUNT_E(_15)+LIST_COUNT_E(_16)+LIST_COUNT_E(_17)+LIST_COUNT_E(_18)+LIST_COUNT_E(_19)+LIST_COUNT_E(_20)+LIST_COUNT_E(_21)+LIST_COUNT_E(_22)+LIST_COUNT_E(_23)+LIST_COUNT_E(_24)+LIST_COUNT_E(_25)+LIST_COUNT_E(_26)+LIST_COUNT_E(_27)+LIST_COUNT_E(_28)+LIST_COUNT_E(_29)+LIST_COUNT_E(_30)+LIST_COUNT_E(_31)+LIST_COUNT_E(_32)+LIST_COUNT_E(_33)+LIST_COUNT_E(_34)+LIST_COUNT_E(_35)+LIST_COUNT_E(_36)+LIST_COUNT_E(_37)+LIST_COUNT_E(_38)+LIST_COUNT_E(_39)+LIST_COUNT_E(_40)+LIST_COUNT_E(_41)+LIST_COUNT_E(_42)+LIST_COUNT_E(_43)+LIST_COUNT_E(_44)+LIST_COUNT_E(_45)+LIST_COUNT_E(_46)+LIST_COUNT_E(_47)+LIST_COUNT_E(_48)+LIST_COUNT_E(_49)+LIST_COUNT_E(_50)+LIST_COUNT_E(_51)+LIST_COUNT_E(_52)+LIST_COUNT_E(_53)+LIST_COUNT_E(_54)+LIST_COUNT_E(_55)+LIST_COUNT_E(_56)+LIST_COUNT_E(_57)+LIST_COUNT_E(_58)+LIST_COUNT_E(_59)+LIST_COUNT_E(_60)+LIST_COUNT_E(_61)+LIST_COUNT_E(_62)+LIST_COUNT_E(_63)-1)
| 46 | #define LIST_COUNT(...) LIST_COUNT_N(_0,##__VA_ARGS__,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,)
| 47 |
| 48 | #define LIST_X(_type, ...) (_type []){__VA_ARGS__}
| 49 |
| 50 | #define KV_X_2(_x, _y, ...) {(_x), (_y)}
| 51 | #define KV_X_1(_x, ...) {(_x)}
| 52 | #define KV_X_SEL(_1, _2, _FX, ...) _FX
| 53 | #define KV(...) KV_X_SEL(__VA_ARGS__, KV_X_2, KV_X_1)(__VA_ARGS__)
| 54 |
| 55 | #define LIST_SIZEOF_COUNT(_type, ...) (sizeof(LIST_X(_type, __VA_ARGS__)) / sizeof(_type))
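A quick sanity check of the counting behavior described above (a sketch assuming a C11 compiler for _Static_assert; not part of the header):

_Static_assert(LIST_COUNT(1, 2, 3) == 3, "three parameters");
_Static_assert(LIST_COUNT(1, 2, 3,) == 3, "a trailing comma no longer over-counts");
_Static_assert(LIST_COUNT() == 0, "an empty list counts as zero");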
| 56 | |
| 57 | /**
| 58 |  * @defgroup convenience_api Convenience API
| 59 |  * @{
| 60 |  */
| 61 | /**
| 62 |  * Pass a list of tensors to NNC functions that accepts (tensor array, tensor array size).
| 63 |  * This method effectively gives two parameters as one.
| 64 |  */
| 65 | #define TENSOR_LIST(...) LIST_X(ccv_nnc_tensor_t*, __VA_ARGS__), LIST_COUNT(__VA_ARGS__)
| 66 | /**
| 67 |  * Pass a list of tensor parameters to NNC functions that accepts (parameter array, parameter array size).
| 68 |  * This method effectively gives two parameters as one.
| 69 |  */
| 70 | #define TENSOR_PARAM_LIST(...) LIST_X(const ccv_nnc_tensor_param_t, __VA_ARGS__), LIST_COUNT(__VA_ARGS__)
| 71 | /**
| 72 |  * This represents a tensor symbol that is empty (tensor = nil)
| 73 |  */
| 74 | #define NO_TENSOR_SYMBOL (const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL}
| 75 | /**
| 76 |  * This represents a graph exec symbol that is empty (exec = nil)
| 77 |  */
| 78 | #define NO_GRAPH_EXEC_SYMBOL (const ccv_nnc_graph_exec_symbol_t){.d = CCV_NNC_NO_GRAPH_EXEC_SYMBOL}
| 79 | /**
| 80 |  * Pass a list of tensor symbols to NNC functions that accepts (tensor symbol array, tensor symbol array size).
| 81 |  * This method effectively gives two parameters as one.
| 82 |  */
| 83 | #define TENSOR_SYMBOL_LIST(...) LIST_X(const ccv_nnc_tensor_symbol_t, __VA_ARGS__), LIST_COUNT(__VA_ARGS__)
| 84 | /**
| 85 |  * Pass a list of tensor variables to NNC functions that accepts (tensor variable array, tensor variable array size).
| 86 |  * This method effectively gives two parameters as one.
| 87 |  */
| 88 | #define TENSOR_VARIABLE_LIST(...) LIST_X(ccv_nnc_tensor_variable_t, __VA_ARGS__), LIST_COUNT(__VA_ARGS__)
| 89 | /**
| 90 |  * Pass a list of tensor bindings to NNC functions that accepts (tensor binding array, tensor binding array size).
| 91 |  * This method effectively gives two parameters as one. Since tensor binding requires two: symbol and a tensor,
| 92 |  * you should use this like: TENSOR_BIND_MAP(KV(symbol1, tensor1), KV(symbol2, tensor2)).
| 93 |  */
| 94 | #define TENSOR_BIND_MAP(...) LIST_X(const ccv_nnc_tensor_bind_t, __VA_ARGS__), LIST_SIZEOF_COUNT(ccv_nnc_tensor_bind_t, __VA_ARGS__)
| 95 | /**
| 96 |  * Pass a list of tensor symbol pairs to NNC functions that accepts (tensor symbol pair array, tensor symbol pair array size).
| 97 |  * This method effectively gives two parameters as one. Since tensor symbol pair requires two: source symbol and destination symbol,
| 98 |  * you should use this like: TENSOR_SYMBOL_MAP(KV(symbol1, symbol2), KV(symbol3, symbol4)).
| 99 |  */
| 100 | #define TENSOR_SYMBOL_MAP(...) LIST_X(const ccv_nnc_tensor_symbol_map_t, __VA_ARGS__), LIST_SIZEOF_COUNT(ccv_nnc_tensor_symbol_map_t, __VA_ARGS__)
| 101 | /**
| 102 |  * Pass a list of execution nodes to NNC functions that accepts (execution node array, execution node array size).
| 103 |  * This method effectively gives two parameters as one.
| 104 |  */
| 105 | #define GRAPH_EXEC_LIST(...) LIST_X(const ccv_nnc_graph_exec_t, __VA_ARGS__), LIST_COUNT(__VA_ARGS__)
| 106 | /**
| 107 |  * Pass a list of execution node symbols to NNC functions that accepts (execution node symbol array, execution node symbol array size).
| 108 |  * This method effectively gives two parameters as one.
| 109 |  */
| 110 | #define GRAPH_EXEC_SYMBOL_LIST(...) LIST_X(const ccv_nnc_graph_exec_symbol_t, __VA_ARGS__), LIST_COUNT(__VA_ARGS__)
| 111 | /**
| 112 |  * Pass both default sources and default sources size to function that accepts (sources, source size).
| 113 |  * @param x A given symbolic graph.
| 114 |  */
| 115 | #define SYMBOLIC_GRAPH_SOURCES(x) ccv_nnc_symbolic_graph_sources(x), ccv_nnc_symbolic_graph_source_size(x)
| 116 | /**
| 117 |  * Pass both default destinations and default destinations size to function that accepts (destinations, destination size).
| 118 |  * @param x A given symbolic graph.
| 119 |  */
| 120 | #define SYMBOLIC_GRAPH_DESTINATIONS(x) ccv_nnc_symbolic_graph_destinations(x), ccv_nnc_symbolic_graph_destination_size(x)
| 121 | /**
| 122 |  * Pass a list of simplification passes to NNC functions that accepts (pass array, pass array size).
| 123 |  * This method effectively gives two parameters as one.
| 124 |  */
| 125 | #define SYMBOLIC_GRAPH_PASSES(...) LIST_X(const int, __VA_ARGS__), LIST_COUNT(__VA_ARGS__)
| 126 | /**
| 127 |  * Pass a list of CNNP models to NNC functions that accepts (model array, model array size).
| 128 |  * This method effectively gives two parameters as one.
| 129 |  */
| 130 | #define MODEL_LIST(...) LIST_X(ccv_cnnp_model_t*, __VA_ARGS__), LIST_COUNT(__VA_ARGS__)
| 131 | /**
| 132 |  * Pass a list of CNNP model IOs to NNC functions that accepts (model IO array, model IO array size).
| 133 |  * This method effectively gives two parameters as one.
| 134 |  */
| 135 | #define MODEL_IO_LIST(...) LIST_X(const ccv_cnnp_model_io_t, __VA_ARGS__), LIST_COUNT(__VA_ARGS__)
| 136 | /**
| 137 |  * Pass a list of CNNP tensor params to ccv_cnnp_cmd_exec which accepts (tensor params array, tensor params array size).
| 138 |  * This method effectively gives two parameters as one.
| 139 |  */
| 140 | #define MODEL_CMD_EXEC_IO_MAP(...) LIST_X(const ccv_cnnp_cmd_exec_io_t, __VA_ARGS__), LIST_SIZEOF_COUNT(ccv_cnnp_cmd_exec_io_t, __VA_ARGS__)
| 141 | /**
| 142 |  * Pass a list of CNNP tensor type to ccv_cnnp_cmd_exec which accepts (tensor type array, tensor type array size).
| 143 |  * This method effectively gives two parameters as one.
| 144 |  */
| 145 | #define MODEL_CMD_EXEC_IO_LIST(...) LIST_X(const int, __VA_ARGS__), LIST_COUNT(__VA_ARGS__)
| 146 | /**
| 147 |  * Pass a list of dataframe column ids to iteration function that accepts (column id array, column id array size).
| 148 |  * This method effectively gives two parameters as one.
| 149 |  */
| 150 | #define COLUMN_ID_LIST(...) LIST_X(const int, __VA_ARGS__), LIST_COUNT(__VA_ARGS__)
| 151 | |
| 152 | #define TRAVERSE_FULL 0,0,0,0
| 153 |
| 154 | #define ALL_PARAMETERS -1
| 155 |
| 156 | // We will support NUMA allocation on CPU in the future. Currently, this is not very meaningful (except to enforce no memory reuse between tensors).
| 157 | #define CPU_NUMA_TENSOR_NHWC(device_id, dt, ...) ((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_CPU_MEMORY,.format=CCV_TENSOR_FORMAT_NHWC,.datatype=CCV_##dt,.dim={__VA_ARGS__}})
| 158 | #define CPU_NUMA_TENSOR_NCHW(device_id, dt, ...) ((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_CPU_MEMORY,.format=CCV_TENSOR_FORMAT_NCHW,.datatype=CCV_##dt,.dim={__VA_ARGS__}})
| 159 | #define CPU_NUMA_TENSOR_CHWN(device_id, dt, ...) ((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_CPU_MEMORY,.format=CCV_TENSOR_FORMAT_CHWN,.datatype=CCV_##dt,.dim={__VA_ARGS__}})
| 160 | #define CPU_TENSOR_NHWC(dt, ...) CPU_NUMA_TENSOR_NHWC(ANY, dt, __VA_ARGS__)
| 161 | #define CPU_TENSOR_NCHW(dt, ...) CPU_NUMA_TENSOR_NCHW(ANY, dt, __VA_ARGS__)
| 162 | #define CPU_TENSOR_CHWN(dt, ...) CPU_NUMA_TENSOR_CHWN(ANY, dt, __VA_ARGS__)
| 163 | // This way, we can do error check on the device type :)
| 164 | #define GPU_TENSOR_NHWC(device_id, dt, ...) ((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_GPU_MEMORY,.format=CCV_TENSOR_FORMAT_NHWC,.datatype=CCV_##dt,.dim={__VA_ARGS__}})
| 165 | #define GPU_TENSOR_NCHW(device_id, dt, ...) ((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_GPU_MEMORY,.format=CCV_TENSOR_FORMAT_NCHW,.datatype=CCV_##dt,.dim={__VA_ARGS__}})
| 166 | #define GPU_TENSOR_CHWN(device_id, dt, ...) ((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_GPU_MEMORY,.format=CCV_TENSOR_FORMAT_CHWN,.datatype=CCV_##dt,.dim={__VA_ARGS__}})
| 167 | /** @} */
| 168 |
| 169 | #define DIM_ALLOC(...) (int [CCV_NNC_MAX_DIM_ALLOC]){__VA_ARGS__}
| 170 |
| 171 | #define ESCAPE_X(...) __VA_ARGS__
| 172 | #define HINT_X_1(_stride_) ((ccv_nnc_hint_t){.stride={.dim={ESCAPE_X _stride_}}, .border={.begin={0},.end={0}}})
| 173 | #define HINT_X_2(_stride_, _border_) ((ccv_nnc_hint_t){.stride={.dim={ESCAPE_X _stride_}}, .border={.begin={ESCAPE_X _border_},.end={ESCAPE_X _border_}}})
| 174 | #define HINT_X_3(_stride_, _begin_, _end_) ((ccv_nnc_hint_t){.stride={.dim={ESCAPE_X _stride_}}, .border={.begin={ESCAPE_X _begin_},.end={ESCAPE_X _end_}}})
| 175 | #define HINT_X_SEL(_1, _2, _3, _FX, ...) _FX
| 176 | /**
| 177 |  * @ingroup convenience_api
| 178 |  * Simpler method to create hint.
| 179 |  * HINT(stride), HINT(stride, border), HINT(stride, border begin, border end)
| 180 |  */
| 181 | #define HINT(...) HINT_X_SEL(__VA_ARGS__, HINT_X_3, HINT_X_2, HINT_X_1)(__VA_ARGS__)
| 182 | |
| 183 | static inline size_t ccv_nnc_dimension_count(const int dim[CCV_NNC_MAX_DIM_ALLOC])
| 184 | {
| 185 | 	if (dim[0] == 0)
| 186 | 		return 0;
| 187 | 	int i;
| 188 | 	size_t count = dim[0];
| 189 | 	for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC && dim[i] > 0; i++)
| 190 | 		count *= dim[i];
| 191 | 	return count;
| 192 | }
| 193 |
| 194 | static inline size_t ccv_nnc_dimension_upper_bound(const int dim[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
| 195 | {
| 196 | 	if (dim[0] == 0 || stride[0] == 0)
| 197 | 		return 0;
| 198 | 	int i;
| 199 | 	size_t count = 1 + (dim[0] - 1) * stride[0];
| 200 | 	for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC && dim[i] > 0 && stride[i] > 0; i++)
| 201 | 		count += (dim[i] - 1) * stride[i];
| 202 | 	return count;
| 203 | }
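A usage sketch of the two counting helpers (illustrative only, assuming <assert.h>):

// dim arrays are zero-terminated, so trailing slots are ignored.
const int dim[CCV_NNC_MAX_DIM_ALLOC] = {7, 3, 4};
assert(ccv_nnc_dimension_count(dim) == 84); // 7 * 3 * 4
// With a non-packed stride, the upper bound is the index of the last
// reachable element plus one.
const int stride[CCV_NNC_MAX_DIM_ALLOC] = {16, 4, 1};
assert(ccv_nnc_dimension_upper_bound(dim, stride) == 1 + 6 * 16 + 2 * 4 + 3 * 1); // 108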
| 204 | |
| 205 | static inline size_t ccv_nnc_tensor_count(const ccv_nnc_tensor_param_t params) |
| 206 | { |
| 207 | return ccv_nnc_dimension_count(params.dim); |
| 208 | } |
| 209 | |
| 210 | static inline ccv_nnc_tensor_param_t ccv_nnc_tensor_palettize(const ccv_nnc_tensor_param_t params, const int qbits, const int number_in_blocks)
| 211 | {
| 212 | 	assert(params.datatype == CCV_16F || params.datatype == CCV_32F || params.datatype == CCV_64F || params.datatype == CCV_16BF);
| 213 | 	ccv_nnc_tensor_param_t new_params = params;
| 214 | 	assert(qbits >= 4 && qbits <= 8);
| 215 | 	new_params.datatype = ((params.datatype >> 12) & 0xff) | CCV_QX | ((qbits << 8) & 0xf00);
| 216 | 	new_params.reserved = number_in_blocks;
| 217 | 	return new_params;
| 218 | }
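A sketch of what the bit-packing above produces (the field layout is read directly off the code; the concrete values are illustrative):

// Palettize FP32 weights to 6-bit palette indices, 1024 entries per block.
ccv_nnc_tensor_param_t p = CPU_TENSOR_NHWC(32F, 1024, 1024);
ccv_nnc_tensor_param_t q = ccv_nnc_tensor_palettize(p, 6, 1024);
// q.datatype now encodes: low byte = CCV_32F >> 12 (the palette type),
// bits 8..11 = qbits (6), plus the CCV_QX flag; q.reserved holds 1024.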
| 219 | |
| 220 | static inline size_t ccv_nnc_tensor_data_size_without_padding(const ccv_nnc_tensor_param_t params)
| 221 | {
| 222 | 	const ssize_t count = (ssize_t)ccv_nnc_tensor_count(params);
| 223 | 	ssize_t data_size;
| 224 | 	if (CCV_GET_DATA_TYPE(params.datatype) == CCV_QX)
| 225 | 	{
| 226 | 		// Our QX right now only does palettization. Hence, we need to get the palette datatype.
| 227 | 		const int palette_datatype = (params.datatype & 0xff) << 12;
| 228 | 		const int number_in_blocks = params.reserved;
| 229 | 		const int num_blocks = (int)((count + number_in_blocks - 1) / number_in_blocks);
| 230 | 		const int qbits = (params.datatype & 0xf00) >> 8;
| 231 | 		assert(qbits >= 4 && qbits <= 8);
| 232 | 		data_size = (ssize_t)(1 << qbits) * CCV_GET_DATA_TYPE_SIZE(palette_datatype) * num_blocks + (count * qbits + 7) / 8;
| 233 | 	} else
| 234 | 		data_size = CCV_GET_DATA_TYPE_SIZE(params.datatype) * count;
| 235 | 	return data_size;
| 236 | }
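Worked through for the palettized example above, following the formula in the code: count = 1024 * 1024 = 1048576, qbits = 6, number_in_blocks = 1024, so num_blocks = 1024. The palettes take (1 << 6) * 4 bytes * 1024 blocks = 262144 bytes and the packed indices take (1048576 * 6 + 7) / 8 = 786432 bytes, for 1048576 bytes in total, a 4x saving over the 4 MiB FP32 original.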
| 237 | |
| 238 | static inline size_t ccv_nnc_tensor_data_size(const ccv_nnc_tensor_param_t params)
| 239 | {
| 240 | 	ssize_t data_size = ccv_nnc_tensor_data_size_without_padding(params);
| 241 | #ifdef HAVE_CUDA // For CUDA, we align to 128-bytes.
| 242 | 	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
| 243 | 		return ((data_size + 127) & -128);
| 244 | 	else
| 245 | #elif defined(HAVE_MPS) // For MPS, we have to align to PAGE_SIZE.
| 246 | 	if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
| 247 | 		return ((data_size + PAGE_SIZE - 1) & -PAGE_SIZE);
| 248 | 	else
| 249 | #endif
| 250 | 		return ((data_size + 63) & -64);
| 251 | }
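For instance, a 100-byte payload rounds up to (100 + 63) & -64 = 128 bytes on CPU; under CUDA the same payload in GPU memory also rounds to (100 + 127) & -128 = 128, and under MPS to one full 16384-byte page.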
| 252 | |
| 253 | static inline void ccv_nnc_tensor_view_get_dim(const ccv_nnc_tensor_view_t* const tv, int dim[CCV_NNC_MAX_DIM_ALLOC])
| 254 | {
| 255 | 	int x;
| 256 | 	const int nd = ccv_nnc_tensor_nd(tv->info.dim);
| 257 | 	const int offset = ccv_max(CCV_NNC_MAX_DIM + 2 - nd, 0);
| 258 | 	for (x = 0; x < offset; x++)
| 259 | 		dim[x] = 1;
| 260 | 	for (x = offset; x < ccv_max(CCV_NNC_MAX_DIM + 2, nd); x++)
| 261 | 		dim[x] = tv->info.dim[x - offset];
| 262 | 	dim[ccv_max(CCV_NNC_MAX_DIM + 2, nd)] = 0;
| 263 | }
| 264 | |
| 265 | static inline CCV_WARN_UNUSED(int) ccv_nnc_is_tensor_stride_packed(const int stride[CCV_NNC_MAX_DIM_ALLOC], const int dim[CCV_NNC_MAX_DIM_ALLOC])
| 266 | {
| 267 | 	const int nd = ccv_nnc_tensor_nd(stride);
| 268 | 	int i;
| 269 | 	int cstride = 1;
| 270 | 	for (i = nd - 1; i >= 0; i--)
| 271 | 	{
| 272 | 		if (stride[i] != cstride)
| 273 | 			return 0;
| 274 | 		cstride *= dim[i];
| 275 | 	}
| 276 | 	return 1;
| 277 | }
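A usage sketch (illustrative only, assuming <assert.h>): a packed layout means the innermost dimension varies fastest with no padding between rows.

const int dim[CCV_NNC_MAX_DIM_ALLOC] = {2, 3, 4};
const int packed[CCV_NNC_MAX_DIM_ALLOC] = {12, 4, 1};
const int padded[CCV_NNC_MAX_DIM_ALLOC] = {16, 4, 1}; // row padding breaks packing
assert(ccv_nnc_is_tensor_stride_packed(packed, dim) == 1);
assert(ccv_nnc_is_tensor_stride_packed(padded, dim) == 0);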
| 278 | |
| 279 | static inline CCV_WARN_UNUSED(int) ccv_nnc_tensor_view_check_dim(const ccv_nnc_tensor_view_t* const tv, const int dim[CCV_NNC_MAX_DIM_ALLOC])
| 280 | {
| 281 | 	int x;
| 282 | 	const int nd = ccv_nnc_tensor_nd(tv->info.dim);
| 283 | 	const int offset = CCV_NNC_MAX_DIM + 2 - nd;
| 284 | 	for (x = 0; x < offset; x++)
| 285 | 		if (dim[x] != 1)
| 286 | 			return 0;
| 287 | 	for (x = offset; x < CCV_NNC_MAX_DIM + 2; x++)
| 288 | 		if (dim[x] != tv->info.dim[x - offset])
| 289 | 			return 0;
| 290 | 	return 1;
| 291 | }
| 292 | |
| 293 | static inline void ccv_nnc_tensor_view_get_broadcast_dim(const ccv_nnc_tensor_view_t* const tv, int dim[CCV_NNC_MAX_DIM_ALLOC])
| 294 | {
| 295 | 	int x;
| 296 | 	const int nd = ccv_nnc_tensor_nd(tv->info.dim);
| 297 | 	const int offset = CCV_NNC_MAX_DIM + 2 - nd;
| 298 | 	for (x = 0; x < offset; x++)
| 299 | 		dim[x] = ccv_max(1, dim[x]);
| 300 | 	for (x = offset; x < CCV_NNC_MAX_DIM + 2; x++)
| 301 | 		dim[x] = ccv_max(dim[x], tv->info.dim[x - offset]);
| 302 | }
| 303 |
| 304 | static inline CCV_WARN_UNUSED(int) ccv_nnc_tensor_view_check_broadcast_dim(const ccv_nnc_tensor_view_t* const tv, int dim[CCV_NNC_MAX_DIM_ALLOC])
| 305 | {
| 306 | 	int x;
| 307 | 	const int nd = ccv_nnc_tensor_nd(tv->info.dim);
| 308 | 	const int offset = CCV_NNC_MAX_DIM + 2 - nd;
| 309 | 	for (x = offset; x < CCV_NNC_MAX_DIM + 2; x++)
| 310 | 		if (dim[x] != tv->info.dim[x - offset] && tv->info.dim[x - offset] != 1)
| 311 | 			return 0;
| 312 | 	return 1;
| 313 | }
| 314 | |
| 315 | static inline void ccv_nnc_tensor_view_get_stride(const ccv_nnc_tensor_view_t* const tv, int stride[CCV_NNC_MAX_DIM_ALLOC])
| 316 | {
| 317 | 	int x;
| 318 | 	const int nd = ccv_nnc_tensor_nd(tv->info.dim);
| 319 | 	const int offset = ccv_max(CCV_NNC_MAX_DIM + 2 - nd, 0);
| 320 | 	stride[ccv_max(nd, CCV_NNC_MAX_DIM + 2)] = 0;
| 321 | 	if (CCV_IS_TENSOR_VIEW(tv))
| 322 | 	{
| 323 | 		for (x = offset; x < ccv_max(nd, CCV_NNC_MAX_DIM + 2); x++)
| 324 | 			stride[x] = tv->stride[x - offset];
| 325 | 		for (x = 0; x < offset; x++)
| 326 | 			stride[x] = stride[offset];
| 327 | 	} else {
| 328 | 		int cstride = 1;
| 329 | 		for (x = ccv_max(CCV_NNC_MAX_DIM + 1, nd - 1); x >= offset; x--)
| 330 | 		{
| 331 | 			stride[x] = cstride;
| 332 | 			cstride *= tv->info.dim[x - offset];
| 333 | 		}
| 334 | 		for (x = 0; x < offset; x++)
| 335 | 			stride[x] = cstride;
| 336 | 	}
| 337 | }
| 338 | |
| 339 | static inline int ccv_nnc_tensor_get_n(const ccv_nnc_tensor_param_t params)
| 340 | {
| 341 | 	switch (params.format)
| 342 | 	{
| 343 | 		case CCV_TENSOR_FORMAT_NHWC:
| 344 | 		case CCV_TENSOR_FORMAT_NCHW:
| 345 | 			if (ccv_nnc_tensor_nd(params.dim) == CCV_NNC_MAX_DIM + 1)
| 346 | 				return 1;
| 347 | 			else
| 348 | 				return params.dim[0];
| 349 | 		case CCV_TENSOR_FORMAT_CHWN:
| 350 | 			return params.dim[CCV_NNC_MAX_DIM + 1];
| 351 | 	}
| 352 | 	return 0;
| 353 | }
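A usage sketch (illustrative only, assuming <assert.h>): n is the batch dimension for NHWC/NCHW tensors, 1 for unbatched (CCV_NNC_MAX_DIM + 1)-dimensional ones, and the trailing dimension for CHWN.

ccv_nnc_tensor_param_t batched = CPU_TENSOR_NHWC(32F, 8, 32, 32, 3);
ccv_nnc_tensor_param_t unbatched = CPU_TENSOR_NHWC(32F, 32, 32, 3);
assert(ccv_nnc_tensor_get_n(batched) == 8);
assert(ccv_nnc_tensor_get_n(unbatched) == 1);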
| 354 | |
| 355 | static inline int ccv_nnc_tensor_get_c(const ccv_nnc_tensor_param_t params)
| 356 | {
| 357 | 	const int nd = ccv_nnc_tensor_nd(params.dim);
| 358 | 	switch (params.format)
| 359 | 	{
| 360 | 		case CCV_TENSOR_FORMAT_NHWC:
| 361 | 			return params.dim[nd - 1];
| 362 | 		case CCV_TENSOR_FORMAT_NCHW:
| 363 | 			if (nd == CCV_NNC_MAX_DIM + 1)
| 364 | 				return params.dim[0];
| 365 | 			else
| 366 | 				return params.dim[nd <= 1 ? 0 : 1];
| 367 | 		case CCV_TENSOR_FORMAT_CHWN:
| 368 | 			return params.dim[0];
| 369 | 	}
| 370 | 	return 0;
| 371 | }
| 372 | |
| 373 | static inline void ccv_nnc_tensor_set_n(ccv_nnc_tensor_param_t* const params, const int n)
| 374 | {
| 375 | 	switch (params->format)
| 376 | 	{
| 377 | 		case CCV_TENSOR_FORMAT_NHWC:
| 378 | 		case CCV_TENSOR_FORMAT_NCHW:
| 379 | 			params->dim[0] = n;
| 380 | 			break;
| 381 | 		case CCV_TENSOR_FORMAT_CHWN:
| 382 | 			params->dim[CCV_NNC_MAX_DIM + 1] = n;
| 383 | 			break;
| 384 | 	}
| 385 | }
| 386 |
| 387 | static inline void ccv_nnc_tensor_set_c(ccv_nnc_tensor_param_t* const params, const int nd, const int c)
| 388 | {
| 389 | 	switch (params->format)
| 390 | 	{
| 391 | 		case CCV_TENSOR_FORMAT_NHWC:
| 392 | 			params->dim[nd - 1] = c;
| 393 | 			break;
| 394 | 		case CCV_TENSOR_FORMAT_NCHW:
| 395 | 			if (nd == CCV_NNC_MAX_DIM + 1)
| 396 | 				params->dim[0] = c;
| 397 | 			else
| 398 | 				params->dim[nd <= 1 ? 0 : 1] = c;
| 399 | 			break;
| 400 | 		case CCV_TENSOR_FORMAT_CHWN:
| 401 | 			params->dim[0] = c;
| 402 | 			break;
| 403 | 	}
| 404 | }
| 405 | |
| 406 | static inline int ccv_nnc_is_matrix_transpose(const ccv_nnc_tensor_param_t params, const int transpose[2]) |
| 407 | { |
| 408 | const int nd = ccv_nnc_tensor_nd(params.dim); |
| 409 | assert(nd >= 1)((void) sizeof ((nd >= 1) ? 1 : 0), __extension__ ({ if (nd >= 1) ; else __assert_fail ("nd >= 1", "../../nnc/ccv_nnc_easy.h" , 409, __extension__ __PRETTY_FUNCTION__); })); |
| 410 | if (transpose[0] != transpose[1]) |
| 411 | { |
| 412 | assert(nd > 1); |
| 413 | assert(((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || |
| 414 | ((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1)))); |
| 415 | return 1; |
| 416 | } |
| 417 | return 0; |
| 418 | } |
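Editor's note: a small sketch of the validation above; for a 2-D matrix only axes {0, 1}, in either order, count as a transpose, and equal entries mean "no transpose".

ccv_nnc_tensor_param_t m = {.format = CCV_TENSOR_FORMAT_NHWC, .dim = {4, 5}};
const int swapped[2] = {0, 1};
const int identity[2] = {0, 0};
assert(ccv_nnc_is_matrix_transpose(m, swapped) == 1); // Valid transpose: rows/cols will swap.
assert(ccv_nnc_is_matrix_transpose(m, identity) == 0); // No transpose requested.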
| 419 | |
| 420 | // Assuming this is a batched matrix, get the relevant parameters. |
| 421 | static inline void ccv_nnc_tensor_get_matrix_params(const ccv_nnc_tensor_param_t params, const int* const stride, const int* const dim, const int transpose[2], int* const batch_size_ref, int* const rows_ref, int* const cols_ref, int* const batch_inc_ref, int* const rows_inc_ref, int* const cols_inc_ref) |
| 422 | { |
| 423 | const int nd = ccv_nnc_tensor_nd(params.dim); |
| 424 | assert(nd >= 1); |
| 425 | *batch_size_ref = nd < 3 ? 1 : params.dim[nd - 3]; |
| 426 | *batch_inc_ref = nd < 3 ? 0 : stride ? stride[nd - 3] : dim[nd - 2] * dim[nd - 1]; |
| 427 | int rows = nd == 1 ? 1 : (nd == 2 ? params.dim[0] : params.dim[nd - 2]); |
| 428 | int rows_inc = stride ? (nd >= 2 ? stride[nd - 2] : stride[0] * dim[0]) : dim[nd - 1]; |
| 429 | int cols = params.dim[nd - 1]; |
| 430 | int cols_inc = 1; |
| 431 | if (transpose[0] != transpose[1]) |
| 432 | { |
| 433 | assert(nd > 1); |
| 434 | assert(((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || |
| 435 | ((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1)))); |
| 436 | int t; |
| 437 | CCV_SWAP(rows, cols, t); |
| 438 | CCV_SWAP(rows_inc, cols_inc, t); |
| 439 | } |
| 440 | *rows_ref = rows; |
| 441 | *cols_ref = cols; |
| 442 | *rows_inc_ref = rows_inc; |
| 443 | *cols_inc_ref = cols_inc; |
| 444 | } |
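Editor's note: a worked example of the extraction above for a hypothetical densely packed 16x4x5 batch (no stride array, no transpose).

ccv_nnc_tensor_param_t p = {.format = CCV_TENSOR_FORMAT_NHWC, .dim = {16, 4, 5}};
int batch_size, rows, cols, batch_inc, rows_inc, cols_inc;
ccv_nnc_tensor_get_matrix_params(p, 0 /* dense, no stride */, p.dim, NO_TRANSPOSE,
	&batch_size, &rows, &cols, &batch_inc, &rows_inc, &cols_inc);
// batch_size == 16, rows == 4, cols == 5,
// batch_inc == 4 * 5 == 20, rows_inc == 5, cols_inc == 1.
// With TRANSPOSE(1, 2) the rows/cols pair and their increments swap, yielding a 5x4 view.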
| 445 | |
| 446 | static inline CCV_WARN_UNUSED(ccv_nnc_tensor_view_t) ccv_nnc_get_tensor_view(const ccv_nnc_tensor_t* const tensor) |
| 447 | { |
| 448 | if (CCV_IS_TENSOR_VIEW(tensor)) |
| 449 | return (ccv_nnc_tensor_view_t)*(const ccv_nnc_tensor_view_t*)tensor; |
| 450 | ccv_nnc_tensor_view_t tv = {0}; |
| 451 | memcpy(&tv, tensor, sizeof(ccv_nnc_tensor_t)); |
| 452 | return tv; |
| 453 | } |
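Editor's note: a sketch of lifting a plain tensor into a stack-allocated view; only the header is copied, the data pointer still aliases the original. The constructor and destructor calls are the usual ones from this library, assumed rather than shown here.

ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
ccv_nnc_tensor_view_t tv = ccv_nnc_get_tensor_view(a); // tv.data aliases a->data.
// Pass &tv wherever a ccv_nnc_tensor_view_t* is expected, then free the backing tensor.
ccv_nnc_tensor_free(a);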
| 454 | |
| 455 | static inline void ccv_nnc_tensor_view_alignment(ccv_nnc_tensor_view_t** const tvs, const int tv_size) |
| 456 | { |
| 457 | int i, j; |
| 458 | int max_nd = 0; |
| 459 | for (i = 0; i < tv_size; i++) |
| 460 | max_nd = ccv_max(ccv_nnc_tensor_nd(tvs[i]->info.dim), max_nd); |
| 461 | for (i = 0; i < tv_size; i++) |
| 462 | { |
| 463 | const int nd = ccv_nnc_tensor_nd(tvs[i]->info.dim); |
| 464 | for (j = max_nd - 1; j >= max_nd - nd; j--) |
| 465 | tvs[i]->info.dim[j] = tvs[i]->info.dim[j - max_nd + nd]; |
| 466 | for (j = 0; j < max_nd - nd; j++) |
| 467 | tvs[i]->info.dim[j] = 1; |
| 468 | if (!CCV_IS_TENSOR_VIEW(tvs[i])) |
| 469 | continue; |
| 470 | for (j = max_nd - 1; j >= max_nd - nd; j--) |
| 471 | tvs[i]->stride[j] = tvs[i]->stride[j - max_nd + nd]; |
| 472 | for (j = 0; j < max_nd - nd; j++) |
| 473 | tvs[i]->stride[j] = tvs[i]->stride[max_nd - nd]; |
| 474 | } |
| 475 | } |
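Editor's note: the alignment above right-justifies each view's dimensions (and strides, for real views) against the largest nd, padding the front with 1s; this is the standard broadcasting setup. A sketch under the same assumed constructors as before:

ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
ccv_nnc_tensor_view_t m_tv = ccv_nnc_get_tensor_view(m);
ccv_nnc_tensor_view_t v_tv = ccv_nnc_get_tensor_view(v);
ccv_nnc_tensor_view_t* tvs[] = {&m_tv, &v_tv};
ccv_nnc_tensor_view_alignment(tvs, 2);
// v_tv.info.dim is now {1, 3}: an element-wise kernel can walk both operands
// with one loop nest, broadcasting the vector across the matrix rows.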
| 476 | |
| 477 | |
| 478 | #define TRANSPOSE(_X, _Y) ((int[]){(_X),(_Y)}) |
| 479 | #define NO_TRANSPOSE TRANSPOSE(0, 0) |
| 480 | #define CMD_GEMM_X(_0, _TA, _TB, ...) ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={1,1},.transpose_a={_TA[0],_TA[1]},.transpose_b={_TB[0],_TB[1]},}}) // We default to alpha = 1 and beta = 1 |
| 481 | #define CMD_GEMM(...) CMD_GEMM_X(_0, ##__VA_ARGS__, NO_TRANSPOSE, NO_TRANSPOSE) |
| 482 | #define CMD_GENERIC_X_0() ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}}}) |
| 483 | #define CMD_GENERIC_X_F(...) ("This should not be used, you should have either 0 parameter or 3 parameters for CMD_GENERIC") |
| 484 | #define CMD_GENERIC_X_3(...) ((ccv_nnc_cmd_param_t){.size={.dim={__VA_ARGS__}}}) |
| 485 | #define CMD_GENERIC_X_SEL(_0, _1, _2, _3, _FX, ...) _FX |
| 486 | // Using ## so that if it is empty, we omit one comma. |
| 487 | #define CMD_GENERIC(...) CMD_GENERIC_X_SEL(CMD_GENERIC_X_F, ##__VA_ARGS__, CMD_GENERIC_X_3, CMD_GENERIC_X_F, CMD_GENERIC_X_F, CMD_GENERIC_X_0)(__VA_ARGS__) |
| 488 | #define CMD_REDUCE(...) ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.reduce={.count=LIST_COUNT(__VA_ARGS__),.axis={__VA_ARGS__}}}) |
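Editor's note: illustrative expansions of the constructors above; the parameter values are hypothetical.

ccv_nnc_cmd_param_t gemm = CMD_GEMM(TRANSPOSE(0, 1)); // Transpose A; B defaults to NO_TRANSPOSE.
ccv_nnc_cmd_param_t generic = CMD_GENERIC(3, 3, 1); // A 3x3x1 window; CMD_GENERIC() with no arguments is also legal.
ccv_nnc_cmd_param_t reduce = CMD_REDUCE(0, 1); // Reduce over axes 0 and 1; LIST_COUNT yields 2.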
| 489 | /** |
| 490 | * @defgroup available_commands Available Commands |
| 491 | * @{ |
| 492 | */ |
| 493 | #define CMD_NOOP() ccv_nnc_cmd(CCV_NNC_NOOP, 0, ccv_nnc_cmd_auto, 0) |
| 494 | #define CMD_CUSTOM_FORWARD(f) ccv_nnc_cmd(CCV_NNC_CUSTOM_FORWARD, f, ccv_nnc_cmd_auto, 0) |
| 495 | /** @} */ |
| 496 | |
| 497 | int ccv_nnc_is_no_hint(const ccv_nnc_hint_t hint); |
| 498 | int ccv_nnc_is_cmd_auto(const ccv_nnc_cmd_param_t params); |
| 499 | int ccv_nnc_is_tensor_auto(const ccv_nnc_tensor_param_t params); |
| 500 | |
| 501 | /** |
| 502 | * @addtogroup convenience_api |
| 503 | * @{ |
| 504 | */ |
| 505 | /** |
| 506 | * Offsets all zero. |
| 507 | */ |
| 508 | extern const int ccv_nnc_no_ofs[CCV_NNC_MAX_DIM_ALLOC]; |
| 509 | /** |
| 510 | * No hint available. |
| 511 | */ |
| 512 | extern const ccv_nnc_hint_t ccv_nnc_no_hint; |
| 513 | /** |
| 514 | * The default symbolic graph compile parameters. |
| 515 | */ |
| 516 | extern const ccv_nnc_symbolic_graph_compile_param_t ccv_nnc_default_compile_params; |
| 517 | /** |
| 518 | * Derive the command parameters automatically if possible. |
| 519 | */ |
| 520 | extern const ccv_nnc_cmd_param_t ccv_nnc_cmd_auto; |
| 521 | /** |
| 522 | * Derive the tensor parameters automatically if possible. |
| 523 | */ |
| 524 | extern const ccv_nnc_tensor_param_t ccv_nnc_tensor_auto; |
| 525 | /** @} */ |
| 526 | |
| 527 | // Generated command flags for easy creation of ccv_nnc_cmd_t objects. |
| 528 | #include "cmd/ccv_nnc_cmd_easy.h" |
| 529 | |
| 530 | #endif |