File: | nnc/cmd/roi/ccv_nnc_roi_align_cpu_ref.c |
Warning: | line 148, column 36 Division by zero |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | #include "ccv.h" | ||||
2 | #include "ccv_internal.h" | ||||
3 | #include "nnc/ccv_nnc.h" | ||||
4 | #include "nnc/ccv_nnc_easy.h" | ||||
5 | #include "nnc/ccv_nnc_internal.h" | ||||
6 | #ifdef USE_OPENMP | ||||
7 | #include <omp.h> | ||||
8 | #endif | ||||
9 | #ifdef USE_DISPATCH | ||||
10 | #include <dispatch/dispatch.h> | ||||
11 | #endif | ||||
12 | |||||
/**
 * Precomputed bilinear interpolation state for one sample position along one axis.
 */
typedef struct {
	int i0; // Index of the floored sample coordinate, clamped into the axis range.
	int i1; // Index of the floored coordinate plus one, clamped into the axis range.
	int mute; // Non-zero when both neighbors fall outside the axis; such samples are skipped.
	float r; // Fractional distance from the floored coordinate; it is the weight given to i1.
} roi_align_coeffs_t;
17 | |||||
18 | static void _ccv_nnc_bilinear_coeffs(ccv_nnc_stream_context_t* const stream_context, const int h, const int w, const float roi_y, const float roi_x, const float roi_h, const float roi_w, const int pool_h, const int pool_w, int* const bin_h_ref, int* const bin_w_ref, roi_align_coeffs_t** const y_coeffs_ref, roi_align_coeffs_t** const x_coeffs_ref, int** const bin_h_at_y_ref, int** const bin_w_at_x_ref, int* const start_h_ref, int* const start_w_ref, int* const end_h_ref, int* const end_w_ref) | ||||
19 | { | ||||
20 | const int bin_h = (int)ceilf(roi_h / pool_h); // How many bins in each point of the pool. We slightly sampling at higher resolution (due to ceiling) with bilinear interpolation. | ||||
21 | const int bin_w = (int)ceilf(roi_w / pool_w); | ||||
22 | const int bin_pool_h = bin_h * pool_h; // Before averaging, what's the size of the region in integral term. | ||||
23 | const int bin_pool_w = bin_w * pool_w; | ||||
24 | const float scale_y = roi_h / bin_pool_h; // The scale to multiply back to get original coordinate. | ||||
25 | const float scale_x = roi_w / bin_pool_w; | ||||
26 | int x, y, i, j; | ||||
27 | roi_align_coeffs_t* const y_coeffs = (roi_align_coeffs_t*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(roi_align_coeffs_t) * (bin_pool_h + bin_pool_w) + sizeof(int) * (pool_h + pool_w), CCV_TENSOR_CPU_MEMORY); | ||||
28 | roi_align_coeffs_t* const x_coeffs = y_coeffs + bin_pool_h; | ||||
29 | int* const bin_h_at_y = (int*)(x_coeffs + bin_pool_w); | ||||
30 | int* const bin_w_at_x = bin_h_at_y + pool_h; | ||||
31 | for (i = 0; i < pool_h; i++) | ||||
32 | { | ||||
33 | const int pi = i * bin_h; | ||||
34 | int count = 0; | ||||
35 | for (y = 0; y < bin_h; y++) | ||||
36 | { | ||||
37 | const float ay = roi_y + (y + pi + 0.5) * scale_y - 0.5; | ||||
38 | const int iy = (int)floorf(ay); | ||||
39 | const float ry = ay - iy; | ||||
40 | const int iy0 = ccv_clamp(iy, 0, h - 1)({ typeof (0) _a = (0); typeof (h - 1) _b = (h - 1); typeof ( iy) _x = (iy); (_x < _a) ? _a : ((_x > _b) ? _b : _x); } ); | ||||
41 | const int iy1 = ccv_clamp(iy + 1, 0, h - 1)({ typeof (0) _a = (0); typeof (h - 1) _b = (h - 1); typeof ( iy + 1) _x = (iy + 1); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }); | ||||
42 | y_coeffs[pi + y].i0 = iy0; | ||||
43 | y_coeffs[pi + y].i1 = iy1; | ||||
44 | y_coeffs[pi + y].r = ry; | ||||
45 | const int mute = (iy + 1 < 0 || iy > h - 1); | ||||
46 | y_coeffs[pi + y].mute = mute; | ||||
47 | if (!mute) | ||||
48 | ++count; | ||||
49 | } | ||||
50 | bin_h_at_y[i] = count; | ||||
51 | } | ||||
52 | int start_h = pool_h; | ||||
53 | for (i = 0; start_h == pool_h && i < pool_h; i++) | ||||
54 | if (bin_h_at_y[i] > 0) | ||||
55 | start_h = i; | ||||
56 | int end_h = 0; | ||||
57 | for (i = pool_h - 1; end_h == 0 && i >= 0; i--) | ||||
58 | if (bin_h_at_y[i] > 0) | ||||
59 | end_h = i + 1; | ||||
60 | for (j = 0; j < pool_w; j++) | ||||
61 | { | ||||
62 | const int pj = j * bin_w; | ||||
63 | int count = 0; | ||||
64 | for (x = 0; x < bin_w; x++) | ||||
65 | { | ||||
66 | const float ax = roi_x + (x + pj + 0.5) * scale_x - 0.5; | ||||
67 | const int ix = (int)floorf(ax); | ||||
68 | const float rx = ax - ix; | ||||
69 | const int ix0 = ccv_clamp(ix, 0, w - 1)({ typeof (0) _a = (0); typeof (w - 1) _b = (w - 1); typeof ( ix) _x = (ix); (_x < _a) ? _a : ((_x > _b) ? _b : _x); } ); | ||||
70 | const int ix1 = ccv_clamp(ix + 1, 0, w - 1)({ typeof (0) _a = (0); typeof (w - 1) _b = (w - 1); typeof ( ix + 1) _x = (ix + 1); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }); | ||||
71 | x_coeffs[pj + x].i0 = ix0; | ||||
72 | x_coeffs[pj + x].i1 = ix1; | ||||
73 | x_coeffs[pj + x].r = rx; | ||||
74 | const int mute = (ix + 1 < 0 || ix > w - 1); | ||||
75 | x_coeffs[pj + x].mute = mute; | ||||
76 | if (!mute) | ||||
77 | ++count; | ||||
78 | } | ||||
79 | bin_w_at_x[j] = count; | ||||
80 | } | ||||
81 | int start_w = pool_w; | ||||
82 | for (j = 0; start_w == pool_w && j < pool_w; j++) | ||||
83 | if (bin_w_at_x[j] > 0) | ||||
84 | start_w = j; | ||||
85 | int end_w = 0; | ||||
86 | for (j = pool_w - 1; end_w == 0 && j >= 0; j--) | ||||
87 | if (bin_w_at_x[j] > 0) | ||||
88 | end_w = j + 1; | ||||
89 | *bin_h_ref = bin_h; | ||||
90 | *bin_w_ref = bin_w; | ||||
91 | *y_coeffs_ref = y_coeffs; | ||||
92 | *x_coeffs_ref = x_coeffs; | ||||
93 | *bin_h_at_y_ref = bin_h_at_y; | ||||
94 | *bin_w_at_x_ref = bin_w_at_x; | ||||
95 | *start_h_ref = start_h; | ||||
96 | *start_w_ref = start_w; | ||||
97 | *end_h_ref = end_h; | ||||
98 | *end_w_ref = end_w; | ||||
99 | } | ||||
100 | |||||
101 | static int _ccv_nnc_roi_align_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) | ||||
102 | { | ||||
103 | assert(input_size == 2)((void) sizeof ((input_size == 2) ? 1 : 0), __extension__ ({ if (input_size == 2) ; else __assert_fail ("input_size == 2", "roi/ccv_nnc_roi_align_cpu_ref.c" , 103, __extension__ __PRETTY_FUNCTION__); })); | ||||
| |||||
104 | const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0]; | ||||
105 | assert(output_size == 1)((void) sizeof ((output_size == 1) ? 1 : 0), __extension__ ({ if (output_size == 1) ; else __assert_fail ("output_size == 1" , "roi/ccv_nnc_roi_align_cpu_ref.c", 105, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
106 | const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1]; | ||||
107 | ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0]; | ||||
108 | const int a_nd = ccv_nnc_tensor_nd(a->info.dim); | ||||
109 | assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((a_nd == (2) + 1 || a_nd == (2) + 2) ? 1 : 0) , __extension__ ({ if (a_nd == (2) + 1 || a_nd == (2) + 2) ; else __assert_fail ("a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 109, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
110 | const int* adim = (a_nd == CCV_NNC_MAX_DIM(2) + 1) ? a->info.dim : a->info.dim + 1; | ||||
111 | const int h = adim[0]; | ||||
112 | const int w = adim[1]; | ||||
113 | const int c_nd = ccv_nnc_tensor_nd(c->info.dim); | ||||
114 | assert(c_nd == CCV_NNC_MAX_DIM + 1 || c_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((c_nd == (2) + 1 || c_nd == (2) + 2) ? 1 : 0) , __extension__ ({ if (c_nd == (2) + 1 || c_nd == (2) + 2) ; else __assert_fail ("c_nd == CCV_NNC_MAX_DIM + 1 || c_nd == CCV_NNC_MAX_DIM + 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 114, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
115 | const int* cdim = (c_nd == CCV_NNC_MAX_DIM(2) + 1) ? c->info.dim : c->info.dim + 1; | ||||
116 | const int pool_h = cdim[0]; | ||||
117 | const int pool_w = cdim[1]; | ||||
118 | assert(cdim[2] == adim[2])((void) sizeof ((cdim[2] == adim[2]) ? 1 : 0), __extension__ ( { if (cdim[2] == adim[2]) ; else __assert_fail ("cdim[2] == adim[2]" , "roi/ccv_nnc_roi_align_cpu_ref.c", 118, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
119 | const int ch = cdim[2]; | ||||
120 | const float* const ap = a->data.f32; | ||||
121 | int astride[CCV_NNC_MAX_DIM_ALLOC(12)]; | ||||
122 | ccv_nnc_tensor_view_get_stride(a, astride); | ||||
123 | const float* const bp = b->data.f32; | ||||
124 | float* cp = c->data.f32; | ||||
125 | int cstride[CCV_NNC_MAX_DIM_ALLOC(12)]; | ||||
126 | ccv_nnc_tensor_view_get_stride(c, cstride); | ||||
127 | const int a_n = ccv_nnc_tensor_get_n(a->info); | ||||
128 | const int b_nd = ccv_nnc_tensor_nd(b->info.dim); | ||||
129 | assert(b_nd == 1 || b_nd == 2)((void) sizeof ((b_nd == 1 || b_nd == 2) ? 1 : 0), __extension__ ({ if (b_nd == 1 || b_nd == 2) ; else __assert_fail ("b_nd == 1 || b_nd == 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 129, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
130 | const int b_n = b_nd
| ||||
131 | const int c_n = ccv_nnc_tensor_get_n(c->info); | ||||
132 | assert(c_n == ccv_max(a_n, b_n))((void) sizeof ((c_n == ({ typeof (a_n) _a = (a_n); typeof (b_n ) _b = (b_n); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__ ({ if (c_n == ({ typeof (a_n) _a = (a_n); typeof (b_n) _b = ( b_n); (_a > _b) ? _a : _b; })) ; else __assert_fail ("c_n == ccv_max(a_n, b_n)" , "roi/ccv_nnc_roi_align_cpu_ref.c", 132, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
133 | const int aninc = a_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : astride[0]; | ||||
134 | int bstride[CCV_NNC_MAX_DIM_ALLOC(12)]; | ||||
135 | ccv_nnc_tensor_view_get_stride(b, bstride); | ||||
136 | const int bninc = b_nd
| ||||
137 | const int cninc = c_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : cstride[0]; | ||||
138 | ccv_nnc_tensor_zero(c); | ||||
139 | int bin_h, bin_w; | ||||
140 | roi_align_coeffs_t* y_coeffs; | ||||
141 | roi_align_coeffs_t* x_coeffs; | ||||
142 | int* bin_h_at_y; | ||||
143 | int* bin_w_at_x; | ||||
144 | int start_h, start_w, end_h, end_w; | ||||
145 | int n; | ||||
146 | for (n = 0; n < c_n; n++) | ||||
147 | { | ||||
148 | const float* const apn = ap + (n % a_n) * aninc; | ||||
| |||||
149 | float* cpn = cp + n * cninc; | ||||
150 | const float roi_x = bp[(n % b_n) * bninc] * w; // These assumed it is real-coordinate, with range between 0 to w - 1. | ||||
151 | const float roi_y = bp[(n % b_n) * bninc + 1] * h; | ||||
152 | const float roi_w = bp[(n % b_n) * bninc + 2] * w; | ||||
153 | const float roi_h = bp[(n % b_n) * bninc + 3] * h; | ||||
154 | // Re-compute the offsets if b changes or it is the first time. | ||||
155 | if ((b_n == 1 && n == 0) || b_n > 1) | ||||
156 | _ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w); | ||||
157 | int i, j, x, y, k; | ||||
158 | for (i = start_h; i < end_h; i++) | ||||
159 | { | ||||
160 | const int pi = i * bin_h; | ||||
161 | const int bin_hz = bin_h_at_y[i]; | ||||
162 | for (j = start_w; j < end_w; j++) | ||||
163 | { | ||||
164 | const int pj = j * bin_w; | ||||
165 | const int bin_wz = bin_w_at_x[j]; | ||||
166 | const float inv = 1.0 / (bin_hz * bin_wz); | ||||
167 | float* const cpz = cpn + j * cstride[CCV_NNC_MAX_DIM(2)]; | ||||
168 | for (y = 0; y < bin_h; y++) | ||||
169 | { | ||||
170 | if (y_coeffs[pi + y].mute) | ||||
171 | continue; | ||||
172 | const float ry = y_coeffs[pi + y].r; | ||||
173 | const int iy0 = y_coeffs[pi + y].i0; | ||||
174 | const int iy1 = y_coeffs[pi + y].i1; | ||||
175 | for (x = 0; x < bin_w; x++) | ||||
176 | { | ||||
177 | if (x_coeffs[pj + x].mute) | ||||
178 | continue; | ||||
179 | const float rx = x_coeffs[pj + x].r; | ||||
180 | const int ix0 = x_coeffs[pj + x].i0; | ||||
181 | const int ix1 = x_coeffs[pj + x].i1; | ||||
182 | const float c00 = (1 - ry) * (1 - rx); | ||||
183 | const float c01 = (1 - ry) * rx; | ||||
184 | const float c10 = ry * (1 - rx); | ||||
185 | const float c11 = ry * rx; | ||||
186 | const float* const ap00 = apn + iy0 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * astride[CCV_NNC_MAX_DIM(2)]; | ||||
187 | const float* const ap01 = apn + iy0 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * astride[CCV_NNC_MAX_DIM(2)]; | ||||
188 | const float* const ap10 = apn + iy1 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * astride[CCV_NNC_MAX_DIM(2)]; | ||||
189 | const float* const ap11 = apn + iy1 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * astride[CCV_NNC_MAX_DIM(2)]; | ||||
190 | for (k = 0; k < ch; k++) | ||||
191 | cpz[k] += ap00[k] * c00 + ap01[k] * c01 + ap10[k] * c10 + ap11[k] * c11; | ||||
192 | } | ||||
193 | } | ||||
194 | for (k = 0; k < ch; k++) | ||||
195 | cpz[k] *= inv; | ||||
196 | } | ||||
197 | cpn += cstride[CCV_NNC_MAX_DIM(2) - 1]; | ||||
198 | } | ||||
199 | } | ||||
200 | return CCV_NNC_EXEC_SUCCESS; | ||||
201 | } | ||||
202 | |||||
203 | static int _ccv_nnc_roi_align_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) | ||||
204 | { | ||||
205 | assert(input_size >= 3)((void) sizeof ((input_size >= 3) ? 1 : 0), __extension__ ( { if (input_size >= 3) ; else __assert_fail ("input_size >= 3" , "roi/ccv_nnc_roi_align_cpu_ref.c", 205, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
206 | const ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0]; | ||||
207 | assert(output_size == 1)((void) sizeof ((output_size == 1) ? 1 : 0), __extension__ ({ if (output_size == 1) ; else __assert_fail ("output_size == 1" , "roi/ccv_nnc_roi_align_cpu_ref.c", 207, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
208 | ccv_nnc_tensor_view_t* o = (ccv_nnc_tensor_view_t*)outputs[0]; | ||||
209 | const int g_nd = ccv_nnc_tensor_nd(g->info.dim); | ||||
210 | assert(g_nd == CCV_NNC_MAX_DIM + 1 || g_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((g_nd == (2) + 1 || g_nd == (2) + 2) ? 1 : 0) , __extension__ ({ if (g_nd == (2) + 1 || g_nd == (2) + 2) ; else __assert_fail ("g_nd == CCV_NNC_MAX_DIM + 1 || g_nd == CCV_NNC_MAX_DIM + 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 210, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
211 | const int* gdim = (g_nd == CCV_NNC_MAX_DIM(2) + 1) ? g->info.dim : g->info.dim + 1; | ||||
212 | const int pool_h = gdim[0]; | ||||
213 | const int pool_w = gdim[1]; | ||||
214 | const int o_nd = ccv_nnc_tensor_nd(o->info.dim); | ||||
215 | assert(o_nd == CCV_NNC_MAX_DIM + 1 || o_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((o_nd == (2) + 1 || o_nd == (2) + 2) ? 1 : 0) , __extension__ ({ if (o_nd == (2) + 1 || o_nd == (2) + 2) ; else __assert_fail ("o_nd == CCV_NNC_MAX_DIM + 1 || o_nd == CCV_NNC_MAX_DIM + 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 215, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
216 | const int* odim = (o_nd == CCV_NNC_MAX_DIM(2) + 1) ? o->info.dim : o->info.dim + 1; | ||||
217 | const int h = odim[0]; | ||||
218 | const int w = odim[1]; | ||||
219 | assert(gdim[2] == odim[2])((void) sizeof ((gdim[2] == odim[2]) ? 1 : 0), __extension__ ( { if (gdim[2] == odim[2]) ; else __assert_fail ("gdim[2] == odim[2]" , "roi/ccv_nnc_roi_align_cpu_ref.c", 219, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
220 | const int ch = gdim[2]; | ||||
221 | float* gp = g->data.f32; | ||||
222 | int gstride[CCV_NNC_MAX_DIM_ALLOC(12)]; | ||||
223 | ccv_nnc_tensor_view_get_stride(g, gstride); | ||||
224 | float* op = o->data.f32; | ||||
225 | int ostride[CCV_NNC_MAX_DIM_ALLOC(12)]; | ||||
226 | ccv_nnc_tensor_view_get_stride(o, ostride); | ||||
227 | const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[2]; | ||||
228 | const float* const bp = b->data.f32; | ||||
229 | const int o_n = ccv_nnc_tensor_get_n(o->info); | ||||
230 | const int b_nd = ccv_nnc_tensor_nd(b->info.dim); | ||||
231 | assert(b_nd == 1 || b_nd == 2)((void) sizeof ((b_nd == 1 || b_nd == 2) ? 1 : 0), __extension__ ({ if (b_nd == 1 || b_nd == 2) ; else __assert_fail ("b_nd == 1 || b_nd == 2" , "roi/ccv_nnc_roi_align_cpu_ref.c", 231, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
232 | const int b_n = b_nd == 1 ? 1 : b->info.dim[0]; | ||||
233 | const int g_n = ccv_nnc_tensor_get_n(g->info); | ||||
234 | assert(g_n == ccv_max(o_n, b_n))((void) sizeof ((g_n == ({ typeof (o_n) _a = (o_n); typeof (b_n ) _b = (b_n); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__ ({ if (g_n == ({ typeof (o_n) _a = (o_n); typeof (b_n) _b = ( b_n); (_a > _b) ? _a : _b; })) ; else __assert_fail ("g_n == ccv_max(o_n, b_n)" , "roi/ccv_nnc_roi_align_cpu_ref.c", 234, __extension__ __PRETTY_FUNCTION__ ); })); | ||||
235 | const int oninc = o_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : ostride[0]; | ||||
236 | int bstride[CCV_NNC_MAX_DIM_ALLOC(12)]; | ||||
237 | ccv_nnc_tensor_view_get_stride(b, bstride); | ||||
238 | const int bninc = b_nd == 1 ? 0 : bstride[CCV_NNC_MAX_DIM(2) + 2 - b_nd]; | ||||
239 | const int gninc = g_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : gstride[0]; | ||||
240 | int bin_h, bin_w; | ||||
241 | roi_align_coeffs_t* y_coeffs; | ||||
242 | roi_align_coeffs_t* x_coeffs; | ||||
243 | int* bin_h_at_y; | ||||
244 | int* bin_w_at_x; | ||||
245 | int start_h, start_w, end_h, end_w; | ||||
246 | int n; | ||||
247 | ccv_nnc_tensor_zero(o); | ||||
248 | for (n = 0; n < g_n; n++) | ||||
249 | { | ||||
250 | const float roi_x = bp[(n % b_n) * bninc] * w; // These assumed it is real-coordinate, with range between 0 to w - 1. | ||||
251 | const float roi_y = bp[(n % b_n) * bninc + 1] * h; | ||||
252 | const float roi_w = bp[(n % b_n) * bninc + 2] * w; | ||||
253 | const float roi_h = bp[(n % b_n) * bninc + 3] * h; | ||||
254 | // Re-compute the offsets if b changes or it is the first time. | ||||
255 | if ((b_n == 1 && n == 0) || b_n > 1) | ||||
256 | _ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w); | ||||
257 | const float* gpn = gp + n * gninc; | ||||
258 | float* const opn = op + (n % o_n) * oninc; | ||||
259 | int x, y, i, j, k; | ||||
260 | for (i = 0; i < pool_h; i++) | ||||
261 | { | ||||
262 | const int pi = i * bin_h; | ||||
263 | const int bin_hz = bin_h_at_y[i]; | ||||
264 | for (j = 0; j < pool_w; j++) | ||||
265 | { | ||||
266 | const int pj = j * bin_w; | ||||
267 | const int bin_wz = bin_w_at_x[j]; | ||||
268 | const float inv = 1.0 / (bin_hz * bin_wz); | ||||
269 | const float* const gpz = gpn + j * gstride[CCV_NNC_MAX_DIM(2)]; | ||||
270 | for (y = 0; y < bin_h; y++) | ||||
271 | { | ||||
272 | if (y_coeffs[pi + y].mute) | ||||
273 | continue; | ||||
274 | const float ry = y_coeffs[pi + y].r; | ||||
275 | const int iy0 = y_coeffs[pi + y].i0; | ||||
276 | const int iy1 = y_coeffs[pi + y].i1; | ||||
277 | for (x = 0; x < bin_w; x++) | ||||
278 | { | ||||
279 | if (x_coeffs[pj + x].mute) | ||||
280 | continue; | ||||
281 | const float rx = x_coeffs[pj + x].r; | ||||
282 | const int ix0 = x_coeffs[pj + x].i0; | ||||
283 | const int ix1 = x_coeffs[pj + x].i1; | ||||
284 | const float c00 = (1 - ry) * (1 - rx); | ||||
285 | const float c01 = (1 - ry) * rx; | ||||
286 | const float c10 = ry * (1 - rx); | ||||
287 | const float c11 = ry * rx; | ||||
288 | float* const op00 = opn + iy0 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * ostride[CCV_NNC_MAX_DIM(2)]; | ||||
289 | float* const op01 = opn + iy0 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * ostride[CCV_NNC_MAX_DIM(2)]; | ||||
290 | float* const op10 = opn + iy1 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * ostride[CCV_NNC_MAX_DIM(2)]; | ||||
291 | float* const op11 = opn + iy1 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * ostride[CCV_NNC_MAX_DIM(2)]; | ||||
292 | for (k = 0; k < ch; k++) | ||||
293 | { | ||||
294 | op00[k] += gpz[k] * c00 * inv; | ||||
295 | op01[k] += gpz[k] * c01 * inv; | ||||
296 | op10[k] += gpz[k] * c10 * inv; | ||||
297 | op11[k] += gpz[k] * c11 * inv; | ||||
298 | } | ||||
299 | } | ||||
300 | } | ||||
301 | } | ||||
302 | gpn += gstride[CCV_NNC_MAX_DIM(2) - 1]; | ||||
303 | } | ||||
304 | } | ||||
305 | return CCV_NNC_EXEC_SUCCESS; | ||||
306 | } | ||||
307 | |||||
308 | REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_ROI_ALIGN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry) | ||||
309 | { | ||||
310 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC; | ||||
311 | registry->tensor_datatypes = CCV_32F; | ||||
312 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; | ||||
313 | registry->algorithms = 1; | ||||
314 | registry->exec = _ccv_nnc_roi_align_forw; | ||||
315 | } | ||||
316 | |||||
317 | REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_ROI_ALIGN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry) | ||||
318 | { | ||||
319 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC; | ||||
320 | registry->tensor_datatypes = CCV_32F; | ||||
321 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; | ||||
322 | registry->algorithms = 1; | ||||
323 | registry->exec = _ccv_nnc_roi_align_back; | ||||
324 | } |
1 | /********************************************************** |
2 | * C-based/Cached/Core Computer Vision Library |
3 | * Liu Liu, 2010-02-01 |
4 | **********************************************************/ |
5 | |
6 | /********************************************************** |
7 | * CCV - Neural Network Collection |
8 | **********************************************************/ |
9 | |
10 | #ifndef GUARD_ccv_nnc_easy_h |
11 | #define GUARD_ccv_nnc_easy_h |
12 | |
13 | #include "ccv.h" |
14 | #include "ccv_internal.h" |
15 | #include "nnc/ccv_nnc.h" |
16 | #ifdef HAVE_MPS |
17 | #ifdef __APPLE__ |
18 | #include "TargetConditionals.h" |
19 | #if !TARGET_OS_IPHONE && !TARGET_IPHONE_SIMULATOR |
20 | #include <mach/mach_vm.h> |
21 | #else |
22 | #define PAGE_SIZE (16384) |
23 | #endif |
24 | #endif |
25 | #endif |
26 | |
27 | /** |
28 | * Convenience API |
29 | * |
30 | * This header provides convenience APIs for nnc usage. Being convenience API, |
31 | * it is optimized for shorthand coding, and may collide the naming space with |
32 | * others. |
33 | * |
34 | */ |
35 | // c99 only, make sure your compiler supports that. |
36 | |
// Placeholder expressions: the integer 1 cast to the while / case_of callback types.
#define NOOP_GRAPH_WHILE_EXPR (ccv_nnc_graph_while_f)(1)
#define NOOP_GRAPH_CASE_OF_EXPR (ccv_nnc_graph_case_of_f)(1)
39 | |
// This is a better LIST_COUNT macro. It generates a sum such as 1+1+0+0+0, where a term is 1 if the parameter is present and 0 otherwise.
// This works better for cases such as LIST_COUNT(1, 2, 3,), where the previous macro would get 4 while this one computes the
// correct result. It relies on the GNU `, ## __VA_ARGS__` comma elision, and counts up to 63 parameters.
#define LIST_COUNT_01(_0,_1,_2,...) _2
#define LIST_COUNT_E(...) LIST_COUNT_01(_0,##__VA_ARGS__,1,0)
#define LIST_COUNT_N(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12,_13,_14,_15,_16,_17,_18,_19,_20,_21,_22,_23,_24,_25,_26,_27,_28,_29,_30,_31,_32,_33,_34,_35,_36,_37,_38,_39,_40,_41,_42,_43,_44,_45,_46,_47,_48,_49,_50,_51,_52,_53,_54,_55,_56,_57,_58,_59,_60,_61,_62,_63,...) (LIST_COUNT_E(_0)+LIST_COUNT_E(_1)+LIST_COUNT_E(_2)+LIST_COUNT_E(_3)+LIST_COUNT_E(_4)+LIST_COUNT_E(_5)+LIST_COUNT_E(_6)+LIST_COUNT_E(_7)+LIST_COUNT_E(_8)+LIST_COUNT_E(_9)+LIST_COUNT_E(_10)+LIST_COUNT_E(_11)+LIST_COUNT_E(_12)+LIST_COUNT_E(_13)+LIST_COUNT_E(_14)+LIST_COUNT_E(_15)+LIST_COUNT_E(_16)+LIST_COUNT_E(_17)+LIST_COUNT_E(_18)+LIST_COUNT_E(_19)+LIST_COUNT_E(_20)+LIST_COUNT_E(_21)+LIST_COUNT_E(_22)+LIST_COUNT_E(_23)+LIST_COUNT_E(_24)+LIST_COUNT_E(_25)+LIST_COUNT_E(_26)+LIST_COUNT_E(_27)+LIST_COUNT_E(_28)+LIST_COUNT_E(_29)+LIST_COUNT_E(_30)+LIST_COUNT_E(_31)+LIST_COUNT_E(_32)+LIST_COUNT_E(_33)+LIST_COUNT_E(_34)+LIST_COUNT_E(_35)+LIST_COUNT_E(_36)+LIST_COUNT_E(_37)+LIST_COUNT_E(_38)+LIST_COUNT_E(_39)+LIST_COUNT_E(_40)+LIST_COUNT_E(_41)+LIST_COUNT_E(_42)+LIST_COUNT_E(_43)+LIST_COUNT_E(_44)+LIST_COUNT_E(_45)+LIST_COUNT_E(_46)+LIST_COUNT_E(_47)+LIST_COUNT_E(_48)+LIST_COUNT_E(_49)+LIST_COUNT_E(_50)+LIST_COUNT_E(_51)+LIST_COUNT_E(_52)+LIST_COUNT_E(_53)+LIST_COUNT_E(_54)+LIST_COUNT_E(_55)+LIST_COUNT_E(_56)+LIST_COUNT_E(_57)+LIST_COUNT_E(_58)+LIST_COUNT_E(_59)+LIST_COUNT_E(_60)+LIST_COUNT_E(_61)+LIST_COUNT_E(_62)+LIST_COUNT_E(_63)-1)
#define LIST_COUNT(...) LIST_COUNT_N(_0,##__VA_ARGS__,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,)
47 | |
// An array compound literal of _type holding the given elements.
#define LIST_X(_type, ...) (_type []){__VA_ARGS__}

// KV(x) gives the initializer {(x)}, KV(x, y) gives {(x), (y)}. KV_X_SEL picks KV_X_1 or KV_X_2 by the number of arguments.
#define KV_X_2(_x, _y, ...) {(_x), (_y)}
#define KV_X_1(_x, ...) {(_x)}
#define KV_X_SEL(_1, _2, _FX, ...) _FX
#define KV(...) KV_X_SEL(__VA_ARGS__, KV_X_2, KV_X_1)(__VA_ARGS__)

// Number of elements in the list, computed with sizeof so that each element can itself be a braced initializer (such as KV).
#define LIST_SIZEOF_COUNT(_type, ...) (sizeof(LIST_X(_type, __VA_ARGS__)) / sizeof(_type))
56 | |
57 | /** |
58 | * @defgroup convenience_api Convenience API |
59 | * @{ |
60 | */ |
/**
 * Pass a list of tensors to NNC functions that accept (tensor array, tensor array size).
 * This macro effectively gives two parameters as one.
 */
#define TENSOR_LIST(...) LIST_X(ccv_nnc_tensor_t*, __VA_ARGS__), LIST_COUNT(__VA_ARGS__)
/**
 * Pass a list of tensor parameters to NNC functions that accept (parameter array, parameter array size).
 * This macro effectively gives two parameters as one.
 */
#define TENSOR_PARAM_LIST(...) LIST_X(const ccv_nnc_tensor_param_t, __VA_ARGS__), LIST_COUNT(__VA_ARGS__)
/**
 * This represents a tensor symbol that is empty (tensor = nil)
 */
#define NO_TENSOR_SYMBOL (const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL}
/**
 * This represents a graph exec symbol that is empty (exec = nil)
 */
#define NO_GRAPH_EXEC_SYMBOL (const ccv_nnc_graph_exec_symbol_t){.d = CCV_NNC_NO_GRAPH_EXEC_SYMBOL}
/**
 * Pass a list of tensor symbols to NNC functions that accept (tensor symbol array, tensor symbol array size).
 * This macro effectively gives two parameters as one.
 */
#define TENSOR_SYMBOL_LIST(...) LIST_X(const ccv_nnc_tensor_symbol_t, __VA_ARGS__), LIST_COUNT(__VA_ARGS__)
/**
 * Pass a list of tensor variables to NNC functions that accept (tensor variable array, tensor variable array size).
 * This macro effectively gives two parameters as one.
 */
#define TENSOR_VARIABLE_LIST(...) LIST_X(ccv_nnc_tensor_variable_t, __VA_ARGS__), LIST_COUNT(__VA_ARGS__)
/**
 * Pass a list of tensor bindings to NNC functions that accept (tensor binding array, tensor binding array size).
 * This macro effectively gives two parameters as one. Since a tensor binding requires two values, a symbol and a tensor,
 * you should use this like: TENSOR_BIND_MAP(KV(symbol1, tensor1), KV(symbol2, tensor2)).
 */
#define TENSOR_BIND_MAP(...) LIST_X(const ccv_nnc_tensor_bind_t, __VA_ARGS__), LIST_SIZEOF_COUNT(ccv_nnc_tensor_bind_t, __VA_ARGS__)
/**
 * Pass a list of tensor symbol pairs to NNC functions that accept (tensor symbol pair array, tensor symbol pair array size).
 * This macro effectively gives two parameters as one. Since a tensor symbol pair requires two values, a source symbol and a
 * destination symbol, you should use this like: TENSOR_SYMBOL_MAP(KV(symbol1, symbol2), KV(symbol3, symbol4)).
 */
#define TENSOR_SYMBOL_MAP(...) LIST_X(const ccv_nnc_tensor_symbol_map_t, __VA_ARGS__), LIST_SIZEOF_COUNT(ccv_nnc_tensor_symbol_map_t, __VA_ARGS__)
/**
 * Pass a list of execution nodes to NNC functions that accept (execution node array, execution node array size).
 * This macro effectively gives two parameters as one.
 */
#define GRAPH_EXEC_LIST(...) LIST_X(const ccv_nnc_graph_exec_t, __VA_ARGS__), LIST_COUNT(__VA_ARGS__)
/**
 * Pass a list of execution node symbols to NNC functions that accept (execution node symbol array, execution node symbol array size).
 * This macro effectively gives two parameters as one.
 */
#define GRAPH_EXEC_SYMBOL_LIST(...) LIST_X(const ccv_nnc_graph_exec_symbol_t, __VA_ARGS__), LIST_COUNT(__VA_ARGS__)
111 | /** |
112 | * Pass both default sources and default sources size to function that accepts (sources, source size). |
113 | * @param x A given symbolic graph. |
114 | */ |
115 | #define SYMBOLIC_GRAPH_SOURCES(x)ccv_nnc_symbolic_graph_sources(x), ccv_nnc_symbolic_graph_source_size (x) ccv_nnc_symbolic_graph_sources(x), ccv_nnc_symbolic_graph_source_size(x) |
116 | /** |
117 | * Pass both default destinations and default destinations size to function that accepts (destinations, destination size). |
118 | * @param x A given symbolic graph. |
119 | */ |
120 | #define SYMBOLIC_GRAPH_DESTINATIONS(x)ccv_nnc_symbolic_graph_destinations(x), ccv_nnc_symbolic_graph_destination_size (x) ccv_nnc_symbolic_graph_destinations(x), ccv_nnc_symbolic_graph_destination_size(x) |
121 | /** |
122 | * Pass a list of simplification passes to NNC functions that accept (pass array, pass array size). |
123 | * This method effectively gives two parameters as one. |
124 | */ |
125 | #define SYMBOLIC_GRAPH_PASSES(...)(const int []){...}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) LIST_X(const int, __VA_ARGS__)(const int []){__VA_ARGS__}, LIST_COUNT(__VA_ARGS__)(1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 -1) |
126 | /** |
127 | * Pass a list of CNNP models to NNC functions that accept (model array, model array size). |
128 | * This method effectively gives two parameters as one. |
129 | */ |
130 | #define MODEL_LIST(...)(ccv_cnnp_model_t* []){...}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) LIST_X(ccv_cnnp_model_t*, __VA_ARGS__)(ccv_cnnp_model_t* []){__VA_ARGS__}, LIST_COUNT(__VA_ARGS__)(1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 -1) |
131 | /** |
132 | * Pass a list of CNNP model IOs to NNC functions that accept (model IO array, model IO array size). |
133 | * This method effectively gives two parameters as one. |
134 | */ |
135 | #define MODEL_IO_LIST(...)(const ccv_cnnp_model_io_t []){...}, (1 +1 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) LIST_X(const ccv_cnnp_model_io_t, __VA_ARGS__)(const ccv_cnnp_model_io_t []){__VA_ARGS__}, LIST_COUNT(__VA_ARGS__)(1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 -1) |
136 | /** |
137 | * Pass a list of CNNP tensor params to ccv_cnnp_cmd_exec which accepts (tensor params array, tensor params array size). |
138 | * This method effectively gives two parameters as one. |
139 | */ |
140 | #define MODEL_CMD_EXEC_IO_MAP(...)(const ccv_cnnp_cmd_exec_io_t []){...}, (sizeof((ccv_cnnp_cmd_exec_io_t []){...}) / sizeof(ccv_cnnp_cmd_exec_io_t)) LIST_X(const ccv_cnnp_cmd_exec_io_t, __VA_ARGS__)(const ccv_cnnp_cmd_exec_io_t []){__VA_ARGS__}, LIST_SIZEOF_COUNT(ccv_cnnp_cmd_exec_io_t, __VA_ARGS__)(sizeof((ccv_cnnp_cmd_exec_io_t []){__VA_ARGS__}) / sizeof(ccv_cnnp_cmd_exec_io_t )) |
141 | /** |
142 | * Pass a list of CNNP tensor type to ccv_cnnp_cmd_exec which accepts (tensor type array, tensor type array size). |
143 | * This method effectively gives two parameters as one. |
144 | */ |
145 | #define MODEL_CMD_EXEC_IO_LIST(...)(const int []){...}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) LIST_X(const int, __VA_ARGS__)(const int []){__VA_ARGS__}, LIST_COUNT(__VA_ARGS__)(1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 -1) |
146 | /** |
147 | * Pass a list of dataframe column ids to iteration function that accepts (column id array, column id array size). |
148 | * This method effectively gives two parameters as one. |
149 | */ |
150 | #define COLUMN_ID_LIST(...)(const int []){...}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) LIST_X(const int, __VA_ARGS__)(const int []){__VA_ARGS__}, LIST_COUNT(__VA_ARGS__)(1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 -1) |
151 | |
152 | #define TRAVERSE_FULL0,0,0,0 0,0,0,0 |
153 | |
154 | #define ALL_PARAMETERS-1 -1 |
155 | |
156 | // We will support NUMA allocation on CPU in the future. Currently, this is not very meaningful (except to enforce no memory reuse between tensors). |
157 | #define CPU_NUMA_TENSOR_NHWC(device_id, dt, ...)((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_device_id ) | CCV_TENSOR_CPU_MEMORY,.format=CCV_TENSOR_FORMAT_NHWC,.datatype =CCV_dt,.dim={...}}) ((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_CPU_MEMORY,.format=CCV_TENSOR_FORMAT_NHWC,.datatype=CCV_##dt,.dim={__VA_ARGS__}}) |
158 | #define CPU_NUMA_TENSOR_NCHW(device_id, dt, ...)((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_device_id ) | CCV_TENSOR_CPU_MEMORY,.format=CCV_TENSOR_FORMAT_NCHW,.datatype =CCV_dt,.dim={...}}) ((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_CPU_MEMORY,.format=CCV_TENSOR_FORMAT_NCHW,.datatype=CCV_##dt,.dim={__VA_ARGS__}}) |
159 | #define CPU_NUMA_TENSOR_CHWN(device_id, dt, ...)((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_device_id ) | CCV_TENSOR_CPU_MEMORY,.format=CCV_TENSOR_FORMAT_CHWN,.datatype =CCV_dt,.dim={...}}) ((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_CPU_MEMORY,.format=CCV_TENSOR_FORMAT_CHWN,.datatype=CCV_##dt,.dim={__VA_ARGS__}}) |
160 | #define CPU_TENSOR_NHWC(dt, ...)((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_ANY) | CCV_TENSOR_CPU_MEMORY ,.format=CCV_TENSOR_FORMAT_NHWC,.datatype=CCV_dt,.dim={...}}) CPU_NUMA_TENSOR_NHWC(ANY, dt, __VA_ARGS__)((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_ANY) | CCV_TENSOR_CPU_MEMORY ,.format=CCV_TENSOR_FORMAT_NHWC,.datatype=CCV_dt,.dim={__VA_ARGS__ }}) |
161 | #define CPU_TENSOR_NCHW(dt, ...)((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_ANY) | CCV_TENSOR_CPU_MEMORY ,.format=CCV_TENSOR_FORMAT_NCHW,.datatype=CCV_dt,.dim={...}}) CPU_NUMA_TENSOR_NCHW(ANY, dt, __VA_ARGS__)((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_ANY) | CCV_TENSOR_CPU_MEMORY ,.format=CCV_TENSOR_FORMAT_NCHW,.datatype=CCV_dt,.dim={__VA_ARGS__ }}) |
162 | #define CPU_TENSOR_CHWN(dt, ...)((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_ANY) | CCV_TENSOR_CPU_MEMORY ,.format=CCV_TENSOR_FORMAT_CHWN,.datatype=CCV_dt,.dim={...}}) CPU_NUMA_TENSOR_CHWN(ANY, dt, __VA_ARGS__)((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_ANY) | CCV_TENSOR_CPU_MEMORY ,.format=CCV_TENSOR_FORMAT_CHWN,.datatype=CCV_dt,.dim={__VA_ARGS__ }}) |
163 | // This way, we can do error check on the device type :) |
164 | #define GPU_TENSOR_NHWC(device_id, dt, ...)((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_device_id ) | CCV_TENSOR_GPU_MEMORY,.format=CCV_TENSOR_FORMAT_NHWC,.datatype =CCV_dt,.dim={...}}) ((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_GPU_MEMORY,.format=CCV_TENSOR_FORMAT_NHWC,.datatype=CCV_##dt,.dim={__VA_ARGS__}}) |
165 | #define GPU_TENSOR_NCHW(device_id, dt, ...)((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_device_id ) | CCV_TENSOR_GPU_MEMORY,.format=CCV_TENSOR_FORMAT_NCHW,.datatype =CCV_dt,.dim={...}}) ((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_GPU_MEMORY,.format=CCV_TENSOR_FORMAT_NCHW,.datatype=CCV_##dt,.dim={__VA_ARGS__}}) |
166 | #define GPU_TENSOR_CHWN(device_id, dt, ...)((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_device_id ) | CCV_TENSOR_GPU_MEMORY,.format=CCV_TENSOR_FORMAT_CHWN,.datatype =CCV_dt,.dim={...}}) ((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_GPU_MEMORY,.format=CCV_TENSOR_FORMAT_CHWN,.datatype=CCV_##dt,.dim={__VA_ARGS__}}) |
167 | /** @} */ |
168 | |
169 | #define DIM_ALLOC(...)(int [(12)]){...} (int [CCV_NNC_MAX_DIM_ALLOC(12)]){__VA_ARGS__} |
170 | |
171 | #define ESCAPE_X(...)... __VA_ARGS__ |
172 | #define HINT_X_1(_stride_)((ccv_nnc_hint_t){.stride={.dim={ESCAPE_X _stride_}}, .border ={.begin={0},.end={0}}}) ((ccv_nnc_hint_t){.stride={.dim={ESCAPE_X _stride_}}, .border={.begin={0},.end={0}}}) |
173 | #define HINT_X_2(_stride_, _border_)((ccv_nnc_hint_t){.stride={.dim={ESCAPE_X _stride_}}, .border ={.begin={ESCAPE_X _border_},.end={ESCAPE_X _border_}}}) ((ccv_nnc_hint_t){.stride={.dim={ESCAPE_X _stride_}}, .border={.begin={ESCAPE_X _border_},.end={ESCAPE_X _border_}}}) |
174 | #define HINT_X_3(_stride_, _begin_, _end_)((ccv_nnc_hint_t){.stride={.dim={ESCAPE_X _stride_}}, .border ={.begin={ESCAPE_X _begin_},.end={ESCAPE_X _end_}}}) ((ccv_nnc_hint_t){.stride={.dim={ESCAPE_X _stride_}}, .border={.begin={ESCAPE_X _begin_},.end={ESCAPE_X _end_}}}) |
175 | #define HINT_X_SEL(_1, _2, _3, _FX, ...)_FX _FX |
176 | /** |
177 | * @ingroup convenience_api |
178 | * Simpler method to create hint. |
179 | * HINT(stride), HINT(stride, border), HINT(stride, border begin, border end) |
180 | */ |
181 | #define HINT(...)((ccv_nnc_hint_t){.stride={.dim={ESCAPE_X ...}}, .border={.begin ={0},.end={0}}}) HINT_X_SEL(__VA_ARGS__, HINT_X_3, HINT_X_2, HINT_X_1)(__VA_ARGS__)((ccv_nnc_hint_t){.stride={.dim={ESCAPE_X __VA_ARGS__}}, .border ={.begin={0},.end={0}}}) |
182 | |
183 | static inline size_t ccv_nnc_dimension_count(const int dim[CCV_NNC_MAX_DIM_ALLOC(12)]) |
184 | { |
185 | if (dim[0] == 0) |
186 | return 0; |
187 | int i; |
188 | size_t count = dim[0]; |
189 | for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC(12) && dim[i] > 0; i++) |
190 | count *= dim[i]; |
191 | return count; |
192 | } |
193 | |
194 | static inline size_t ccv_nnc_dimension_upper_bound(const int dim[CCV_NNC_MAX_DIM_ALLOC(12)], const int stride[CCV_NNC_MAX_DIM_ALLOC(12)]) |
195 | { |
196 | if (dim[0] == 0 || stride[0] == 0) |
197 | return 0; |
198 | int i; |
199 | size_t count = 1 + (dim[0] - 1) * stride[0]; |
200 | for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC(12) && dim[i] > 0 && stride[i] > 0; i++) |
201 | count += (dim[i] - 1) * stride[i]; |
202 | return count; |
203 | } |
204 | |
205 | static inline size_t ccv_nnc_tensor_count(const ccv_nnc_tensor_param_t params) |
206 | { |
207 | return ccv_nnc_dimension_count(params.dim); |
208 | } |
209 | |
210 | static inline ccv_nnc_tensor_param_t ccv_nnc_tensor_palettize(const ccv_nnc_tensor_param_t params, const int qbits, const int number_in_blocks) |
211 | { |
212 | assert(params.datatype == CCV_16F || params.datatype == CCV_32F || params.datatype == CCV_64F)((void) sizeof ((params.datatype == CCV_16F || params.datatype == CCV_32F || params.datatype == CCV_64F) ? 1 : 0), __extension__ ({ if (params.datatype == CCV_16F || params.datatype == CCV_32F || params.datatype == CCV_64F) ; else __assert_fail ("params.datatype == CCV_16F || params.datatype == CCV_32F || params.datatype == CCV_64F" , "../../nnc/ccv_nnc_easy.h", 212, __extension__ __PRETTY_FUNCTION__ ); })); |
213 | ccv_nnc_tensor_param_t new_params = params; |
214 | assert(qbits >= 4 && qbits <= 8)((void) sizeof ((qbits >= 4 && qbits <= 8) ? 1 : 0), __extension__ ({ if (qbits >= 4 && qbits <= 8) ; else __assert_fail ("qbits >= 4 && qbits <= 8" , "../../nnc/ccv_nnc_easy.h", 214, __extension__ __PRETTY_FUNCTION__ ); })); |
215 | new_params.datatype = ((params.datatype >> 12) & 0xff) | CCV_QX | ((qbits << 8) & 0xf00); |
216 | new_params.reserved = number_in_blocks; |
217 | return new_params; |
218 | } |
219 | |
220 | static inline size_t ccv_nnc_tensor_data_size_without_padding(const ccv_nnc_tensor_param_t params) |
221 | { |
222 | const ssize_t count = (ssize_t)ccv_nnc_tensor_count(params); |
223 | ssize_t data_size; |
224 | if (CCV_GET_DATA_TYPE(params.datatype)((params.datatype) & 0xFF000) == CCV_QX) |
225 | { |
226 | // Our QX right now only does palettization. Hence, we need to get the palette datatype. |
227 | const int palette_datatype = (params.datatype & 0xff) << 12; |
228 | const int number_in_blocks = params.reserved; |
229 | const int num_blocks = (int)((count + number_in_blocks - 1) / number_in_blocks); |
230 | const int qbits = (params.datatype & 0xf00) >> 8; |
231 | assert(qbits >= 4 && qbits <= 8)((void) sizeof ((qbits >= 4 && qbits <= 8) ? 1 : 0), __extension__ ({ if (qbits >= 4 && qbits <= 8) ; else __assert_fail ("qbits >= 4 && qbits <= 8" , "../../nnc/ccv_nnc_easy.h", 231, __extension__ __PRETTY_FUNCTION__ ); })); |
232 | data_size = (ssize_t)(1 << qbits) * CCV_GET_DATA_TYPE_SIZE(palette_datatype)_ccv_get_data_type_size[((palette_datatype) & 0xFF000) >> 12] * num_blocks + (count * qbits + 7) / 8; |
233 | } else |
234 | data_size = CCV_GET_DATA_TYPE_SIZE(params.datatype)_ccv_get_data_type_size[((params.datatype) & 0xFF000) >> 12] * count; |
235 | return data_size; |
236 | } |
237 | |
238 | static inline size_t ccv_nnc_tensor_data_size(const ccv_nnc_tensor_param_t params) |
239 | { |
240 | ssize_t data_size = ccv_nnc_tensor_data_size_without_padding(params); |
241 | #ifdef HAVE_CUDA1 // For CUDA, we align to 128-bytes. |
242 | if (CCV_TENSOR_GET_MEMORY(params.type)((params.type) & 0x3) == CCV_TENSOR_GPU_MEMORY) |
243 | return ((data_size + 127) & -128); |
244 | else |
245 | #elif defined(HAVE_MPS) // For MPS, we have to align to PAGE_SIZE. |
246 | if (CCV_TENSOR_GET_MEMORY(params.type)((params.type) & 0x3) == CCV_TENSOR_GPU_MEMORY) |
247 | return ((data_size + PAGE_SIZE - 1) & -PAGE_SIZE); |
248 | else |
249 | #endif |
250 | return ((data_size + 63) & -64); |
251 | } |
252 | |
253 | static inline void ccv_nnc_tensor_view_get_dim(const ccv_nnc_tensor_view_t* const tv, int dim[CCV_NNC_MAX_DIM_ALLOC(12)]) |
254 | { |
255 | int x; |
256 | const int nd = ccv_nnc_tensor_nd(tv->info.dim); |
257 | const int offset = CCV_NNC_MAX_DIM(2) + 2 - nd; |
258 | for (x = 0; x < offset; x++) |
259 | dim[x] = 1; |
260 | for (x = offset; x < CCV_NNC_MAX_DIM(2) + 2; x++) |
261 | dim[x] = tv->info.dim[x - offset]; |
262 | dim[CCV_NNC_MAX_DIM(2) + 2] = 0; |
263 | } |
264 | |
265 | static inline CCV_WARN_UNUSED(int)int __attribute__((warn_unused_result)) ccv_nnc_is_tensor_stride_packed(const int stride[CCV_NNC_MAX_DIM_ALLOC(12)], const int dim[CCV_NNC_MAX_DIM_ALLOC(12)]) |
266 | { |
267 | const int nd = ccv_nnc_tensor_nd(stride); |
268 | int i; |
269 | int cstride = 1; |
270 | for (i = nd - 1; i >= 0; i--) |
271 | { |
272 | if (stride[i] != cstride) |
273 | return 0; |
274 | cstride *= dim[i]; |
275 | } |
276 | return 1; |
277 | } |
278 | |
279 | static inline CCV_WARN_UNUSED(int)int __attribute__((warn_unused_result)) ccv_nnc_tensor_view_check_dim(const ccv_nnc_tensor_view_t* const tv, const int dim[CCV_NNC_MAX_DIM_ALLOC(12)]) |
280 | { |
281 | int x; |
282 | const int nd = ccv_nnc_tensor_nd(tv->info.dim); |
283 | const int offset = CCV_NNC_MAX_DIM(2) + 2 - nd; |
284 | for (x = 0; x < offset; x++) |
285 | if (dim[x] != 1) |
286 | return 0; |
287 | for (x = offset; x < CCV_NNC_MAX_DIM(2) + 2; x++) |
288 | if (dim[x] != tv->info.dim[x - offset]) |
289 | return 0; |
290 | return 1; |
291 | } |
292 | |
293 | static inline void ccv_nnc_tensor_view_get_broadcast_dim(const ccv_nnc_tensor_view_t* const tv, int dim[CCV_NNC_MAX_DIM_ALLOC(12)]) |
294 | { |
295 | int x; |
296 | const int nd = ccv_nnc_tensor_nd(tv->info.dim); |
297 | const int offset = CCV_NNC_MAX_DIM(2) + 2 - nd; |
298 | for (x = 0; x < offset; x++) |
299 | dim[x] = ccv_max(1, dim[x])({ typeof (1) _a = (1); typeof (dim[x]) _b = (dim[x]); (_a > _b) ? _a : _b; }); |
300 | for (x = offset; x < CCV_NNC_MAX_DIM(2) + 2; x++) |
301 | dim[x] = ccv_max(dim[x], tv->info.dim[x - offset])({ typeof (dim[x]) _a = (dim[x]); typeof (tv->info.dim[x - offset]) _b = (tv->info.dim[x - offset]); (_a > _b) ? _a : _b; }); |
302 | } |
303 | |
304 | static inline CCV_WARN_UNUSED(int)int __attribute__((warn_unused_result)) ccv_nnc_tensor_view_check_broadcast_dim(const ccv_nnc_tensor_view_t* const tv, int dim[CCV_NNC_MAX_DIM_ALLOC(12)]) |
305 | { |
306 | int x; |
307 | const int nd = ccv_nnc_tensor_nd(tv->info.dim); |
308 | const int offset = CCV_NNC_MAX_DIM(2) + 2 - nd; |
309 | for (x = offset; x < CCV_NNC_MAX_DIM(2) + 2; x++) |
310 | if (dim[x] != tv->info.dim[x - offset] && tv->info.dim[x - offset] != 1) |
311 | return 0; |
312 | return 1; |
313 | } |
314 | |
315 | static inline void ccv_nnc_tensor_view_get_stride(const ccv_nnc_tensor_view_t* const tv, int stride[CCV_NNC_MAX_DIM_ALLOC(12)]) |
316 | { |
317 | int x; |
318 | const int nd = ccv_nnc_tensor_nd(tv->info.dim); |
319 | const int offset = ccv_max(CCV_NNC_MAX_DIM + 2 - nd, 0)({ typeof ((2) + 2 - nd) _a = ((2) + 2 - nd); typeof (0) _b = (0); (_a > _b) ? _a : _b; }); |
320 | stride[CCV_NNC_MAX_DIM(2) + 2] = 0; |
321 | if (CCV_IS_TENSOR_VIEW(tv)((*(int*)(tv)) & CCV_TENSOR_VIEW)) |
322 | { |
323 | for (x = offset; x < CCV_NNC_MAX_DIM(2) + 2; x++) |
324 | stride[x] = tv->stride[x - offset]; |
325 | for (x = 0; x < offset; x++) |
326 | stride[x] = stride[offset]; |
327 | } else { |
328 | int cstride = 1; |
329 | for (x = CCV_NNC_MAX_DIM(2) + 1; x >= offset; x--) |
330 | { |
331 | stride[x] = cstride; |
332 | cstride *= tv->info.dim[x - offset]; |
333 | } |
334 | for (x = 0; x < offset; x++) |
335 | stride[x] = cstride; |
336 | } |
337 | } |
338 | |
339 | static inline int ccv_nnc_tensor_get_n(const ccv_nnc_tensor_param_t params) |
340 | { |
341 | switch (params.format) |
342 | { |
343 | case CCV_TENSOR_FORMAT_NHWC: |
344 | case CCV_TENSOR_FORMAT_NCHW: |
345 | if (ccv_nnc_tensor_nd(params.dim) == CCV_NNC_MAX_DIM(2) + 1) |
346 | return 1; |
347 | else |
348 | return params.dim[0]; |
349 | case CCV_TENSOR_FORMAT_CHWN: |
350 | return params.dim[CCV_NNC_MAX_DIM(2) + 1]; |
351 | } |
352 | return 0; |
353 | } |
354 | |
355 | static inline int ccv_nnc_tensor_get_c(const ccv_nnc_tensor_param_t params) |
356 | { |
357 | const int nd = ccv_nnc_tensor_nd(params.dim); |
358 | switch (params.format) |
359 | { |
360 | case CCV_TENSOR_FORMAT_NHWC: |
361 | return params.dim[nd - 1]; |
362 | case CCV_TENSOR_FORMAT_NCHW: |
363 | if (nd == CCV_NNC_MAX_DIM(2) + 1) |
364 | return params.dim[0]; |
365 | else |
366 | return params.dim[nd <= 1 ? 0 : 1]; |
367 | case CCV_TENSOR_FORMAT_CHWN: |
368 | return params.dim[0]; |
369 | } |
370 | return 0; |
371 | } |
372 | |
373 | static inline void ccv_nnc_tensor_set_n(ccv_nnc_tensor_param_t* const params, const int n) |
374 | { |
375 | switch (params->format) |
376 | { |
377 | case CCV_TENSOR_FORMAT_NHWC: |
378 | case CCV_TENSOR_FORMAT_NCHW: |
379 | params->dim[0] = n; |
380 | break; |
381 | case CCV_TENSOR_FORMAT_CHWN: |
382 | params->dim[CCV_NNC_MAX_DIM(2) + 1] = n; |
383 | break; |
384 | } |
385 | } |
386 | |
387 | static inline void ccv_nnc_tensor_set_c(ccv_nnc_tensor_param_t* const params, const int nd, const int c) |
388 | { |
389 | switch (params->format) |
390 | { |
391 | case CCV_TENSOR_FORMAT_NHWC: |
392 | params->dim[nd - 1] = c; |
393 | break; |
394 | case CCV_TENSOR_FORMAT_NCHW: |
395 | if (nd == CCV_NNC_MAX_DIM(2) + 1) |
396 | params->dim[0] = c; |
397 | else |
398 | params->dim[nd <= 1 ? 0 : 1] = c; |
399 | break; |
400 | case CCV_TENSOR_FORMAT_CHWN: |
401 | params->dim[0] = c; |
402 | break; |
403 | } |
404 | } |
405 | |
406 | static inline int ccv_nnc_is_matrix_transpose(const ccv_nnc_tensor_param_t params, const int transpose[2]) |
407 | { |
408 | const int nd = ccv_nnc_tensor_nd(params.dim); |
409 | assert(nd >= 1)((void) sizeof ((nd >= 1) ? 1 : 0), __extension__ ({ if (nd >= 1) ; else __assert_fail ("nd >= 1", "../../nnc/ccv_nnc_easy.h" , 409, __extension__ __PRETTY_FUNCTION__); })); |
410 | if (transpose[0] != transpose[1]) |
411 | { |
412 | assert(nd > 1)((void) sizeof ((nd > 1) ? 1 : 0), __extension__ ({ if (nd > 1) ; else __assert_fail ("nd > 1", "../../nnc/ccv_nnc_easy.h" , 412, __extension__ __PRETTY_FUNCTION__); })); |
413 | assert(((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) ||((void) sizeof ((((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || ((transpose[1 ] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == (( nd == 2) ? 1 : nd - 1)))) ? 1 : 0), __extension__ ({ if (((transpose [0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ( (nd == 2) ? 1 : nd - 1))) || ((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1)))) ; else __assert_fail ("((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || ((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1)))" , "../../nnc/ccv_nnc_easy.h", 414, __extension__ __PRETTY_FUNCTION__ ); })) |
414 | ((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1))))((void) sizeof ((((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || ((transpose[1 ] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == (( nd == 2) ? 1 : nd - 1)))) ? 1 : 0), __extension__ ({ if (((transpose [0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ( (nd == 2) ? 1 : nd - 1))) || ((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1)))) ; else __assert_fail ("((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || ((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1)))" , "../../nnc/ccv_nnc_easy.h", 414, __extension__ __PRETTY_FUNCTION__ ); })); |
415 | return 1; |
416 | } |
417 | return 0; |
418 | } |
419 | |
420 | // Assuming this is batched matrix. Getting relevant parameters. |
421 | static inline void ccv_nnc_tensor_get_matrix_params(const ccv_nnc_tensor_param_t params, const int* const stride, const int* const dim, const int transpose[2], int* const batch_size_ref, int* const rows_ref, int* const cols_ref, int* const batch_inc_ref, int* const rows_inc_ref, int* const cols_inc_ref) |
422 | { |
423 | const int nd = ccv_nnc_tensor_nd(params.dim); |
424 | assert(nd >= 1)((void) sizeof ((nd >= 1) ? 1 : 0), __extension__ ({ if (nd >= 1) ; else __assert_fail ("nd >= 1", "../../nnc/ccv_nnc_easy.h" , 424, __extension__ __PRETTY_FUNCTION__); })); |
425 | *batch_size_ref = nd < 3 ? 1 : params.dim[nd - 3]; |
426 | *batch_inc_ref = nd < 3 ? 0 : stride ? stride[nd - 3] : dim[nd - 2] * dim[nd - 1]; |
427 | int rows = nd == 1 ? 1 : (nd == 2 ? params.dim[0] : params.dim[nd - 2]); |
428 | int rows_inc = stride ? (nd >= 2 ? stride[nd - 2] : stride[0] * dim[0]) : dim[nd - 1]; |
429 | int cols = params.dim[nd - 1]; |
430 | int cols_inc = 1; |
431 | if (transpose[0] != transpose[1]) |
432 | { |
433 | assert(nd > 1)((void) sizeof ((nd > 1) ? 1 : 0), __extension__ ({ if (nd > 1) ; else __assert_fail ("nd > 1", "../../nnc/ccv_nnc_easy.h" , 433, __extension__ __PRETTY_FUNCTION__); })); |
434 | assert(((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) ||((void) sizeof ((((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || ((transpose[1 ] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == (( nd == 2) ? 1 : nd - 1)))) ? 1 : 0), __extension__ ({ if (((transpose [0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ( (nd == 2) ? 1 : nd - 1))) || ((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1)))) ; else __assert_fail ("((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || ((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1)))" , "../../nnc/ccv_nnc_easy.h", 435, __extension__ __PRETTY_FUNCTION__ ); })) |
435 | ((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1))))((void) sizeof ((((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || ((transpose[1 ] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == (( nd == 2) ? 1 : nd - 1)))) ? 1 : 0), __extension__ ({ if (((transpose [0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ( (nd == 2) ? 1 : nd - 1))) || ((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1)))) ; else __assert_fail ("((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || ((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1)))" , "../../nnc/ccv_nnc_easy.h", 435, __extension__ __PRETTY_FUNCTION__ ); })); |
436 | int t; |
437 | CCV_SWAP(rows, cols, t)((t) = (rows), (rows) = (cols), (cols) = (t)); |
438 | CCV_SWAP(rows_inc, cols_inc, t)((t) = (rows_inc), (rows_inc) = (cols_inc), (cols_inc) = (t)); |
439 | } |
440 | *rows_ref = rows; |
441 | *cols_ref = cols; |
442 | *rows_inc_ref = rows_inc; |
443 | *cols_inc_ref = cols_inc; |
444 | } |
445 | |
446 | static inline CCV_WARN_UNUSED(ccv_nnc_tensor_view_t)ccv_nnc_tensor_view_t __attribute__((warn_unused_result)) ccv_nnc_get_tensor_view(const ccv_nnc_tensor_t* const tensor) |
447 | { |
448 | if (CCV_IS_TENSOR_VIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_VIEW)) |
449 | return (ccv_nnc_tensor_view_t)*(const ccv_nnc_tensor_view_t*)tensor; |
450 | ccv_nnc_tensor_view_t tv; |
451 | memcpy(&tv, tensor, sizeof(ccv_nnc_tensor_t)); |
452 | return tv; |
453 | } |
454 | |
455 | static inline void ccv_nnc_tensor_view_alignment(ccv_nnc_tensor_view_t** const tvs, const int tv_size) |
456 | { |
457 | int i, j; |
458 | int max_nd = 0; |
459 | for (i = 0; i < tv_size; i++) |
460 | max_nd = ccv_max(ccv_nnc_tensor_nd(tvs[i]->info.dim), max_nd)({ typeof (ccv_nnc_tensor_nd(tvs[i]->info.dim)) _a = (ccv_nnc_tensor_nd (tvs[i]->info.dim)); typeof (max_nd) _b = (max_nd); (_a > _b) ? _a : _b; }); |
461 | for (i = 0; i < tv_size; i++) |
462 | { |
463 | const int nd = ccv_nnc_tensor_nd(tvs[i]->info.dim); |
464 | for (j = max_nd - 1; j >= max_nd - nd; j--) |
465 | tvs[i]->info.dim[j] = tvs[i]->info.dim[j - max_nd + nd]; |
466 | for (j = 0; j < max_nd - nd; j++) |
467 | tvs[i]->info.dim[j] = 1; |
468 | if (!CCV_IS_TENSOR_VIEW(tvs[i])((*(int*)(tvs[i])) & CCV_TENSOR_VIEW)) |
469 | continue; |
470 | for (j = max_nd - 1; j >= max_nd - nd; j--) |
471 | tvs[i]->stride[j] = tvs[i]->stride[j - max_nd + nd]; |
472 | for (j = 0; j < max_nd - nd; j++) |
473 | tvs[i]->stride[j] = tvs[i]->stride[max_nd - nd]; |
474 | } |
475 | } |
476 | |
477 | |
478 | #define TRANSPOSE(_X, _Y)((int[]){(_X),(_Y)}) ((int[]){(_X),(_Y)}) |
479 | #define NO_TRANSPOSE((int[]){(0),(0)}) TRANSPOSE(0, 0)((int[]){(0),(0)}) |
480 | #define CMD_GEMM_X(_0, _TA, _TB, ...)((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={1,1},. transpose_a={_TA[0],_TA[1]},.transpose_b={_TB[0],_TB[1]},}}) ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={1,1},.transpose_a={_TA[0],_TA[1]},.transpose_b={_TB[0],_TB[1]},}}) // We default to alpha = 1 and beta = 1 |
481 | #define CMD_GEMM(...)((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={1,1},. transpose_a={...[0],...[1]},.transpose_b={((int[]){(0),(0)})[ 0],((int[]){(0),(0)})[1]},}}) CMD_GEMM_X(_0, ##__VA_ARGS__, NO_TRANSPOSE, NO_TRANSPOSE)((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.blas={.a={1,1},. transpose_a={##__VA_ARGS__[0],##__VA_ARGS__[1]},.transpose_b= {((int[]){(0),(0)})[0],((int[]){(0),(0)})[1]},}}) |
482 | #define CMD_GENERIC_X_0()((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}}}) ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}}}) |
483 | #define CMD_GENERIC_X_F(...)("This should not be used, you should have either 0 parameter or 3 parameters for CMD_GENERIC" ) ("This should not be used, you should have either 0 parameter or 3 parameters for CMD_GENERIC") |
484 | #define CMD_GENERIC_X_3(...)((ccv_nnc_cmd_param_t){.size={.dim={...}}}) ((ccv_nnc_cmd_param_t){.size={.dim={__VA_ARGS__}}}) |
485 | #define CMD_GENERIC_X_SEL(_0, _1, _2, _3, _FX, ...)_FX _FX |
486 | // Using ## so that if it is empty, we omit one comma. |
487 | #define CMD_GENERIC(...)("This should not be used, you should have either 0 parameter or 3 parameters for CMD_GENERIC" ) CMD_GENERIC_X_SEL(CMD_GENERIC_X_F, ##__VA_ARGS__, CMD_GENERIC_X_3, CMD_GENERIC_X_F, CMD_GENERIC_X_F, CMD_GENERIC_X_0)(__VA_ARGS__)("This should not be used, you should have either 0 parameter or 3 parameters for CMD_GENERIC" ) |
488 | #define CMD_REDUCE(...)((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.reduce={.count=( 1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1),.axis={...}}}) ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.reduce={.count=LIST_COUNT(__VA_ARGS__)(1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 -1),.axis={__VA_ARGS__}}}) |
489 | /** |
490 | * @defgroup available_commands Available Commands |
491 | * @{ |
492 | */ |
493 | #define CMD_NOOP()ccv_nnc_cmd(CCV_NNC_NOOP, 0, ccv_nnc_cmd_auto, 0) ccv_nnc_cmd(CCV_NNC_NOOP, 0, ccv_nnc_cmd_auto, 0) |
494 | #define CMD_CUSTOM_FORWARD(f)ccv_nnc_cmd(CCV_NNC_CUSTOM_FORWARD, f, ccv_nnc_cmd_auto, 0) ccv_nnc_cmd(CCV_NNC_CUSTOM_FORWARD, f, ccv_nnc_cmd_auto, 0) |
495 | /** @} */ |
496 | |
497 | int ccv_nnc_is_no_hint(const ccv_nnc_hint_t hint); |
498 | int ccv_nnc_is_cmd_auto(const ccv_nnc_cmd_param_t params); |
499 | int ccv_nnc_is_tensor_auto(const ccv_nnc_tensor_param_t params); |
500 | |
501 | /** |
502 | * @addtogroup convenience_api |
503 | * @{ |
504 | */ |
505 | /** |
506 | * Offsets all zero. |
507 | */ |
508 | extern const int ccv_nnc_no_ofs[CCV_NNC_MAX_DIM_ALLOC(12)]; |
509 | /** |
510 | * No hint available. |
511 | */ |
512 | extern const ccv_nnc_hint_t ccv_nnc_no_hint; |
513 | /** |
514 | * The default symbolic graph compile parameters. |
515 | */ |
516 | extern const ccv_nnc_symbolic_graph_compile_param_t ccv_nnc_default_compile_params; |
517 | /** |
518 | * Derive the command parameters automatically if possible. |
519 | */ |
520 | extern const ccv_nnc_cmd_param_t ccv_nnc_cmd_auto; |
521 | /** |
522 | * Derive the tensor parameters automatically if possible. |
523 | */ |
524 | extern const ccv_nnc_tensor_param_t ccv_nnc_tensor_auto; |
525 | /** @} */ |
526 | |
527 | // Generated command flags for easy creation of ccv_nnc_cmd_t objects. |
528 | #include "cmd/ccv_nnc_cmd_easy.h" |
529 | |
530 | #endif |