roi/ccv_nnc_roi_align_cpu

Bug Summary

File:	nnc/cmd/roi/ccv_nnc_roi_align_cpu_ref.c
Warning:	line 148, column 36 Division by zero

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_roi_align_cpu_ref.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd -resource-dir /usr/local/lib/clang/19 -I ../../ -I .. 
-I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2024-11-04-165540-38187-1 -x c roi/ccv_nnc_roi_align_cpu_ref.c

roi/ccv_nnc_roi_align_cpu_ref.c

→

1#include "ccv.h"
2#include "ccv_internal.h"
3#include "nnc/ccv_nnc.h"
4#include "nnc/ccv_nnc_easy.h"
5#include "nnc/ccv_nnc_internal.h"
6#ifdef USE_OPENMP
7#include <omp.h>
8#endif
9#ifdef USE_DISPATCH
10#include <dispatch/dispatch.h>
11#endif
12 
/**
 * Bilinear-interpolation coefficients for one sample position along a single
 * axis. The same layout is used for the vertical and the horizontal axis.
 */
typedef struct {
	int i0; /* Index of the lower neighbor, clamped into the valid range of the axis. */
	int i1; /* Index of the upper neighbor, clamped into the valid range of the axis. */
	int mute; /* Non-zero when the sample falls outside the input and must be skipped. */
	float r; /* Fractional offset from i0: weight given to i1, while (1 - r) goes to i0. */
} roi_align_coeffs_t;
17 
18static void _ccv_nnc_bilinear_coeffs(ccv_nnc_stream_context_t* const stream_context, const int h, const int w, const float roi_y, const float roi_x, const float roi_h, const float roi_w, const int pool_h, const int pool_w, int* const bin_h_ref, int* const bin_w_ref, roi_align_coeffs_t** const y_coeffs_ref, roi_align_coeffs_t** const x_coeffs_ref, int** const bin_h_at_y_ref, int** const bin_w_at_x_ref, int* const start_h_ref, int* const start_w_ref, int* const end_h_ref, int* const end_w_ref)
19{
20	const int bin_h = (int)ceilf(roi_h / pool_h); // How many bins in each point of the pool. We slightly sampling at higher resolution (due to ceiling) with bilinear interpolation.
21	const int bin_w = (int)ceilf(roi_w / pool_w);
22	const int bin_pool_h = bin_h * pool_h; // Before averaging, what's the size of the region in integral term.
23	const int bin_pool_w = bin_w * pool_w;
24	const float scale_y = roi_h / bin_pool_h; // The scale to multiply back to get original coordinate.
25	const float scale_x = roi_w / bin_pool_w;
26	int x, y, i, j;
27	roi_align_coeffs_t* const y_coeffs = (roi_align_coeffs_t*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(roi_align_coeffs_t) * (bin_pool_h + bin_pool_w) + sizeof(int) * (pool_h + pool_w), CCV_TENSOR_CPU_MEMORY);
28	roi_align_coeffs_t* const x_coeffs = y_coeffs + bin_pool_h;
29	int* const bin_h_at_y = (int*)(x_coeffs + bin_pool_w);
30	int* const bin_w_at_x = bin_h_at_y + pool_h;
31	for (i = 0; i < pool_h; i++)
32	{
33		const int pi = i * bin_h;
34		int count = 0;
35		for (y = 0; y < bin_h; y++)
36		{
37			const float ay = roi_y + (y + pi + 0.5) * scale_y - 0.5;
38			const int iy = (int)floorf(ay);
39			const float ry = ay - iy;
40			const int iy0 = ccv_clamp(iy, 0, h - 1)({ typeof (0) _a = (0); typeof (h - 1) _b = (h - 1); typeof (
iy) _x = (iy); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }
);
41			const int iy1 = ccv_clamp(iy + 1, 0, h - 1)({ typeof (0) _a = (0); typeof (h - 1) _b = (h - 1); typeof (
iy + 1) _x = (iy + 1); (_x < _a) ? _a : ((_x > _b) ? _b
 : _x); });
42			y_coeffs[pi + y].i0 = iy0;
43			y_coeffs[pi + y].i1 = iy1;
44			y_coeffs[pi + y].r = ry;
45			const int mute = (iy + 1 < 0 || iy > h - 1);
46			y_coeffs[pi + y].mute = mute;
47			if (!mute)
48				++count;
49		}
50		bin_h_at_y[i] = count;
51	}
52	int start_h = pool_h;
53	for (i = 0; start_h == pool_h && i < pool_h; i++)
54		if (bin_h_at_y[i] > 0)
55			start_h = i;
56	int end_h = 0;
57	for (i = pool_h - 1; end_h == 0 && i >= 0; i--)
58		if (bin_h_at_y[i] > 0)
59			end_h = i + 1;
60	for (j = 0; j < pool_w; j++)
61	{
62		const int pj = j * bin_w;
63		int count = 0;
64		for (x = 0; x < bin_w; x++)
65		{
66			const float ax = roi_x + (x + pj + 0.5) * scale_x - 0.5;
67			const int ix = (int)floorf(ax);
68			const float rx = ax - ix;
69			const int ix0 = ccv_clamp(ix, 0, w - 1)({ typeof (0) _a = (0); typeof (w - 1) _b = (w - 1); typeof (
ix) _x = (ix); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }
);
70			const int ix1 = ccv_clamp(ix + 1, 0, w - 1)({ typeof (0) _a = (0); typeof (w - 1) _b = (w - 1); typeof (
ix + 1) _x = (ix + 1); (_x < _a) ? _a : ((_x > _b) ? _b
 : _x); });
71			x_coeffs[pj + x].i0 = ix0;
72			x_coeffs[pj + x].i1 = ix1;
73			x_coeffs[pj + x].r = rx;
74			const int mute = (ix + 1 < 0 || ix > w - 1);
75			x_coeffs[pj + x].mute = mute;
76			if (!mute)
77				++count;
78		}
79		bin_w_at_x[j] = count;
80	}
81	int start_w = pool_w;
82	for (j = 0; start_w == pool_w && j < pool_w; j++)
83		if (bin_w_at_x[j] > 0)
84			start_w = j;
85	int end_w = 0;
86	for (j = pool_w - 1; end_w == 0 && j >= 0; j--)
87		if (bin_w_at_x[j] > 0)
88			end_w = j + 1;
89	*bin_h_ref = bin_h;
90	*bin_w_ref = bin_w;
91	*y_coeffs_ref = y_coeffs;
92	*x_coeffs_ref = x_coeffs;
93	*bin_h_at_y_ref = bin_h_at_y;
94	*bin_w_at_x_ref = bin_w_at_x;
95	*start_h_ref = start_h;
96	*start_w_ref = start_w;
97	*end_h_ref = end_h;
98	*end_w_ref = end_w;
99}
100 
101static int _ccv_nnc_roi_align_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
102{
103	assert(input_size == 2)((void) sizeof ((input_size == 2) ? 1 : 0), __extension__ ({ if
 (input_size == 2) ; else __assert_fail ("input_size == 2", "roi/ccv_nnc_roi_align_cpu_ref.c"
, 103, __extension__ __PRETTY_FUNCTION__); }));
1
Assuming 'input_size' is equal to 2→
2
←
Taking true branch→
104	const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
105	assert(output_size == 1)((void) sizeof ((output_size == 1) ? 1 : 0), __extension__ ({
 if (output_size == 1) ; else __assert_fail ("output_size == 1"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 105, __extension__ __PRETTY_FUNCTION__
); }));
3
←
Assuming 'output_size' is equal to 1→
4
←
Taking true branch→
106	const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1];
107	ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0];
108	const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
109	assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((a_nd == (2) + 1 || a_nd == (2) + 2) ? 1 : 0)
, __extension__ ({ if (a_nd == (2) + 1 || a_nd == (2) + 2) ; else
 __assert_fail ("a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 109, __extension__ __PRETTY_FUNCTION__
); }));
5
←
Assuming the condition is true→
110	const int* adim = (a_nd == CCV_NNC_MAX_DIM(2) + 1) ? a->info.dim : a->info.dim + 1;
6
←
'?' condition is true→
111	const int h = adim[0];
112	const int w = adim[1];
113	const int c_nd = ccv_nnc_tensor_nd(c->info.dim);
114	assert(c_nd == CCV_NNC_MAX_DIM + 1 || c_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((c_nd == (2) + 1 || c_nd == (2) + 2) ? 1 : 0)
, __extension__ ({ if (c_nd == (2) + 1 || c_nd == (2) + 2) ; else
 __assert_fail ("c_nd == CCV_NNC_MAX_DIM + 1 || c_nd == CCV_NNC_MAX_DIM + 2"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 114, __extension__ __PRETTY_FUNCTION__
); }));
7
←
Assuming the condition is true→
115	const int* cdim = (c_nd == CCV_NNC_MAX_DIM(2) + 1) ? c->info.dim : c->info.dim + 1;
8
←
'?' condition is true→
116	const int pool_h = cdim[0];
117	const int pool_w = cdim[1];
118	assert(cdim[2] == adim[2])((void) sizeof ((cdim[2] == adim[2]) ? 1 : 0), __extension__ (
{ if (cdim[2] == adim[2]) ; else __assert_fail ("cdim[2] == adim[2]"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 118, __extension__ __PRETTY_FUNCTION__
); }));
9
←
Assuming the condition is true→
10
←
Taking true branch→
119	const int ch = cdim[2];
120	const float* const ap = a->data.f32;
121	int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
122	ccv_nnc_tensor_view_get_stride(a, astride);
123	const float* const bp = b->data.f32;
124	float* cp = c->data.f32;
125	int cstride[CCV_NNC_MAX_DIM_ALLOC(12)];
126	ccv_nnc_tensor_view_get_stride(c, cstride);
127	const int a_n = ccv_nnc_tensor_get_n(a->info);
11
←
Calling 'ccv_nnc_tensor_get_n'→
14
←
Returning from 'ccv_nnc_tensor_get_n'→
15
←
'a_n' initialized to 0→
128	const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
129	assert(b_nd == 1 || b_nd == 2)((void) sizeof ((b_nd == 1 || b_nd == 2) ? 1 : 0), __extension__
 ({ if (b_nd == 1 || b_nd == 2) ; else __assert_fail ("b_nd == 1 || b_nd == 2"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 129, __extension__ __PRETTY_FUNCTION__
); }));
16
←
Assuming 'b_nd' is equal to 1→
130	const int b_n = b_nd16.1
'b_nd' is equal to 1
16.1
'b_nd' is equal to 1
 == 1 ? 1 : b->info.dim[0];
17
←
'?' condition is true→
131	const int c_n = ccv_nnc_tensor_get_n(c->info);
132	assert(c_n == ccv_max(a_n, b_n))((void) sizeof ((c_n == ({ typeof (a_n) _a = (a_n); typeof (b_n
) _b = (b_n); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
 ({ if (c_n == ({ typeof (a_n) _a = (a_n); typeof (b_n) _b = (
b_n); (_a > _b) ? _a : _b; })) ; else __assert_fail ("c_n == ccv_max(a_n, b_n)"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 132, __extension__ __PRETTY_FUNCTION__
); }));
18
←
'?' condition is false→
19
←
Assuming the condition is true→
20
←
Taking true branch→
133	const int aninc = a_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : astride[0];
21
←
'?' condition is true→
134	int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
135	ccv_nnc_tensor_view_get_stride(b, bstride);
136	const int bninc = b_nd21.1
'b_nd' is equal to 1
21.1
'b_nd' is equal to 1
 == 1 ? 0 : bstride[CCV_NNC_MAX_DIM(2) + 2 - b_nd];
22
←
'?' condition is true→
137	const int cninc = c_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : cstride[0];
23
←
'?' condition is true→
138	ccv_nnc_tensor_zero(c);
139	int bin_h, bin_w;
140	roi_align_coeffs_t* y_coeffs;
141	roi_align_coeffs_t* x_coeffs;
142	int* bin_h_at_y;
143	int* bin_w_at_x;
144	int start_h, start_w, end_h, end_w;
145	int n;
146	for (n = 0; n < c_n; n++)
24
←
Loop condition is true.  Entering loop body→
147	{
148		const float* const apn = ap + (n % a_n) * aninc;
25
←
Division by zero
149		float* cpn = cp + n * cninc;
150		const float roi_x = bp[(n % b_n) * bninc] * w; // These assumed it is real-coordinate, with range between 0 to w - 1.
151		const float roi_y = bp[(n % b_n) * bninc + 1] * h;
152		const float roi_w = bp[(n % b_n) * bninc + 2] * w;
153		const float roi_h = bp[(n % b_n) * bninc + 3] * h;
154		// Re-compute the offsets if b changes or it is the first time.
155		if ((b_n == 1 && n == 0) || b_n > 1)
156			_ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w);
157		int i, j, x, y, k;
158		for (i = start_h; i < end_h; i++)
159		{
160			const int pi = i * bin_h;
161			const int bin_hz = bin_h_at_y[i];
162			for (j = start_w; j < end_w; j++)
163			{
164				const int pj = j * bin_w;
165				const int bin_wz = bin_w_at_x[j];
166				const float inv = 1.0 / (bin_hz * bin_wz);
167				float* const cpz = cpn + j * cstride[CCV_NNC_MAX_DIM(2)];
168				for (y = 0; y < bin_h; y++)
169				{
170					if (y_coeffs[pi + y].mute)
171						continue;
172					const float ry = y_coeffs[pi + y].r;
173					const int iy0 = y_coeffs[pi + y].i0;
174					const int iy1 = y_coeffs[pi + y].i1;
175					for (x = 0; x < bin_w; x++)
176					{
177						if (x_coeffs[pj + x].mute)
178							continue;
179						const float rx = x_coeffs[pj + x].r;
180						const int ix0 = x_coeffs[pj + x].i0;
181						const int ix1 = x_coeffs[pj + x].i1;
182						const float c00 = (1 - ry) * (1 - rx);
183						const float c01 = (1 - ry) * rx;
184						const float c10 = ry * (1 - rx);
185						const float c11 = ry * rx;
186						const float* const ap00 = apn + iy0 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * astride[CCV_NNC_MAX_DIM(2)];
187						const float* const ap01 = apn + iy0 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * astride[CCV_NNC_MAX_DIM(2)];
188						const float* const ap10 = apn + iy1 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * astride[CCV_NNC_MAX_DIM(2)];
189						const float* const ap11 = apn + iy1 * astride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * astride[CCV_NNC_MAX_DIM(2)];
190						for (k = 0; k < ch; k++)
191							cpz[k] += ap00[k] * c00 + ap01[k] * c01 + ap10[k] * c10 + ap11[k] * c11;
192					}
193				}
194				for (k = 0; k < ch; k++)
195					cpz[k] *= inv;
196			}
197			cpn += cstride[CCV_NNC_MAX_DIM(2) - 1];
198		}
199	}
200	return CCV_NNC_EXEC_SUCCESS;
201}
202 
203static int _ccv_nnc_roi_align_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
204{
205	assert(input_size >= 3)((void) sizeof ((input_size >= 3) ? 1 : 0), __extension__ (
{ if (input_size >= 3) ; else __assert_fail ("input_size >= 3"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 205, __extension__ __PRETTY_FUNCTION__
); }));
206	const ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
207	assert(output_size == 1)((void) sizeof ((output_size == 1) ? 1 : 0), __extension__ ({
 if (output_size == 1) ; else __assert_fail ("output_size == 1"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 207, __extension__ __PRETTY_FUNCTION__
); }));
208	ccv_nnc_tensor_view_t* o = (ccv_nnc_tensor_view_t*)outputs[0];
209	const int g_nd = ccv_nnc_tensor_nd(g->info.dim);
210	assert(g_nd == CCV_NNC_MAX_DIM + 1 || g_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((g_nd == (2) + 1 || g_nd == (2) + 2) ? 1 : 0)
, __extension__ ({ if (g_nd == (2) + 1 || g_nd == (2) + 2) ; else
 __assert_fail ("g_nd == CCV_NNC_MAX_DIM + 1 || g_nd == CCV_NNC_MAX_DIM + 2"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 210, __extension__ __PRETTY_FUNCTION__
); }));
211	const int* gdim = (g_nd == CCV_NNC_MAX_DIM(2) + 1) ? g->info.dim : g->info.dim + 1;
212	const int pool_h = gdim[0];
213	const int pool_w = gdim[1];
214	const int o_nd = ccv_nnc_tensor_nd(o->info.dim);
215	assert(o_nd == CCV_NNC_MAX_DIM + 1 || o_nd == CCV_NNC_MAX_DIM + 2)((void) sizeof ((o_nd == (2) + 1 || o_nd == (2) + 2) ? 1 : 0)
, __extension__ ({ if (o_nd == (2) + 1 || o_nd == (2) + 2) ; else
 __assert_fail ("o_nd == CCV_NNC_MAX_DIM + 1 || o_nd == CCV_NNC_MAX_DIM + 2"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 215, __extension__ __PRETTY_FUNCTION__
); }));
216	const int* odim = (o_nd == CCV_NNC_MAX_DIM(2) + 1) ? o->info.dim : o->info.dim + 1;
217	const int h = odim[0];
218	const int w = odim[1];
219	assert(gdim[2] == odim[2])((void) sizeof ((gdim[2] == odim[2]) ? 1 : 0), __extension__ (
{ if (gdim[2] == odim[2]) ; else __assert_fail ("gdim[2] == odim[2]"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 219, __extension__ __PRETTY_FUNCTION__
); }));
220	const int ch = gdim[2];
221	float* gp = g->data.f32;
222	int gstride[CCV_NNC_MAX_DIM_ALLOC(12)];
223	ccv_nnc_tensor_view_get_stride(g, gstride);
224	float* op = o->data.f32;
225	int ostride[CCV_NNC_MAX_DIM_ALLOC(12)];
226	ccv_nnc_tensor_view_get_stride(o, ostride);
227	const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[2];
228	const float* const bp = b->data.f32;
229	const int o_n = ccv_nnc_tensor_get_n(o->info);
230	const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
231	assert(b_nd == 1 || b_nd == 2)((void) sizeof ((b_nd == 1 || b_nd == 2) ? 1 : 0), __extension__
 ({ if (b_nd == 1 || b_nd == 2) ; else __assert_fail ("b_nd == 1 || b_nd == 2"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 231, __extension__ __PRETTY_FUNCTION__
); }));
232	const int b_n = b_nd == 1 ? 1 : b->info.dim[0];
233	const int g_n = ccv_nnc_tensor_get_n(g->info);
234	assert(g_n == ccv_max(o_n, b_n))((void) sizeof ((g_n == ({ typeof (o_n) _a = (o_n); typeof (b_n
) _b = (b_n); (_a > _b) ? _a : _b; })) ? 1 : 0), __extension__
 ({ if (g_n == ({ typeof (o_n) _a = (o_n); typeof (b_n) _b = (
b_n); (_a > _b) ? _a : _b; })) ; else __assert_fail ("g_n == ccv_max(o_n, b_n)"
, "roi/ccv_nnc_roi_align_cpu_ref.c", 234, __extension__ __PRETTY_FUNCTION__
); }));
235	const int oninc = o_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : ostride[0];
236	int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
237	ccv_nnc_tensor_view_get_stride(b, bstride);
238	const int bninc = b_nd == 1 ? 0 : bstride[CCV_NNC_MAX_DIM(2) + 2 - b_nd];
239	const int gninc = g_nd == CCV_NNC_MAX_DIM(2) + 1 ? 0 : gstride[0];
240	int bin_h, bin_w;
241	roi_align_coeffs_t* y_coeffs;
242	roi_align_coeffs_t* x_coeffs;
243	int* bin_h_at_y;
244	int* bin_w_at_x;
245	int start_h, start_w, end_h, end_w;
246	int n;
247	ccv_nnc_tensor_zero(o);
248	for (n = 0; n < g_n; n++)
249	{
250		const float roi_x = bp[(n % b_n) * bninc] * w; // These assumed it is real-coordinate, with range between 0 to w - 1.
251		const float roi_y = bp[(n % b_n) * bninc + 1] * h;
252		const float roi_w = bp[(n % b_n) * bninc + 2] * w;
253		const float roi_h = bp[(n % b_n) * bninc + 3] * h;
254		// Re-compute the offsets if b changes or it is the first time.
255		if ((b_n == 1 && n == 0) || b_n > 1)
256			_ccv_nnc_bilinear_coeffs(stream_context, h, w, roi_y, roi_x, roi_h, roi_w, pool_h, pool_w, &bin_h, &bin_w, &y_coeffs, &x_coeffs, &bin_h_at_y, &bin_w_at_x, &start_h, &start_w, &end_h, &end_w);
257		const float* gpn = gp + n * gninc;
258		float* const opn = op + (n % o_n) * oninc;
259		int x, y, i, j, k;
260		for (i = 0; i < pool_h; i++)
261		{
262			const int pi = i * bin_h;
263			const int bin_hz = bin_h_at_y[i];
264			for (j = 0; j < pool_w; j++)
265			{
266				const int pj = j * bin_w;
267				const int bin_wz = bin_w_at_x[j];
268				const float inv = 1.0 / (bin_hz * bin_wz);
269				const float* const gpz = gpn + j * gstride[CCV_NNC_MAX_DIM(2)];
270				for (y = 0; y < bin_h; y++)
271				{
272					if (y_coeffs[pi + y].mute)
273						continue;
274					const float ry = y_coeffs[pi + y].r;
275					const int iy0 = y_coeffs[pi + y].i0;
276					const int iy1 = y_coeffs[pi + y].i1;
277					for (x = 0; x < bin_w; x++)
278					{
279						if (x_coeffs[pj + x].mute)
280							continue;
281						const float rx = x_coeffs[pj + x].r;
282						const int ix0 = x_coeffs[pj + x].i0;
283						const int ix1 = x_coeffs[pj + x].i1;
284						const float c00 = (1 - ry) * (1 - rx);
285						const float c01 = (1 - ry) * rx;
286						const float c10 = ry * (1 - rx);
287						const float c11 = ry * rx;
288						float* const op00 = opn + iy0 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * ostride[CCV_NNC_MAX_DIM(2)];
289						float* const op01 = opn + iy0 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * ostride[CCV_NNC_MAX_DIM(2)];
290						float* const op10 = opn + iy1 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix0 * ostride[CCV_NNC_MAX_DIM(2)];
291						float* const op11 = opn + iy1 * ostride[CCV_NNC_MAX_DIM(2) - 1] + ix1 * ostride[CCV_NNC_MAX_DIM(2)];
292						for (k = 0; k < ch; k++)
293						{
294							op00[k] += gpz[k] * c00 * inv;
295							op01[k] += gpz[k] * c01 * inv;
296							op10[k] += gpz[k] * c10 * inv;
297							op11[k] += gpz[k] * c11 * inv;
298						}
299					}
300				}
301			}
302			gpn += gstride[CCV_NNC_MAX_DIM(2) - 1];
303		}
304	}
305	return CCV_NNC_EXEC_SUCCESS;
306}
307 
308REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_ROI_ALIGN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
309{
310	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
311	registry->tensor_datatypes = CCV_32F;
312	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
313	registry->algorithms = 1;
314	registry->exec = _ccv_nnc_roi_align_forw;
315}
316 
317REGISTER_COMMAND_BACKEND(CCV_NNC_ROI_ALIGN_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_ROI_ALIGN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
318{
319	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
320	registry->tensor_datatypes = CCV_32F;
321	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
322	registry->algorithms = 1;
323	registry->exec = _ccv_nnc_roi_align_back;
324}

←

../../nnc/ccv_nnc_easy.h

1/**********************************************************
2 * C-based/Cached/Core Computer Vision Library
3 * Liu Liu, 2010-02-01
4 **********************************************************/
5 
6/**********************************************************
7 * CCV - Neural Network Collection
8 **********************************************************/
9 
10#ifndef GUARD_ccv_nnc_easy_h
11#define GUARD_ccv_nnc_easy_h
12 
13#include "ccv.h"
14#include "ccv_internal.h"
15#include "nnc/ccv_nnc.h"
16#ifdef HAVE_MPS
17#ifdef __APPLE__
18#include "TargetConditionals.h"
19#if !TARGET_OS_IPHONE && !TARGET_IPHONE_SIMULATOR
20#include <mach/mach_vm.h>
21#else
22#define PAGE_SIZE (16384)
23#endif
24#endif
25#endif
26 
27/**
28 * Convenience API
29 *
30 * This header provides convenience APIs for nnc usage. Being convenience API,
31 * it is optimized for shorthand coding, and may collide the naming space with
32 * others.
33 *
34 */
35// c99 only, make sure your compiler supports that.
36 
37#define NOOP_GRAPH_WHILE_EXPR(ccv_nnc_graph_while_f)(1) (ccv_nnc_graph_while_f)(1)
38#define NOOP_GRAPH_CASE_OF_EXPR(ccv_nnc_graph_case_of_f)(1) (ccv_nnc_graph_case_of_f)(1)
39 
40// This is a better LIST_COUNT macro, it generates a list of 1+1+0+0+0 where it is 1 if the parameter presents, and 0 otherwise.
41// This works better for cases such as LIST_COUNT(1, 2, 3,) where previous macro will get 4 and this one will have correctly
42// computed result.
43#define LIST_COUNT_01(_0,_1,_2,...)_2 _2
44#define LIST_COUNT_E(...)1 LIST_COUNT_01(_0,##__VA_ARGS__,1,0)1
45#define LIST_COUNT_N(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12,_13,_14,_15,_16,_17,_18,_19,_20,_21,_22,_23,_24,_25,_26,_27,_28,_29,_30,_31,_32,_33,_34,_35,_36,_37,_38,_39,_40,_41,_42,_43,_44,_45,_46,_47,_48,_49,_50,_51,_52,_53,_54,_55,_56,_57,_58,_59,_60,_61,_62,_63,...)(1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +
1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1
 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +
1 +1 +1 -1) (LIST_COUNT_E(_0)1+LIST_COUNT_E(_1)1+LIST_COUNT_E(_2)1+LIST_COUNT_E(_3)1+LIST_COUNT_E(_4)1+LIST_COUNT_E(_5)1+LIST_COUNT_E(_6)1+LIST_COUNT_E(_7)1+LIST_COUNT_E(_8)1+LIST_COUNT_E(_9)1+LIST_COUNT_E(_10)1+LIST_COUNT_E(_11)1+LIST_COUNT_E(_12)1+LIST_COUNT_E(_13)1+LIST_COUNT_E(_14)1+LIST_COUNT_E(_15)1+LIST_COUNT_E(_16)1+LIST_COUNT_E(_17)1+LIST_COUNT_E(_18)1+LIST_COUNT_E(_19)1+LIST_COUNT_E(_20)1+LIST_COUNT_E(_21)1+LIST_COUNT_E(_22)1+LIST_COUNT_E(_23)1+LIST_COUNT_E(_24)1+LIST_COUNT_E(_25)1+LIST_COUNT_E(_26)1+LIST_COUNT_E(_27)1+LIST_COUNT_E(_28)1+LIST_COUNT_E(_29)1+LIST_COUNT_E(_30)1+LIST_COUNT_E(_31)1+LIST_COUNT_E(_32)1+LIST_COUNT_E(_33)1+LIST_COUNT_E(_34)1+LIST_COUNT_E(_35)1+LIST_COUNT_E(_36)1+LIST_COUNT_E(_37)1+LIST_COUNT_E(_38)1+LIST_COUNT_E(_39)1+LIST_COUNT_E(_40)1+LIST_COUNT_E(_41)1+LIST_COUNT_E(_42)1+LIST_COUNT_E(_43)1+LIST_COUNT_E(_44)1+LIST_COUNT_E(_45)1+LIST_COUNT_E(_46)1+LIST_COUNT_E(_47)1+LIST_COUNT_E(_48)1+LIST_COUNT_E(_49)1+LIST_COUNT_E(_50)1+LIST_COUNT_E(_51)1+LIST_COUNT_E(_52)1+LIST_COUNT_E(_53)1+LIST_COUNT_E(_54)1+LIST_COUNT_E(_55)1+LIST_COUNT_E(_56)1+LIST_COUNT_E(_57)1+LIST_COUNT_E(_58)1+LIST_COUNT_E(_59)1+LIST_COUNT_E(_60)1+LIST_COUNT_E(_61)1+LIST_COUNT_E(_62)1+LIST_COUNT_E(_63)1-1)
46#define LIST_COUNT(...)(1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 -1) LIST_COUNT_N(_0,##__VA_ARGS__,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,)(1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 -1)
47 
48#define LIST_X(_type, ...)(_type []){...} (_type []){__VA_ARGS__}
49 
50#define KV_X_2(_x, _y, ...){(_x), (_y)} {(_x), (_y)}
51#define KV_X_1(_x, ...){(_x)} {(_x)}
52#define KV_X_SEL(_1, _2, _FX, ...)_FX _FX
53#define KV(...){(...)} KV_X_SEL(__VA_ARGS__, KV_X_2, KV_X_1)(__VA_ARGS__){(__VA_ARGS__)}
54 
55#define LIST_SIZEOF_COUNT(_type, ...)(sizeof((_type []){...}) / sizeof(_type)) (sizeof(LIST_X(_type, __VA_ARGS__)(_type []){__VA_ARGS__}) / sizeof(_type))
56 
57/**
58 * @defgroup convenience_api Convenience API
59 * @{
60 */
61/**
62 * Pass a list of tensors to NNC functions that accepts (tensor array, tensor array size).
63 * This method effectively gives two parameters as one.
64 */
65#define TENSOR_LIST(...)(ccv_nnc_tensor_t* []){...}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) LIST_X(ccv_nnc_tensor_t*, __VA_ARGS__)(ccv_nnc_tensor_t* []){__VA_ARGS__}, LIST_COUNT(__VA_ARGS__)(1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 -1)
66/**
67 * Pass a list of tensor parameters to NNC functions that accepts (parameter array, parameter array size).
68 * This method effectively gives two parameters as one.
69 */
70#define TENSOR_PARAM_LIST(...)(const ccv_nnc_tensor_param_t []){...}, (1 +1 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) LIST_X(const ccv_nnc_tensor_param_t, __VA_ARGS__)(const ccv_nnc_tensor_param_t []){__VA_ARGS__}, LIST_COUNT(__VA_ARGS__)(1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 -1)
71/**
72 * This represents a tensor symbol that is empty (tensor = nil)
73 */
74#define NO_TENSOR_SYMBOL(const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL
} (const ccv_nnc_tensor_symbol_t){.d = CCV_NNC_NO_TENSOR_SYMBOL}
75/**
76 * This represents a graph exec symbol that is empty (exec = nil)
77 */
78#define NO_GRAPH_EXEC_SYMBOL(const ccv_nnc_graph_exec_symbol_t){.d = CCV_NNC_NO_GRAPH_EXEC_SYMBOL
} (const ccv_nnc_graph_exec_symbol_t){.d = CCV_NNC_NO_GRAPH_EXEC_SYMBOL}
79/**
80 * Pass a list of tensor symbols to NNC functions that accepts (tensor symbol array, tensor symbol array size).
81 * This method effectively gives two parameters as one.
82 */
83#define TENSOR_SYMBOL_LIST(...)(const ccv_nnc_tensor_symbol_t []){...}, (1 +1 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) LIST_X(const ccv_nnc_tensor_symbol_t, __VA_ARGS__)(const ccv_nnc_tensor_symbol_t []){__VA_ARGS__}, LIST_COUNT(__VA_ARGS__)(1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 -1)
84/**
85 * Pass a list of tensor variables to NNC functions that accepts (tensor variable array, tensor variable array size).
86 * This method effectively gives two parameters as one.
87 */
88#define TENSOR_VARIABLE_LIST(...)(ccv_nnc_tensor_variable_t []){...}, (1 +1 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) LIST_X(ccv_nnc_tensor_variable_t, __VA_ARGS__)(ccv_nnc_tensor_variable_t []){__VA_ARGS__}, LIST_COUNT(__VA_ARGS__)(1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 -1)
89/**
90 * Pass a list of tensor bindings to NNC functions that accepts (tensor binding array, tensor binding array size).
91 * This method effectively gives two parameters as one. Since tensor binding requires two: symbol and a tensor,
92 * you should use this like: TENSOR_BIND_MAP(KV(symbol1, tensor1), KV(symbol2, tensor2)).
93 */
94#define TENSOR_BIND_MAP(...)(const ccv_nnc_tensor_bind_t []){...}, (sizeof((ccv_nnc_tensor_bind_t
 []){...}) / sizeof(ccv_nnc_tensor_bind_t)) LIST_X(const ccv_nnc_tensor_bind_t, __VA_ARGS__)(const ccv_nnc_tensor_bind_t []){__VA_ARGS__}, LIST_SIZEOF_COUNT(ccv_nnc_tensor_bind_t, __VA_ARGS__)(sizeof((ccv_nnc_tensor_bind_t []){__VA_ARGS__}) / sizeof(ccv_nnc_tensor_bind_t
))
95/**
96 * Pass a list of tensor symbol pairs to NNC functions that accepts (tensor symbol pair array, tensor symbol pair array size).
97 * This method effectively gives two parameters as one. Since tensor symbol pair requires two: source symbol and destination symbol,
98 * you should use this like: TENSOR_SYMBOL_MAP(KV(symbol1, symbol2), KV(symbol3, symbol4)).
99 */
100#define TENSOR_SYMBOL_MAP(...)(const ccv_nnc_tensor_symbol_map_t []){...}, (sizeof((ccv_nnc_tensor_symbol_map_t
 []){...}) / sizeof(ccv_nnc_tensor_symbol_map_t)) LIST_X(const ccv_nnc_tensor_symbol_map_t, __VA_ARGS__)(const ccv_nnc_tensor_symbol_map_t []){__VA_ARGS__}, LIST_SIZEOF_COUNT(ccv_nnc_tensor_symbol_map_t, __VA_ARGS__)(sizeof((ccv_nnc_tensor_symbol_map_t []){__VA_ARGS__}) / sizeof
(ccv_nnc_tensor_symbol_map_t))
101/**
102 * Pass a list of execution nodes to NNC functions that accepts (execution node array, execution node array size).
103 * This method effectively gives two parameters as one.
104 */
105#define GRAPH_EXEC_LIST(...)(const ccv_nnc_graph_exec_t []){...}, (1 +1 +0 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1) LIST_X(const ccv_nnc_graph_exec_t, __VA_ARGS__)(const ccv_nnc_graph_exec_t []){__VA_ARGS__}, LIST_COUNT(__VA_ARGS__)(1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 -1)
/**
 * Expand the listed execution node symbols into the (execution node symbol array,
 * execution node symbol array size) argument pair that NNC functions take, so one macro use
 * fills two parameters.
 */
#define GRAPH_EXEC_SYMBOL_LIST(...) \
	LIST_X(const ccv_nnc_graph_exec_symbol_t, __VA_ARGS__), \
	LIST_COUNT(__VA_ARGS__)
/**
 * Expand into the default sources of a symbolic graph followed by their count, for functions
 * that accept (sources, source size).
 * @param x A given symbolic graph.
 */
#define SYMBOLIC_GRAPH_SOURCES(x) \
	ccv_nnc_symbolic_graph_sources(x), \
	ccv_nnc_symbolic_graph_source_size(x)
/**
 * Expand into the default destinations of a symbolic graph followed by their count, for
 * functions that accept (destinations, destination size).
 * @param x A given symbolic graph.
 */
#define SYMBOLIC_GRAPH_DESTINATIONS(x) \
	ccv_nnc_symbolic_graph_destinations(x), \
	ccv_nnc_symbolic_graph_destination_size(x)
/**
 * Expand the listed simplification passes into the (pass array, pass array size) argument
 * pair that NNC functions take, so one macro use fills two parameters.
 */
#define SYMBOLIC_GRAPH_PASSES(...) \
	LIST_X(const int, __VA_ARGS__), \
	LIST_COUNT(__VA_ARGS__)
/**
 * Expand the listed CNNP models into the (model array, model array size) argument pair that
 * NNC functions take, so one macro use fills two parameters.
 */
#define MODEL_LIST(...) \
	LIST_X(ccv_cnnp_model_t*, __VA_ARGS__), \
	LIST_COUNT(__VA_ARGS__)
/**
 * Expand the listed CNNP model IOs into the (model IO array, model IO array size) argument
 * pair that NNC functions take, so one macro use fills two parameters.
 */
#define MODEL_IO_LIST(...) \
	LIST_X(const ccv_cnnp_model_io_t, __VA_ARGS__), \
	LIST_COUNT(__VA_ARGS__)
/**
 * Expand the listed CNNP tensor params into the (tensor params array, tensor params array size)
 * argument pair accepted by ccv_cnnp_cmd_exec, so one macro use fills two parameters.
 * The count is derived with sizeof over the array rather than by counting macro arguments.
 */
#define MODEL_CMD_EXEC_IO_MAP(...) \
	LIST_X(const ccv_cnnp_cmd_exec_io_t, __VA_ARGS__), \
	LIST_SIZEOF_COUNT(ccv_cnnp_cmd_exec_io_t, __VA_ARGS__)
/**
 * Expand the listed CNNP tensor types into the (tensor type array, tensor type array size)
 * argument pair accepted by ccv_cnnp_cmd_exec, so one macro use fills two parameters.
 */
#define MODEL_CMD_EXEC_IO_LIST(...) \
	LIST_X(const int, __VA_ARGS__), \
	LIST_COUNT(__VA_ARGS__)
/**
 * Expand the listed dataframe column ids into the (column id array, column id array size)
 * argument pair accepted by the iteration function, so one macro use fills two parameters.
 */
#define COLUMN_ID_LIST(...) \
	LIST_X(const int, __VA_ARGS__), \
	LIST_COUNT(__VA_ARGS__)
151 
// Expands to four zero arguments. NOTE(review): presumably stands for "no explicit sources / destinations" in traversal calls - confirm against callers.
#define TRAVERSE_FULL 0, 0, 0, 0

// Expands to -1. NOTE(review): presumably the "select every parameter" index - confirm against callers.
#define ALL_PARAMETERS -1
155 
// We will support NUMA allocation on CPU in the future. Currently, this is not very meaningful (except enforce no memory reuse between tensors).
// Each macro builds a ccv_nnc_tensor_param_t compound literal in CPU memory for the given
// compute device id suffix, datatype suffix (pasted onto CCV_) and dimension list.
#define CPU_NUMA_TENSOR_NHWC(device_id, dt, ...) \
	((ccv_nnc_tensor_param_t){ \
		.type = (CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_CPU_MEMORY, \
		.format = CCV_TENSOR_FORMAT_NHWC, \
		.datatype = CCV_##dt, \
		.dim = {__VA_ARGS__} \
	})
#define CPU_NUMA_TENSOR_NCHW(device_id, dt, ...) \
	((ccv_nnc_tensor_param_t){ \
		.type = (CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_CPU_MEMORY, \
		.format = CCV_TENSOR_FORMAT_NCHW, \
		.datatype = CCV_##dt, \
		.dim = {__VA_ARGS__} \
	})
#define CPU_NUMA_TENSOR_CHWN(device_id, dt, ...) \
	((ccv_nnc_tensor_param_t){ \
		.type = (CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_CPU_MEMORY, \
		.format = CCV_TENSOR_FORMAT_CHWN, \
		.datatype = CCV_##dt, \
		.dim = {__VA_ARGS__} \
	})
// CPU tensor parameters without a specific device: forwards to the NUMA variants with the ANY device id.
#define CPU_TENSOR_NHWC(dt, ...) \
	CPU_NUMA_TENSOR_NHWC(ANY, dt, __VA_ARGS__)
#define CPU_TENSOR_NCHW(dt, ...) \
	CPU_NUMA_TENSOR_NCHW(ANY, dt, __VA_ARGS__)
#define CPU_TENSOR_CHWN(dt, ...) \
	CPU_NUMA_TENSOR_CHWN(ANY, dt, __VA_ARGS__)
// This way, we can do error check on the device type :)
// Each macro builds a ccv_nnc_tensor_param_t compound literal in GPU memory; device_id is pasted
// onto CCV_COMPUTE_DEVICE_ and dt onto CCV_, so an unknown suffix fails to compile.
#define GPU_TENSOR_NHWC(device_id, dt, ...) \
	((ccv_nnc_tensor_param_t){ \
		.type = (CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_GPU_MEMORY, \
		.format = CCV_TENSOR_FORMAT_NHWC, \
		.datatype = CCV_##dt, \
		.dim = {__VA_ARGS__} \
	})
#define GPU_TENSOR_NCHW(device_id, dt, ...) \
	((ccv_nnc_tensor_param_t){ \
		.type = (CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_GPU_MEMORY, \
		.format = CCV_TENSOR_FORMAT_NCHW, \
		.datatype = CCV_##dt, \
		.dim = {__VA_ARGS__} \
	})
#define GPU_TENSOR_CHWN(device_id, dt, ...) \
	((ccv_nnc_tensor_param_t){ \
		.type = (CCV_COMPUTE_DEVICE_##device_id) | CCV_TENSOR_GPU_MEMORY, \
		.format = CCV_TENSOR_FORMAT_CHWN, \
		.datatype = CCV_##dt, \
		.dim = {__VA_ARGS__} \
	})
167/** @} */
168 
// Compound-literal int array of CCV_NNC_MAX_DIM_ALLOC entries, initialized with the listed values (the remaining entries are zero).
#define DIM_ALLOC(...) \
	(int [CCV_NNC_MAX_DIM_ALLOC]){__VA_ARGS__}
170 
// Strips one level of parentheses: `ESCAPE_X (a, b)` expands to `a, b`.
#define ESCAPE_X(...) __VA_ARGS__
// Hint with only a stride; both border ends are zeroed.
#define HINT_X_1(_stride_) \
	((ccv_nnc_hint_t){ \
		.stride = {.dim = {ESCAPE_X _stride_}}, \
		.border = {.begin = {0}, .end = {0}} \
	})
// Hint with a stride and one border list used for both begin and end.
#define HINT_X_2(_stride_, _border_) \
	((ccv_nnc_hint_t){ \
		.stride = {.dim = {ESCAPE_X _stride_}}, \
		.border = {.begin = {ESCAPE_X _border_}, .end = {ESCAPE_X _border_}} \
	})
// Hint with a stride and separate border begin / end lists.
#define HINT_X_3(_stride_, _begin_, _end_) \
	((ccv_nnc_hint_t){ \
		.stride = {.dim = {ESCAPE_X _stride_}}, \
		.border = {.begin = {ESCAPE_X _begin_}, .end = {ESCAPE_X _end_}} \
	})
// Selects its fourth argument; HINT uses it to dispatch on the number of arguments supplied.
#define HINT_X_SEL(_1, _2, _3, _FX, ...) _FX
/**
 * @ingroup convenience_api
 * Shorthand for building a ccv_nnc_hint_t. The argument count picks the form:
 * HINT(stride), HINT(stride, border), HINT(stride, border begin, border end).
 * Each argument is a parenthesized list.
 */
#define HINT(...) \
	HINT_X_SEL(__VA_ARGS__, HINT_X_3, HINT_X_2, HINT_X_1)(__VA_ARGS__)
182 
183static inline size_t ccv_nnc_dimension_count(const int dim[CCV_NNC_MAX_DIM_ALLOC(12)])
184{
185	if (dim[0] == 0)
186		return 0;
187	int i;
188	size_t count = dim[0];
189	for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC(12) && dim[i] > 0; i++)
190		count *= dim[i];
191	return count;
192}
193 
194static inline size_t ccv_nnc_dimension_upper_bound(const int dim[CCV_NNC_MAX_DIM_ALLOC(12)], const int stride[CCV_NNC_MAX_DIM_ALLOC(12)])
195{
196	if (dim[0] == 0 || stride[0] == 0)
197		return 0;
198	int i;
199	size_t count = 1 + (dim[0] - 1) * stride[0];
200	for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC(12) && dim[i] > 0 && stride[i] > 0; i++)
201		count += (dim[i] - 1) * stride[i];
202	return count;
203}
204 
205static inline size_t ccv_nnc_tensor_count(const ccv_nnc_tensor_param_t params)
206{
207	return ccv_nnc_dimension_count(params.dim);
208}
209 
210static inline ccv_nnc_tensor_param_t ccv_nnc_tensor_palettize(const ccv_nnc_tensor_param_t params, const int qbits, const int number_in_blocks)
211{
212	assert(params.datatype == CCV_16F || params.datatype == CCV_32F || params.datatype == CCV_64F)((void) sizeof ((params.datatype == CCV_16F || params.datatype
 == CCV_32F || params.datatype == CCV_64F) ? 1 : 0), __extension__
 ({ if (params.datatype == CCV_16F || params.datatype == CCV_32F
 || params.datatype == CCV_64F) ; else __assert_fail ("params.datatype == CCV_16F || params.datatype == CCV_32F || params.datatype == CCV_64F"
, "../../nnc/ccv_nnc_easy.h", 212, __extension__ __PRETTY_FUNCTION__
); }));
213	ccv_nnc_tensor_param_t new_params = params;
214	assert(qbits >= 4 && qbits <= 8)((void) sizeof ((qbits >= 4 && qbits <= 8) ? 1 :
 0), __extension__ ({ if (qbits >= 4 && qbits <=
 8) ; else __assert_fail ("qbits >= 4 && qbits <= 8"
, "../../nnc/ccv_nnc_easy.h", 214, __extension__ __PRETTY_FUNCTION__
); }));
215	new_params.datatype = ((params.datatype >> 12) & 0xff) | CCV_QX | ((qbits << 8) & 0xf00);
216	new_params.reserved = number_in_blocks;
217	return new_params;
218}
219 
220static inline size_t ccv_nnc_tensor_data_size_without_padding(const ccv_nnc_tensor_param_t params)
221{
222	const ssize_t count = (ssize_t)ccv_nnc_tensor_count(params);
223	ssize_t data_size;
224	if (CCV_GET_DATA_TYPE(params.datatype)((params.datatype) & 0xFF000) == CCV_QX)
225	{
226		// Our QX right now only does palettization. Hence, we need to get the palette datatype.
227		const int palette_datatype = (params.datatype & 0xff) << 12;
228		const int number_in_blocks = params.reserved;
229		const int num_blocks = (int)((count + number_in_blocks - 1) / number_in_blocks);
230		const int qbits = (params.datatype & 0xf00) >> 8;
231		assert(qbits >= 4 && qbits <= 8)((void) sizeof ((qbits >= 4 && qbits <= 8) ? 1 :
 0), __extension__ ({ if (qbits >= 4 && qbits <=
 8) ; else __assert_fail ("qbits >= 4 && qbits <= 8"
, "../../nnc/ccv_nnc_easy.h", 231, __extension__ __PRETTY_FUNCTION__
); }));
232		data_size = (ssize_t)(1 << qbits) * CCV_GET_DATA_TYPE_SIZE(palette_datatype)_ccv_get_data_type_size[((palette_datatype) & 0xFF000) >>
 12] * num_blocks + (count * qbits + 7) / 8;
233	} else
234		data_size = CCV_GET_DATA_TYPE_SIZE(params.datatype)_ccv_get_data_type_size[((params.datatype) & 0xFF000) >>
 12] * count;
235	return data_size;
236}
237 
238static inline size_t ccv_nnc_tensor_data_size(const ccv_nnc_tensor_param_t params)
239{
240	ssize_t data_size = ccv_nnc_tensor_data_size_without_padding(params);
241#ifdef HAVE_CUDA1 // For CUDA, we align to 128-bytes.
242	if (CCV_TENSOR_GET_MEMORY(params.type)((params.type) & 0x3) == CCV_TENSOR_GPU_MEMORY)
243		return ((data_size + 127) & -128);
244	else
245#elif defined(HAVE_MPS) // For MPS, we have to align to PAGE_SIZE.
246	if (CCV_TENSOR_GET_MEMORY(params.type)((params.type) & 0x3) == CCV_TENSOR_GPU_MEMORY)
247		return ((data_size + PAGE_SIZE - 1) & -PAGE_SIZE);
248	else
249#endif
250	return ((data_size + 63) & -64);
251}
252 
253static inline void ccv_nnc_tensor_view_get_dim(const ccv_nnc_tensor_view_t* const tv, int dim[CCV_NNC_MAX_DIM_ALLOC(12)])
254{
255	int x;
256	const int nd = ccv_nnc_tensor_nd(tv->info.dim);
257	const int offset = CCV_NNC_MAX_DIM(2) + 2 - nd;
258	for (x = 0; x < offset; x++)
259		dim[x] = 1;
260	for (x = offset; x < CCV_NNC_MAX_DIM(2) + 2; x++)
261		dim[x] = tv->info.dim[x - offset];
262	dim[CCV_NNC_MAX_DIM(2) + 2] = 0;
263}
264 
265static inline CCV_WARN_UNUSED(int)int __attribute__((warn_unused_result)) ccv_nnc_is_tensor_stride_packed(const int stride[CCV_NNC_MAX_DIM_ALLOC(12)], const int dim[CCV_NNC_MAX_DIM_ALLOC(12)])
266{
267	const int nd = ccv_nnc_tensor_nd(stride);
268	int i;
269	int cstride = 1;
270	for (i = nd - 1; i >= 0; i--)
271	{
272		if (stride[i] != cstride)
273			return 0;
274		cstride *= dim[i];
275	}
276	return 1;
277}
278 
279static inline CCV_WARN_UNUSED(int)int __attribute__((warn_unused_result)) ccv_nnc_tensor_view_check_dim(const ccv_nnc_tensor_view_t* const tv, const int dim[CCV_NNC_MAX_DIM_ALLOC(12)])
280{
281	int x;
282	const int nd = ccv_nnc_tensor_nd(tv->info.dim);
283	const int offset = CCV_NNC_MAX_DIM(2) + 2 - nd;
284	for (x = 0; x < offset; x++)
285		if (dim[x] != 1)
286			return 0;
287	for (x = offset; x < CCV_NNC_MAX_DIM(2) + 2; x++)
288		if (dim[x] != tv->info.dim[x - offset])
289			return 0;
290	return 1;
291}
292 
293static inline void ccv_nnc_tensor_view_get_broadcast_dim(const ccv_nnc_tensor_view_t* const tv, int dim[CCV_NNC_MAX_DIM_ALLOC(12)])
294{
295	int x;
296	const int nd = ccv_nnc_tensor_nd(tv->info.dim);
297	const int offset = CCV_NNC_MAX_DIM(2) + 2 - nd;
298	for (x = 0; x < offset; x++)
299		dim[x] = ccv_max(1, dim[x])({ typeof (1) _a = (1); typeof (dim[x]) _b = (dim[x]); (_a >
 _b) ? _a : _b; });
300	for (x = offset; x < CCV_NNC_MAX_DIM(2) + 2; x++)
301		dim[x] = ccv_max(dim[x], tv->info.dim[x - offset])({ typeof (dim[x]) _a = (dim[x]); typeof (tv->info.dim[x -
 offset]) _b = (tv->info.dim[x - offset]); (_a > _b) ? _a
 : _b; });
302}
303 
304static inline CCV_WARN_UNUSED(int)int __attribute__((warn_unused_result)) ccv_nnc_tensor_view_check_broadcast_dim(const ccv_nnc_tensor_view_t* const tv, int dim[CCV_NNC_MAX_DIM_ALLOC(12)])
305{
306	int x;
307	const int nd = ccv_nnc_tensor_nd(tv->info.dim);
308	const int offset = CCV_NNC_MAX_DIM(2) + 2 - nd;
309	for (x = offset; x < CCV_NNC_MAX_DIM(2) + 2; x++)
310		if (dim[x] != tv->info.dim[x - offset] && tv->info.dim[x - offset] != 1)
311			return 0;
312	return 1;
313}
314 
315static inline void ccv_nnc_tensor_view_get_stride(const ccv_nnc_tensor_view_t* const tv, int stride[CCV_NNC_MAX_DIM_ALLOC(12)])
316{
317	int x;
318	const int nd = ccv_nnc_tensor_nd(tv->info.dim);
319	const int offset = ccv_max(CCV_NNC_MAX_DIM + 2 - nd, 0)({ typeof ((2) + 2 - nd) _a = ((2) + 2 - nd); typeof (0) _b =
 (0); (_a > _b) ? _a : _b; });
320	stride[CCV_NNC_MAX_DIM(2) + 2] = 0;
321	if (CCV_IS_TENSOR_VIEW(tv)((*(int*)(tv)) & CCV_TENSOR_VIEW))
322	{
323		for (x = offset; x < CCV_NNC_MAX_DIM(2) + 2; x++)
324			stride[x] = tv->stride[x - offset];
325		for (x = 0; x < offset; x++)
326			stride[x] = stride[offset];
327	} else {
328		int cstride = 1;
329		for (x = CCV_NNC_MAX_DIM(2) + 1; x >= offset; x--)
330		{
331			stride[x] = cstride;
332			cstride *= tv->info.dim[x - offset];
333		}
334		for (x = 0; x < offset; x++)
335			stride[x] = cstride;
336	}
337}
338 
339static inline int ccv_nnc_tensor_get_n(const ccv_nnc_tensor_param_t params)
340{
341	switch (params.format)
342	{
343		case CCV_TENSOR_FORMAT_NHWC:
344		case CCV_TENSOR_FORMAT_NCHW:
345			if (ccv_nnc_tensor_nd(params.dim) == CCV_NNC_MAX_DIM(2) + 1)
346				return 1;
347			else
348				return params.dim[0];
349		case CCV_TENSOR_FORMAT_CHWN:
350			return params.dim[CCV_NNC_MAX_DIM(2) + 1];
351	}
352	return 0;
12
←
'Default' branch taken. Execution continues on line 352→
13
←
Returning zero→
353}
354 
355static inline int ccv_nnc_tensor_get_c(const ccv_nnc_tensor_param_t params)
356{
357	const int nd = ccv_nnc_tensor_nd(params.dim);
358	switch (params.format)
359	{
360		case CCV_TENSOR_FORMAT_NHWC:
361			return params.dim[nd - 1];
362		case CCV_TENSOR_FORMAT_NCHW:
363			if (nd == CCV_NNC_MAX_DIM(2) + 1)
364				return params.dim[0];
365			else
366				return params.dim[nd <= 1 ? 0 : 1];
367		case CCV_TENSOR_FORMAT_CHWN:
368			return params.dim[0];
369	}
370	return 0;
371}
372 
373static inline void ccv_nnc_tensor_set_n(ccv_nnc_tensor_param_t* const params, const int n)
374{
375	switch (params->format)
376	{
377		case CCV_TENSOR_FORMAT_NHWC:
378		case CCV_TENSOR_FORMAT_NCHW:
379			params->dim[0] = n;
380			break;
381		case CCV_TENSOR_FORMAT_CHWN:
382			params->dim[CCV_NNC_MAX_DIM(2) + 1] = n;
383			break;
384	}
385}
386 
387static inline void ccv_nnc_tensor_set_c(ccv_nnc_tensor_param_t* const params, const int nd, const int c)
388{
389	switch (params->format)
390	{
391		case CCV_TENSOR_FORMAT_NHWC:
392			params->dim[nd - 1] = c;
393			break;
394		case CCV_TENSOR_FORMAT_NCHW:
395			if (nd == CCV_NNC_MAX_DIM(2) + 1)
396				params->dim[0] = c;
397			else
398				params->dim[nd <= 1 ? 0 : 1] = c;
399			break;
400		case CCV_TENSOR_FORMAT_CHWN:
401			params->dim[0] = c;
402			break;
403	}
404}
405 
406static inline int ccv_nnc_is_matrix_transpose(const ccv_nnc_tensor_param_t params, const int transpose[2])
407{
408	const int nd = ccv_nnc_tensor_nd(params.dim);
409	assert(nd >= 1)((void) sizeof ((nd >= 1) ? 1 : 0), __extension__ ({ if (nd
 >= 1) ; else __assert_fail ("nd >= 1", "../../nnc/ccv_nnc_easy.h"
, 409, __extension__ __PRETTY_FUNCTION__); }));
410	if (transpose[0] != transpose[1])
411	{
412		assert(nd > 1)((void) sizeof ((nd > 1) ? 1 : 0), __extension__ ({ if (nd
 > 1) ; else __assert_fail ("nd > 1", "../../nnc/ccv_nnc_easy.h"
, 412, __extension__ __PRETTY_FUNCTION__); }));
413		assert(((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) ||((void) sizeof ((((transpose[0] == ((nd == 2) ? 0 : nd - 2)) &&
 (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || ((transpose[1
] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((
nd == 2) ? 1 : nd - 1)))) ? 1 : 0), __extension__ ({ if (((transpose
[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == (
(nd == 2) ? 1 : nd - 1))) || ((transpose[1] == ((nd == 2) ? 0
 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd -
 1)))) ; else __assert_fail ("((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || ((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1)))"
, "../../nnc/ccv_nnc_easy.h", 414, __extension__ __PRETTY_FUNCTION__
); }))
414			((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1))))((void) sizeof ((((transpose[0] == ((nd == 2) ? 0 : nd - 2)) &&
 (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || ((transpose[1
] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((
nd == 2) ? 1 : nd - 1)))) ? 1 : 0), __extension__ ({ if (((transpose
[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == (
(nd == 2) ? 1 : nd - 1))) || ((transpose[1] == ((nd == 2) ? 0
 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd -
 1)))) ; else __assert_fail ("((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || ((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1)))"
, "../../nnc/ccv_nnc_easy.h", 414, __extension__ __PRETTY_FUNCTION__
); }));
415		return 1;
416	}
417	return 0;
418}
419 
420// Assuming this is batched matrix. Getting relevant parameters.
421static inline void ccv_nnc_tensor_get_matrix_params(const ccv_nnc_tensor_param_t params, const int* const stride, const int* const dim, const int transpose[2], int* const batch_size_ref, int* const rows_ref, int* const cols_ref, int* const batch_inc_ref, int* const rows_inc_ref, int* const cols_inc_ref)
422{
423	const int nd = ccv_nnc_tensor_nd(params.dim);
424	assert(nd >= 1)((void) sizeof ((nd >= 1) ? 1 : 0), __extension__ ({ if (nd
 >= 1) ; else __assert_fail ("nd >= 1", "../../nnc/ccv_nnc_easy.h"
, 424, __extension__ __PRETTY_FUNCTION__); }));
425	*batch_size_ref = nd < 3 ? 1 : params.dim[nd - 3];
426	*batch_inc_ref = nd < 3 ? 0 : stride ? stride[nd - 3] : dim[nd - 2] * dim[nd - 1];
427	int rows = nd == 1 ? 1 : (nd == 2 ? params.dim[0] : params.dim[nd - 2]);
428	int rows_inc = stride ? (nd >= 2 ? stride[nd - 2] : stride[0] * dim[0]) : dim[nd - 1];
429	int cols = params.dim[nd - 1];
430	int cols_inc = 1;
431	if (transpose[0] != transpose[1])
432	{
433		assert(nd > 1)((void) sizeof ((nd > 1) ? 1 : 0), __extension__ ({ if (nd
 > 1) ; else __assert_fail ("nd > 1", "../../nnc/ccv_nnc_easy.h"
, 433, __extension__ __PRETTY_FUNCTION__); }));
434		assert(((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) ||((void) sizeof ((((transpose[0] == ((nd == 2) ? 0 : nd - 2)) &&
 (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || ((transpose[1
] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((
nd == 2) ? 1 : nd - 1)))) ? 1 : 0), __extension__ ({ if (((transpose
[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == (
(nd == 2) ? 1 : nd - 1))) || ((transpose[1] == ((nd == 2) ? 0
 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd -
 1)))) ; else __assert_fail ("((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || ((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1)))"
, "../../nnc/ccv_nnc_easy.h", 435, __extension__ __PRETTY_FUNCTION__
); }))
435			((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1))))((void) sizeof ((((transpose[0] == ((nd == 2) ? 0 : nd - 2)) &&
 (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || ((transpose[1
] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((
nd == 2) ? 1 : nd - 1)))) ? 1 : 0), __extension__ ({ if (((transpose
[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == (
(nd == 2) ? 1 : nd - 1))) || ((transpose[1] == ((nd == 2) ? 0
 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd -
 1)))) ; else __assert_fail ("((transpose[0] == ((nd == 2) ? 0 : nd - 2)) && (transpose[1] == ((nd == 2) ? 1 : nd - 1))) || ((transpose[1] == ((nd == 2) ? 0 : nd - 2)) && (transpose[0] == ((nd == 2) ? 1 : nd - 1)))"
, "../../nnc/ccv_nnc_easy.h", 435, __extension__ __PRETTY_FUNCTION__
); }));
436		int t;
437		CCV_SWAP(rows, cols, t)((t) = (rows), (rows) = (cols), (cols) = (t));
438		CCV_SWAP(rows_inc, cols_inc, t)((t) = (rows_inc), (rows_inc) = (cols_inc), (cols_inc) = (t));
439	}
440	*rows_ref = rows;
441	*cols_ref = cols;
442	*rows_inc_ref = rows_inc;
443	*cols_inc_ref = cols_inc;
444}
445 
446static inline CCV_WARN_UNUSED(ccv_nnc_tensor_view_t)ccv_nnc_tensor_view_t __attribute__((warn_unused_result)) ccv_nnc_get_tensor_view(const ccv_nnc_tensor_t* const tensor)
447{
448	if (CCV_IS_TENSOR_VIEW(tensor)((*(int*)(tensor)) & CCV_TENSOR_VIEW))
449		return (ccv_nnc_tensor_view_t)*(const ccv_nnc_tensor_view_t*)tensor;
450	ccv_nnc_tensor_view_t tv = {0};
451	memcpy(&tv, tensor, sizeof(ccv_nnc_tensor_t));
452	return tv;
453}
454 
455static inline void ccv_nnc_tensor_view_alignment(ccv_nnc_tensor_view_t** const tvs, const int tv_size)
456{
457	int i, j;
458	int max_nd = 0;
459	for (i = 0; i < tv_size; i++)
460		max_nd = ccv_max(ccv_nnc_tensor_nd(tvs[i]->info.dim), max_nd)({ typeof (ccv_nnc_tensor_nd(tvs[i]->info.dim)) _a = (ccv_nnc_tensor_nd
(tvs[i]->info.dim)); typeof (max_nd) _b = (max_nd); (_a >
 _b) ? _a : _b; });
461	for (i = 0; i < tv_size; i++)
462	{
463		const int nd = ccv_nnc_tensor_nd(tvs[i]->info.dim);
464		for (j = max_nd - 1; j >= max_nd - nd; j--)
465			tvs[i]->info.dim[j] = tvs[i]->info.dim[j - max_nd + nd];
466		for (j = 0; j < max_nd - nd; j++)
467			tvs[i]->info.dim[j] = 1;
468		if (!CCV_IS_TENSOR_VIEW(tvs[i])((*(int*)(tvs[i])) & CCV_TENSOR_VIEW))
469			continue;
470		for (j = max_nd - 1; j >= max_nd - nd; j--)
471			tvs[i]->stride[j] = tvs[i]->stride[j - max_nd + nd];
472		for (j = 0; j < max_nd - nd; j++)
473			tvs[i]->stride[j] = tvs[i]->stride[max_nd - nd];
474	}
475}
476 
477 
// A pair of axes to transpose, as a compound-literal int array.
#define TRANSPOSE(_X, _Y) \
	((int[]){(_X), (_Y)})
// Two equal axes: no transpose.
#define NO_TRANSPOSE \
	TRANSPOSE(0, 0)
// We default to alpha = 1 and beta = 1
#define CMD_GEMM_X(_0, _TA, _TB, ...) \
	((ccv_nnc_cmd_param_t){ \
		.size = {.dim = {1, 1, 1}}, \
		.blas = { \
			.a = {1, 1}, \
			.transpose_a = {_TA[0], _TA[1]}, \
			.transpose_b = {_TB[0], _TB[1]}, \
		} \
	})
// GEMM parameters with up to two optional transpose pairs; omitted ones fall back to NO_TRANSPOSE.
#define CMD_GEMM(...) \
	CMD_GEMM_X(_0, ##__VA_ARGS__, NO_TRANSPOSE, NO_TRANSPOSE)
// No arguments: size defaults to {1, 1, 1}.
#define CMD_GENERIC_X_0() \
	((ccv_nnc_cmd_param_t){.size = {.dim = {1, 1, 1}}})
// Fallback for an unsupported argument count: expands to a string literal where a parameter struct is expected.
#define CMD_GENERIC_X_F(...) \
	("This should not be used, you should have either 0 parameter or 3 parameters for CMD_GENERIC")
// Three arguments: they become the size dimensions.
#define CMD_GENERIC_X_3(...) \
	((ccv_nnc_cmd_param_t){.size = {.dim = {__VA_ARGS__}}})
// Selects its fifth argument; CMD_GENERIC uses it to dispatch on the number of arguments supplied.
#define CMD_GENERIC_X_SEL(_0, _1, _2, _3, _FX, ...) _FX
// Using ## so that if it is empty, we omit one comma.
#define CMD_GENERIC(...) \
	CMD_GENERIC_X_SEL(CMD_GENERIC_X_F, ##__VA_ARGS__, CMD_GENERIC_X_3, CMD_GENERIC_X_F, CMD_GENERIC_X_F, CMD_GENERIC_X_0)(__VA_ARGS__)
// Reduce parameters: the listed axes go into .reduce.axis and their number into .reduce.count.
#define CMD_REDUCE(...) \
	((ccv_nnc_cmd_param_t){ \
		.size = {.dim = {1, 1, 1}}, \
		.reduce = { \
			.count = LIST_COUNT(__VA_ARGS__), \
			.axis = {__VA_ARGS__} \
		} \
	})
489/**
490 * @defgroup available_commands Available Commands
491 * @{
492 */
// Command of type CCV_NNC_NOOP with automatic parameters.
#define CMD_NOOP() \
	ccv_nnc_cmd(CCV_NNC_NOOP, 0, ccv_nnc_cmd_auto, 0)
// Command of type CCV_NNC_CUSTOM_FORWARD; f is passed as the second argument of ccv_nnc_cmd.
#define CMD_CUSTOM_FORWARD(f) \
	ccv_nnc_cmd(CCV_NNC_CUSTOM_FORWARD, f, ccv_nnc_cmd_auto, 0)
495/** @} */
496 
497int ccv_nnc_is_no_hint(const ccv_nnc_hint_t hint);
498int ccv_nnc_is_cmd_auto(const ccv_nnc_cmd_param_t params);
499int ccv_nnc_is_tensor_auto(const ccv_nnc_tensor_param_t params);
500 
501/**
502 * @addtogroup convenience_api
503 * @{
504 */
505/**
506 * Offsets all zero.
507 */
508extern const int ccv_nnc_no_ofs[CCV_NNC_MAX_DIM_ALLOC(12)];
509/**
510 * No hint available.
511 */
512extern const ccv_nnc_hint_t ccv_nnc_no_hint;
513/**
514 * The default symbolic graph compile parameters.
515 */
516extern const ccv_nnc_symbolic_graph_compile_param_t ccv_nnc_default_compile_params;
517/**
518 * Derive the command parameters automatically if possible.
519 */
520extern const ccv_nnc_cmd_param_t ccv_nnc_cmd_auto;
521/**
522 * Derive the tensor parameters automatically if possible.
523 */
524extern const ccv_nnc_tensor_param_t ccv_nnc_tensor_auto;
525/** @} */
526 
527// Generated command flags for easy creation of ccv_nnc_cmd_t objects.
528#include "cmd/ccv_nnc_cmd_easy.h"
529 
530#endif