/home/liu/actions-runner/_work/ccv/ccv/lib/ccv_basic.c
Line | Count | Source |
1 | | #include "ccv.h" |
2 | | #include "ccv_internal.h" |
3 | | #if defined(HAVE_SSE2) |
4 | | #include <xmmintrin.h> |
5 | | #elif defined(HAVE_NEON) |
6 | | #include <arm_neon.h> |
7 | | #endif |
8 | | |
9 | | /* sobel filter is fundamental to many other high-level algorithms, |
10 | | * here includes 2 special case impl (for 1x3/3x1, 3x3) and one general impl */ |
11 | | void ccv_sobel(ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, int type, int dx, int dy) |
12 | 10 | { |
13 | 10 | ccv_declare_derived_signature(sig, a->sig != 0, ccv_sign_with_format(64, "ccv_sobel(%d,%d)", dx, dy), a->sig, CCV_EOF_SIGN); |
14 | 10 | type = (type == 0) ? CCV_32S | CCV_GET_CHANNEL(a->type) : CCV_GET_DATA_TYPE0 (type) | 0 CCV_GET_CHANNEL0 (a->type); |
15 | 10 | ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, a->rows, a->cols, CCV_GET_CHANNEL(a->type) | CCV_ALL_DATA_TYPE, type, sig); |
16 | 10 | ccv_object_return_if_cached(, db); |
17 | 10 | int i, j, k, c, ch = CCV_GET_CHANNEL(a->type); |
18 | 10 | unsigned char* a_ptr = a->data.u8; |
19 | 10 | unsigned char* b_ptr = db->data.u8; |
20 | 10 | if (dx == 1 && dy == 02 ) |
21 | 1 | { |
22 | 1 | assert(a->cols >= 3); |
23 | | /* special case 1: 1x3 or 3x1 window */ |
24 | 1 | #define for_block(_for_get, _for_set) \ |
25 | 501 | for (i = 0; 1 i < a->rows; i++500 ) \ |
26 | 500 | { \ |
27 | 1.00k | for (k = 0; k < ch; k++500 ) \ |
28 | 500 | _for_set(b_ptr, k, 2 * (_for_get(a_ptr, ch + k) - _for_get(a_ptr, k))); \ |
29 | 249k | for (j = 1; j < a->cols - 1; j++249k ) \ |
30 | 498k | for (k = 0; 249k k < ch; k++249k ) \ |
31 | 249k | _for_set500 (b_ptr, j * ch + k, _for_get(a_ptr, (j + 1) * ch + k) - _for_get(a_ptr, (j - 1) * ch + k)); \ |
32 | 1.00k | for (k = 0; k < ch; k++500 ) \ |
33 | 500 | _for_set(b_ptr, (a->cols - 1) * ch + k, 2 * (_for_get(a_ptr, (a->cols - 1) * ch + k) - _for_get(a_ptr, (a->cols - 2) * ch + k))); \ |
34 | 500 | b_ptr += db->step; \ |
35 | 500 | a_ptr += a->step; \ |
36 | 500 | } |
37 | 1 | ccv_matrix_getter(a->type, ccv_matrix_setter, db->type, for_block); |
38 | 1 | #undef for_block |
39 | 9 | } else if (dx == 0 && dy == 14 ) { |
40 | 1 | assert(a->rows >= 3); |
41 | | /* special case 1: 1x3 or 3x1 window */ |
42 | 1 | #define for_block(_for_get, _for_set) \ |
43 | 501 | for (j = 0; 1 j < a->cols; j++500 ) \ |
44 | 1.00k | for (k = 0; 500 k < ch; k++500 ) \ |
45 | 500 | _for_set1 (b_ptr, j * ch + k, 2 * (_for_get(a_ptr + a->step, j * ch + k) - _for_get(a_ptr, j * ch + k))); \ |
46 | 1 | a_ptr += a->step; \ |
47 | 1 | b_ptr += db->step; \ |
48 | 499 | for (i = 1; i < a->rows - 1; i++498 ) \ |
49 | 498 | { \ |
50 | 249k | for (j = 0; j < a->cols; j++249k ) \ |
51 | 498k | for (k = 0; 249k k < ch; k++249k ) \ |
52 | 249k | _for_set498 (b_ptr, j * ch + k, _for_get(a_ptr + a->step, j * ch + k) - _for_get(a_ptr - a->step, j * ch + k)); \ |
53 | 498 | a_ptr += a->step; \ |
54 | 498 | b_ptr += db->step; \ |
55 | 498 | } \ |
56 | 501 | for (j = 0; j < a->cols; j++500 ) \ |
57 | 1.00k | for (k = 0; 500 k < ch; k++500 ) \ |
58 | 500 | _for_set1 (b_ptr, j * ch + k, 2 * (_for_get(a_ptr, j * ch + k) - _for_get(a_ptr - a->step, j * ch + k))); |
59 | 1 | ccv_matrix_getter(a->type, ccv_matrix_setter, db->type, for_block); |
60 | 1 | #undef for_block |
61 | 8 | } else if ((dx == 1 && dy == 11 ) || (7 dx == -17 && dy == -11 )) { |
62 | | /* special case 2: 3x3 window with diagonal direction */ |
63 | 1 | assert(a->rows >= 3 && a->cols >= 3); |
64 | 1 | #define for_block(_for_get, _for_set) \ |
65 | 500 | for (j = 0; 1 j < a->cols - 1; j++499 ) \ |
66 | 998 | for (k = 0; 499 k < ch; k++499 ) \ |
67 | 499 | _for_set1 (b_ptr, j * ch + k, 2 * (_for_get(a_ptr + a->step, (j + 1) * ch + k) - _for_get(a_ptr, j * ch + k))); \ |
68 | 2 | for (k = 0; k < ch; k++1 ) \ |
69 | 1 | _for_set(b_ptr, (a->cols - 1) * ch + k, 2 * (_for_get(a_ptr + a->step, (a->cols - 1) * ch + k) - _for_get(a_ptr, (a->cols - 1) * ch + k))); \ |
70 | 1 | a_ptr += a->step; \ |
71 | 1 | b_ptr += db->step; \ |
72 | 499 | for (i = 1; i < a->rows - 1; i++498 ) \ |
73 | 498 | { \ |
74 | 996 | for (k = 0; k < ch; k++498 ) \ |
75 | 498 | _for_set(b_ptr, k, 2 * (_for_get(a_ptr + a->step, ch + k) - _for_get(a_ptr, k))); \ |
76 | 248k | for (j = 1; j < a->cols - 1; j++248k ) \ |
77 | 496k | for (k = 0; 248k k < ch; k++248k ) \ |
78 | 248k | _for_set498 (b_ptr, j * ch + k, _for_get(a_ptr + a->step, (j + 1) * ch + k) - _for_get(a_ptr - a->step, (j - 1) * ch + k)); \ |
79 | 996 | for (k = 0; k < ch; k++498 ) \ |
80 | 498 | _for_set(b_ptr, (a->cols - 1) * ch + k, 2 * (_for_get(a_ptr, (a->cols - 1) * ch + k) - _for_get(a_ptr - a->step, (a->cols - 2) * ch + k))); \ |
81 | 498 | a_ptr += a->step; \ |
82 | 498 | b_ptr += db->step; \ |
83 | 498 | } \ |
84 | 2 | for (k = 0; k < ch; k++1 ) \ |
85 | 1 | _for_set(b_ptr, k, 2 * (_for_get(a_ptr, k) - _for_get(a_ptr - a->step, k))); \ |
86 | 500 | for (j = 1; j < a->cols; j++499 ) \ |
87 | 998 | for (k = 0; 499 k < ch; k++499 ) \ |
88 | 499 | _for_set1 (b_ptr, j * ch + k, 2 * (_for_get(a_ptr, j * ch + k) - _for_get(a_ptr - a->step, (j - 1) * ch + k))); |
89 | 1 | ccv_matrix_getter(a->type, ccv_matrix_setter, db->type, for_block); |
90 | 1 | #undef for_block |
91 | 7 | } else if ((dx == 1 && dy == -10 ) || (dx == -1 && dy == 11 )) { |
92 | | /* special case 2: 3x3 window with diagonal direction */ |
93 | 1 | assert(a->rows >= 3 && a->cols >= 3); |
94 | 1 | #define for_block(_for_get, _for_set) \ |
95 | 2 | for (k = 0; 1 k < ch; k++1 ) \ |
96 | 1 | _for_set(b_ptr, k, 2 * (_for_get(a_ptr + a->step, k) - _for_get(a_ptr, k))); \ |
97 | 500 | for (j = 1; j < a->cols; j++499 ) \ |
98 | 998 | for (k = 0; 499 k < ch; k++499 ) \ |
99 | 499 | _for_set1 (b_ptr, j * ch + k, 2 * (_for_get(a_ptr + a->step, (j - 1) * ch + k) - _for_get(a_ptr, j * ch + k))); \ |
100 | 1 | a_ptr += a->step; \ |
101 | 1 | b_ptr += db->step; \ |
102 | 499 | for (i = 1; i < a->rows - 1; i++498 ) \ |
103 | 498 | { \ |
104 | 996 | for (k = 0; k < ch; k++498 ) \ |
105 | 498 | _for_set(b_ptr, k, 2 * (_for_get(a_ptr, k) - _for_get(a_ptr - a->step, ch + k))); \ |
106 | 248k | for (j = 1; j < a->cols - 1; j++248k ) \ |
107 | 496k | for (k = 0; 248k k < ch; k++248k ) \ |
108 | 248k | _for_set498 (b_ptr, j * ch + k, _for_get(a_ptr + a->step, (j - 1) * ch + k) - _for_get(a_ptr - a->step, (j + 1) * ch + k)); \ |
109 | 996 | for (k = 0; k < ch; k++498 ) \ |
110 | 498 | _for_set(b_ptr, (a->cols - 1) * ch + k, 2 * (_for_get(a_ptr + a->step, (a->cols - 2) * ch + k) - _for_get(a_ptr, (a->cols - 1) * ch + k))); \ |
111 | 498 | a_ptr += a->step; \ |
112 | 498 | b_ptr += db->step; \ |
113 | 498 | } \ |
114 | 500 | for (j = 0; j < a->cols - 1; j++499 ) \ |
115 | 998 | for (k = 0; 499 k < ch; k++499 ) \ |
116 | 499 | _for_set1 (b_ptr, j * ch + k, 2 * (_for_get(a_ptr, j * ch + k) - _for_get(a_ptr - a->step, (j + 1) * ch + k))); \ |
117 | 2 | for (k = 0; k < ch; k++1 ) \ |
118 | 1 | _for_set(b_ptr, (a->cols - 1) * ch + k, 2 * (_for_get(a_ptr, (a->cols - 1) * ch + k) - _for_get(a_ptr - a->step, (a->cols - 1) * ch + k))); |
119 | 1 | ccv_matrix_getter(a->type, ccv_matrix_setter, db->type, for_block); |
120 | 1 | #undef for_block |
121 | 6 | } else if (dx == 3 && dy == 02 ) { |
122 | 2 | assert(a->rows >= 3 && a->cols >= 3); |
123 | | /* special case 3: 3x3 window, corresponding sigma = 0.85 */ |
124 | 2 | unsigned char* buf = (unsigned char*)alloca(db->step); |
125 | 2 | #define for_block(_for_get, _for_set_b, _for_get_b) \ |
126 | 1.00k | for (j = 0; 2 j < a->cols; j++1.00k ) \ |
127 | 2.00k | for (k = 0; 1.00k k < ch; k++1.00k ) \ |
128 | 1.00k | _for_set_b2 (b_ptr, j * ch + k, _for_get(a_ptr + a->step, j * ch + k) + 3 * _for_get(a_ptr, j * ch + k)); \ |
129 | 2 | a_ptr += a->step; \ |
130 | 2 | b_ptr += db->step; \ |
131 | 998 | for (i = 1; i < a->rows - 1; i++996 ) \ |
132 | 996 | { \ |
133 | 498k | for (j = 0; j < a->cols; j++498k ) \ |
134 | 996k | for (k = 0; 498k k < ch; k++498k ) \ |
135 | 498k | _for_set_b996 (b_ptr, j * ch + k, _for_get(a_ptr + a->step, j * ch + k) + 2 * _for_get(a_ptr, j * ch + k) + _for_get(a_ptr - a->step, j * ch + k)); \ |
136 | 996 | a_ptr += a->step; \ |
137 | 996 | b_ptr += db->step; \ |
138 | 996 | } \ |
139 | 1.00k | for (j = 0; j < a->cols; j++1.00k ) \ |
140 | 2.00k | for (k = 0; 1.00k k < ch; k++1.00k ) \ |
141 | 1.00k | _for_set_b2 (b_ptr, j * ch + k, 3 * _for_get(a_ptr, j * ch + k) + _for_get(a_ptr - a->step, j * ch + k)); \ |
142 | 2 | b_ptr = db->data.u8; \ |
143 | 1.00k | for (i = 0; i < a->rows; i++1.00k ) \ |
144 | 1.00k | { \ |
145 | 2.00k | for (k = 0; k < ch; k++1.00k ) \ |
146 | 1.00k | _for_set_b(buf, k, _for_get_b(b_ptr, ch + k) - _for_get_b(b_ptr, k)); \ |
147 | 499k | for (j = 1; j < a->cols - 1; j++498k ) \ |
148 | 996k | for (k = 0; 498k k < ch; k++498k ) \ |
149 | 498k | _for_set_b1.00k (buf, j * ch + k, _for_get_b(b_ptr, (j + 1) * ch + k) - _for_get_b(b_ptr, (j - 1) * ch + k)); \ |
150 | 2.00k | for (k = 0; k < ch; k++1.00k ) \ |
151 | 1.00k | _for_set_b(buf, (a->cols - 1) * ch + k, _for_get_b(b_ptr, (a->cols - 1) * ch + k) - _for_get_b(b_ptr, (a->cols - 2) * ch + k)); \ |
152 | 1.00k | memcpy(b_ptr, buf, db->step); \ |
153 | 1.00k | b_ptr += db->step; \ |
154 | 1.00k | } |
155 | 2 | ccv_matrix_getter(a->type, ccv_matrix_setter_getter, db->type, for_block); |
156 | 2 | #undef for_block |
157 | 4 | } else if (dx == 0 && dy == 33 ) { |
158 | 2 | assert(a->rows >= 3 && a->cols >= 3); |
159 | | /* special case 3: 3x3 window, corresponding sigma = 0.85 */ |
160 | 2 | unsigned char* buf = (unsigned char*)alloca(db->step); |
161 | 2 | #define for_block(_for_get, _for_set_b, _for_get_b) \ |
162 | 1.00k | for (j = 0; 2 j < a->cols; j++1.00k ) \ |
163 | 2.00k | for (k = 0; 1.00k k < ch; k++1.00k ) \ |
164 | 1.00k | _for_set_b2 (b_ptr, j * ch + k, _for_get(a_ptr + a->step, j * ch + k) - _for_get(a_ptr, j * ch + k)); \ |
165 | 2 | a_ptr += a->step; \ |
166 | 2 | b_ptr += db->step; \ |
167 | 998 | for (i = 1; i < a->rows - 1; i++996 ) \ |
168 | 996 | { \ |
169 | 498k | for (j = 0; j < a->cols; j++498k ) \ |
170 | 996k | for (k = 0; 498k k < ch; k++498k ) \ |
171 | 498k | _for_set_b996 (b_ptr, j * ch + k, _for_get(a_ptr + a->step, j * ch + k) - _for_get(a_ptr - a->step, j * ch + k)); \ |
172 | 996 | a_ptr += a->step; \ |
173 | 996 | b_ptr += db->step; \ |
174 | 996 | } \ |
175 | 1.00k | for (j = 0; j < a->cols; j++1.00k ) \ |
176 | 2.00k | for (k = 0; 1.00k k < ch; k++1.00k ) \ |
177 | 1.00k | _for_set_b2 (b_ptr, j * ch + k, _for_get(a_ptr, j * ch + k) - _for_get(a_ptr - a->step, j * ch + k)); \ |
178 | 2 | b_ptr = db->data.u8; \ |
179 | 1.00k | for (i = 0; i < a->rows; i++1.00k ) \ |
180 | 1.00k | { \ |
181 | 2.00k | for (k = 0; k < ch; k++1.00k ) \ |
182 | 1.00k | _for_set_b(buf, k, _for_get_b(b_ptr, ch + k) + 3 * _for_get_b(b_ptr, k)); \ |
183 | 499k | for (j = 1; j < a->cols - 1; j++498k ) \ |
184 | 996k | for (k = 0; 498k k < ch; k++498k ) \ |
185 | 498k | _for_set_b1.00k (buf, j * ch + k, _for_get_b(b_ptr, (j + 1) * ch + k) + 2 * _for_get_b(b_ptr, j * ch + k) + _for_get_b(b_ptr, (j - 1) * ch + k)); \ |
186 | 2.00k | for (k = 0; k < ch; k++1.00k ) \ |
187 | 1.00k | _for_set_b(buf, (a->cols - 1) * ch + k, _for_get_b(b_ptr, (a->cols - 2) * ch + k) + 3 * _for_get_b(b_ptr, (a->cols - 1) * ch + k)); \ |
188 | 1.00k | memcpy(b_ptr, buf, db->step); \ |
189 | 1.00k | b_ptr += db->step; \ |
190 | 1.00k | } |
191 | 2 | ccv_matrix_getter(a->type, ccv_matrix_setter_getter, db->type, for_block); |
192 | 2 | #undef for_block |
193 | 2 | } else { |
194 | | /* general case: in this case, I will generate a separable filter, and do the convolution */ |
195 | 2 | int fsz = ccv_max(dx, dy); |
196 | 2 | assert(fsz % 2 == 1); |
197 | 2 | int hfz = fsz / 2; |
198 | 2 | unsigned char* df = (unsigned char*)alloca(sizeof(double) * fsz); |
199 | 2 | unsigned char* gf = (unsigned char*)alloca(sizeof(double) * fsz); |
200 | | /* the sigma calculation is linear derviation of 3x3 - 0.85, 5x5 - 1.32 */ |
201 | 2 | double sigma = ((fsz - 1) / 2) * 0.47 + 0.38; |
202 | 2 | double sigma2 = (2.0 * sigma * sigma); |
203 | | /* 2.5 is the factor to make the kernel "visible" in integer setting */ |
204 | 2 | double psigma3 = 2.5 / sqrt(sqrt(2 * CCV_PI) * sigma * sigma * sigma); |
205 | 12 | for (i = 0; i < fsz; i++10 ) |
206 | 10 | { |
207 | 10 | ((double*)df)[i] = (i - hfz) * exp(-((i - hfz) * (i - hfz)) / sigma2) * psigma3; |
208 | 10 | ((double*)gf)[i] = exp(-((i - hfz) * (i - hfz)) / sigma2) * psigma3; |
209 | 10 | } |
210 | 2 | if (db->type & CCV_32S) |
211 | 2 | { |
212 | 12 | for (i = 0; i < fsz; i++10 ) |
213 | 10 | { |
214 | | // df could be negative, thus, (int)(x + 0.5) shortcut will not work |
215 | 10 | ((int*)df)[i] = (int)round(((double*)df)[i] * 256.0); |
216 | 10 | ((int*)gf)[i] = (int)(((double*)gf)[i] * 256.0 + 0.5); |
217 | 10 | } |
218 | 2 | } else { |
219 | 0 | for (i = 0; i < fsz; i++) |
220 | 0 | { |
221 | 0 | ccv_set_value(db->type, df, i, ((double*)df)[i], 0); |
222 | 0 | ccv_set_value(db->type, gf, i, ((double*)gf)[i], 0); |
223 | 0 | } |
224 | 0 | } |
225 | 2 | if (dx < dy) |
226 | 1 | { |
227 | 1 | unsigned char* tf = df; |
228 | 1 | df = gf; |
229 | 1 | gf = tf; |
230 | 1 | } |
231 | 2 | unsigned char* buf = (unsigned char*)alloca(sizeof(double) * ch * (fsz + ccv_max(a->rows, a->cols))); |
232 | 2 | #define for_block(_for_get, _for_type_b, _for_set_b, _for_get_b) \ |
233 | 1.00k | for (i = 0; 2 i < a->rows; i++1.00k ) \ |
234 | 1.00k | { \ |
235 | 3.00k | for (j = 0; j < hfz; j++2.00k ) \ |
236 | 4.00k | for (k = 0; 2.00k k < ch; k++2.00k ) \ |
237 | 2.00k | _for_set_b1.00k (buf, j * ch + k, _for_get(a_ptr, k)); \ |
238 | 501k | for (j = 0; j < a->cols; j++500k ) \ |
239 | 1.00M | for (k = 0; 500k k < ch; k++500k ) \ |
240 | 500k | _for_set_b1.00k (buf, (j + hfz) * ch + k, _for_get(a_ptr, j * ch + k)); \ |
241 | 3.00k | for (j = a->cols; j < a->cols + hfz; j++2.00k ) \ |
242 | 4.00k | for (k = 0; 2.00k k < ch; k++2.00k ) \ |
243 | 2.00k | _for_set_b1.00k (buf, (j + hfz) * ch + k, _for_get(a_ptr, (a->cols - 1) * ch + k)); \ |
244 | 501k | for (j = 0; j < a->cols; j++500k ) \ |
245 | 500k | { \ |
246 | 1.00M | for (c = 0; c < ch; c++500k ) \ |
247 | 500k | { \ |
248 | 500k | _for_type_b sum = 0; \ |
249 | 3.00M | for (k = 0; k < fsz; k++2.50M ) \ |
250 | 2.50M | sum += _for_get_b(buf, (j + k) * ch + c) * _for_get_b500k (df, k); \ |
251 | 500k | _for_set_b(b_ptr, j * ch + c, sum, 8); \ |
252 | 500k | } \ |
253 | 500k | } \ |
254 | 1.00k | a_ptr += a->step; \ |
255 | 1.00k | b_ptr += db->step; \ |
256 | 1.00k | } \ |
257 | 2 | b_ptr = db->data.u8; \ |
258 | 1.00k | for (i = 0; i < a->cols; i++1.00k ) \ |
259 | 1.00k | { \ |
260 | 3.00k | for (j = 0; j < hfz; j++2.00k ) \ |
261 | 4.00k | for (k = 0; 2.00k k < ch; k++2.00k ) \ |
262 | 2.00k | _for_set_b1.00k (buf, j * ch + k, _for_get_b(b_ptr, i * ch + k)); \ |
263 | 501k | for (j = 0; j < a->rows; j++500k ) \ |
264 | 1.00M | for (k = 0; 500k k < ch; k++500k ) \ |
265 | 500k | _for_set_b1.00k (buf, (j + hfz) * ch + k, _for_get_b(b_ptr + j * db->step, i * ch + k)); \ |
266 | 3.00k | for (j = a->rows; j < a->rows + hfz; j++2.00k ) \ |
267 | 4.00k | for (k = 0; 2.00k k < ch; k++2.00k ) \ |
268 | 2.00k | _for_set_b1.00k (buf, (j + hfz) * ch + k, _for_get_b(b_ptr + (a->rows - 1) * db->step, i * ch + k)); \ |
269 | 501k | for (j = 0; j < a->rows; j++500k ) \ |
270 | 500k | { \ |
271 | 1.00M | for (c = 0; c < ch; c++500k ) \ |
272 | 500k | { \ |
273 | 500k | _for_type_b sum = 0; \ |
274 | 3.00M | for (k = 0; k < fsz; k++2.50M ) \ |
275 | 2.50M | sum += _for_get_b(buf, (j + k) * ch + c) * _for_get_b500k (gf, k); \ |
276 | 500k | _for_set_b(b_ptr + j * db->step, i * ch + c, sum, 8); \ |
277 | 500k | } \ |
278 | 500k | } \ |
279 | 1.00k | } |
280 | 2 | ccv_matrix_getter(a->type, ccv_matrix_typeof_setter_getter, db->type, for_block); |
281 | 2 | #undef for_block |
282 | 2 | } |
283 | 10 | } |
284 | | |
285 | | /* the fast arctan function adopted from OpenCV */ |
286 | | static void _ccv_atan2(float* x, float* y, float* angle, float* mag, int len) |
287 | 0 | { |
288 | 0 | int i = 0; |
289 | 0 | float scale = (float)(180.0 / CCV_PI); |
290 | 0 | #ifdef HAVE_SSE2 |
291 | 0 | #ifndef _WIN32 |
292 | 0 | union { int i; float fl; } iabsmask; iabsmask.i = 0x7fffffff; |
293 | 0 | __m128 eps = _mm_set1_ps((float)1e-6), absmask = _mm_set1_ps(iabsmask.fl); |
294 | 0 | __m128 _90 = _mm_set1_ps((float)(3.141592654 * 0.5)), _180 = _mm_set1_ps((float)3.141592654), _360 = _mm_set1_ps((float)(3.141592654 * 2)); |
295 | 0 | __m128 zero = _mm_setzero_ps(), _0_28 = _mm_set1_ps(0.28f), scale4 = _mm_set1_ps(scale); |
296 | | |
297 | 0 | for(; i <= len - 4; i += 4) |
298 | 0 | { |
299 | 0 | __m128 x4 = _mm_loadu_ps(x + i), y4 = _mm_loadu_ps(y + i); |
300 | 0 | __m128 xq4 = _mm_mul_ps(x4, x4), yq4 = _mm_mul_ps(y4, y4); |
301 | 0 | __m128 xly = _mm_cmplt_ps(xq4, yq4); |
302 | 0 | __m128 z4 = _mm_div_ps(_mm_mul_ps(x4, y4), _mm_add_ps(_mm_add_ps(_mm_max_ps(xq4, yq4), _mm_mul_ps(_mm_min_ps(xq4, yq4), _0_28)), eps)); |
303 | | |
304 | | // a4 <- x < y ? 90 : 0; |
305 | 0 | __m128 a4 = _mm_and_ps(xly, _90); |
306 | | // a4 <- (y < 0 ? 360 - a4 : a4) == ((x < y ? y < 0 ? 270 : 90) : (y < 0 ? 360 : 0)) |
307 | 0 | __m128 mask = _mm_cmplt_ps(y4, zero); |
308 | 0 | a4 = _mm_or_ps(_mm_and_ps(_mm_sub_ps(_360, a4), mask), _mm_andnot_ps(mask, a4)); |
309 | | // a4 <- (x < 0 && !(x < y) ? 180 : a4) |
310 | 0 | mask = _mm_andnot_ps(xly, _mm_cmplt_ps(x4, zero)); |
311 | 0 | a4 = _mm_or_ps(_mm_and_ps(_180, mask), _mm_andnot_ps(mask, a4)); |
312 | | |
313 | | // a4 <- (x < y ? a4 - z4 : a4 + z4) |
314 | 0 | a4 = _mm_mul_ps(_mm_add_ps(_mm_xor_ps(z4, _mm_andnot_ps(absmask, xly)), a4), scale4); |
315 | 0 | __m128 m4 = _mm_sqrt_ps(_mm_add_ps(xq4, yq4)); |
316 | 0 | _mm_storeu_ps(angle + i, a4); |
317 | 0 | _mm_storeu_ps(mag + i, m4); |
318 | 0 | } |
319 | 0 | #endif |
320 | 0 | #endif |
321 | 0 | for(; i < len; i++) |
322 | 0 | { |
323 | 0 | float xf = x[i], yf = y[i]; |
324 | 0 | float a, x2 = xf * xf, y2 = yf * yf; |
325 | 0 | if(y2 <= x2) |
326 | 0 | a = xf * yf / (x2 + 0.28f * y2 + (float)1e-6) + (float)(xf < 0 ? CCV_PI : yf >= 0 ? 0 : CCV_PI * 2); |
327 | 0 | else |
328 | 0 | a = (float)(yf >= 0 ? CCV_PI * 0.5 : CCV_PI * 1.5) - xf * yf / (y2 + 0.28f * x2 + (float)1e-6); |
329 | 0 | angle[i] = a * scale; |
330 | 0 | mag[i] = sqrtf(x2 + y2); |
331 | 0 | } |
332 | 0 | } |
333 | | |
334 | | void ccv_gradient(ccv_dense_matrix_t* a, ccv_dense_matrix_t** theta, int ttype, ccv_dense_matrix_t** m, int mtype, int dx, int dy) |
335 | 0 | { |
336 | 0 | ccv_declare_derived_signature(tsig, a->sig != 0, ccv_sign_with_format(64, "ccv_gradient(theta,%d,%d)", dx, dy), a->sig, CCV_EOF_SIGN); |
337 | 0 | ccv_declare_derived_signature(msig, a->sig != 0, ccv_sign_with_format(64, "ccv_gradient(m,%d,%d)", dx, dy), a->sig, CCV_EOF_SIGN); |
338 | 0 | int ch = CCV_GET_CHANNEL(a->type); |
339 | 0 | ccv_dense_matrix_t* dtheta = *theta = ccv_dense_matrix_renew(*theta, a->rows, a->cols, CCV_32F | ch, CCV_32F | ch, tsig); |
340 | 0 | ccv_dense_matrix_t* dm = *m = ccv_dense_matrix_renew(*m, a->rows, a->cols, CCV_32F | ch, CCV_32F | ch, msig); |
341 | 0 | assert(dtheta && dm); |
342 | 0 | ccv_object_return_if_cached(, dtheta, dm); |
343 | 0 | ccv_revive_object_if_cached(dtheta, dm); |
344 | 0 | ccv_dense_matrix_t* tx = 0; |
345 | 0 | ccv_dense_matrix_t* ty = 0; |
346 | 0 | ccv_sobel(a, &tx, CCV_32F | ch, dx, 0); |
347 | 0 | ccv_sobel(a, &ty, CCV_32F | ch, 0, dy); |
348 | 0 | _ccv_atan2(tx->data.f32, ty->data.f32, dtheta->data.f32, dm->data.f32, ch * a->rows * a->cols); |
349 | 0 | ccv_matrix_free(tx); |
350 | 0 | ccv_matrix_free(ty); |
351 | 0 | } |
352 | | |
353 | | static void _ccv_flip_y_self(ccv_dense_matrix_t* a) |
354 | 2 | { |
355 | 2 | int i; |
356 | 2 | unsigned char* buffer = (unsigned char*)alloca(a->step); |
357 | 2 | unsigned char* a_ptr = a->data.u8; |
358 | 2 | unsigned char* b_ptr = a->data.u8 + (a->rows - 1) * a->step; |
359 | 502 | for (i = 0; i < a->rows / 2; i++500 ) |
360 | 500 | { |
361 | 500 | memcpy(buffer, a_ptr, a->step); |
362 | 500 | memcpy(a_ptr, b_ptr, a->step); |
363 | 500 | memcpy(b_ptr, buffer, a->step); |
364 | 500 | a_ptr += a->step; |
365 | 500 | b_ptr -= a->step; |
366 | 500 | } |
367 | 2 | } |
368 | | |
369 | | static void _ccv_flip_x_self(ccv_dense_matrix_t* a) |
370 | 75.1k | { |
371 | 75.1k | int i, j; |
372 | 75.1k | int len = CCV_GET_DATA_TYPE_SIZE(a->type) * CCV_GET_CHANNEL(a->type); |
373 | 75.1k | unsigned char* buffer = (unsigned char*)alloca(len); |
374 | 75.1k | unsigned char* a_ptr = a->data.u8; |
375 | 2.48M | for (i = 0; i < a->rows; i++2.40M ) |
376 | 2.40M | { |
377 | 41.1M | for (j = 0; j < a->cols / 2; j++38.7M ) |
378 | 38.7M | { |
379 | 38.7M | memcpy(buffer, a_ptr + j * len, len); |
380 | 38.7M | memcpy(a_ptr + j * len, a_ptr + (a->cols - 1 - j) * len, len); |
381 | 38.7M | memcpy(a_ptr + (a->cols - 1 - j) * len, buffer, len); |
382 | 38.7M | } |
383 | 2.40M | a_ptr += a->step; |
384 | 2.40M | } |
385 | 75.1k | } |
386 | | |
387 | | void ccv_flip(ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, int btype, int type) |
388 | 75.1k | { |
389 | | /* this is the special case where ccv_declare_derived_signature_* macros cannot handle properly */ |
390 | 75.1k | uint64_t sig = a->sig; |
391 | 75.1k | if (type & CCV_FLIP_Y) |
392 | 2 | sig = (a->sig == 0) ? 00 : ccv_cache_generate_signature("ccv_flip_y", 10, sig, CCV_EOF_SIGN); |
393 | 75.1k | if (type & CCV_FLIP_X) |
394 | 75.1k | sig = (a->sig == 0) ? 075.1k : ccv_cache_generate_signature("ccv_flip_x", 10, sig, 2 CCV_EOF_SIGN2 ); |
395 | 75.1k | ccv_dense_matrix_t* db; |
396 | 75.1k | if (b == 0) |
397 | 0 | { |
398 | 0 | db = a; |
399 | 0 | if (a->sig != 0) |
400 | 0 | { |
401 | 0 | btype = CCV_GET_DATA_TYPE(a->type) | CCV_GET_CHANNEL(a->type); |
402 | 0 | sig = ccv_cache_generate_signature((const char*)&btype, sizeof(int), sig, CCV_EOF_SIGN); |
403 | 0 | a->sig = sig; |
404 | 0 | } |
405 | 75.1k | } else { |
406 | 75.1k | btype = CCV_GET_DATA_TYPE(a->type) | CCV_GET_CHANNEL(a->type); |
407 | 75.1k | *b = db = ccv_dense_matrix_renew(*b, a->rows, a->cols, btype, btype, sig); |
408 | 75.1k | ccv_object_return_if_cached(, db); |
409 | 75.1k | if (a->data.u8 != db->data.u8) |
410 | 3 | memcpy(db->data.u8, a->data.u8, a->rows * a->step); |
411 | 75.1k | } |
412 | 75.1k | if (type & CCV_FLIP_Y) |
413 | 2 | _ccv_flip_y_self(db); |
414 | 75.1k | if (type & CCV_FLIP_X) |
415 | 75.1k | _ccv_flip_x_self(db); |
416 | 75.1k | } |
417 | | |
418 | | void ccv_blur(ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, int type, double sigma) |
419 | 1 | { |
420 | 1 | ccv_declare_derived_signature(sig, a->sig != 0, ccv_sign_with_format(64, "ccv_blur(%la)", sigma), a->sig, CCV_EOF_SIGN); |
421 | 1 | type = (type == 0) ? CCV_GET_DATA_TYPE(a->type) | CCV_GET_CHANNEL(a->type) : CCV_GET_DATA_TYPE0 (type) | 0 CCV_GET_CHANNEL0 (a->type); |
422 | 1 | ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, a->rows, a->cols, CCV_ALL_DATA_TYPE | CCV_GET_CHANNEL(a->type), type, sig); |
423 | 1 | ccv_object_return_if_cached(, db); |
424 | 1 | int fsz = ccv_max(1, (int)(4.0 * sigma + 1.0 - 1e-8)) * 2 + 1; |
425 | 1 | int hfz = fsz / 2; |
426 | 1 | assert(hfz > 0); |
427 | 1 | unsigned char* buf = (unsigned char*)alloca(sizeof(double) * ccv_max(hfz * 2 + a->rows, (hfz * 2 + a->cols) * CCV_GET_CHANNEL(a->type))); |
428 | 1 | unsigned char* filter = (unsigned char*)alloca(sizeof(double) * fsz); |
429 | 1 | double tw = 0; |
430 | 1 | int i, j, k, ch = CCV_GET_CHANNEL(a->type); |
431 | 1 | assert(fsz > 0); |
432 | 28 | for (i = 0; 1 i < fsz; i++27 ) |
433 | 27 | tw += ((double*)filter)[i] = exp(-((i - hfz) * (i - hfz)) / (2.0 * sigma * sigma)); |
434 | 1 | int no_8u_type = (db->type & CCV_8U) ? CCV_32S : db->type0 ; |
435 | 1 | if (no_8u_type & CCV_32S) |
436 | 1 | { |
437 | 1 | tw = 256.0 / tw; |
438 | 28 | for (i = 0; i < fsz; i++27 ) |
439 | 27 | ((int*)filter)[i] = (int)(((double*)filter)[i] * tw + 0.5); |
440 | 1 | } else { |
441 | 0 | tw = 1.0 / tw; |
442 | 0 | for (i = 0; i < fsz; i++) |
443 | 0 | ccv_set_value(no_8u_type, filter, i, ((double*)filter)[i] * tw, 0); |
444 | 0 | } |
445 | | /* horizontal */ |
446 | 1 | unsigned char* a_ptr = a->data.u8; |
447 | 1 | unsigned char* b_ptr = db->data.u8; |
448 | 1 | assert(ch > 0); |
449 | 1 | #define for_block(_for_type, _for_set_b, _for_get_b, _for_set_a, _for_get_a) \ |
450 | 601 | for (i = 0; 1 i < a->rows; i++600 ) \ |
451 | 600 | { \ |
452 | 8.40k | for (j = 0; j < hfz; j++7.80k ) \ |
453 | 31.2k | for (k = 0; 7.80k k < ch; k++23.4k ) \ |
454 | 7.80k | _for_set_b600 (buf, j * ch + k, _for_get_a(a_ptr, k)); \ |
455 | 1.44M | for (j = 0; j < a->cols * ch; j++1.44M ) \ |
456 | 600 | _for_set_b(buf, j + hfz * ch, _for_get_a(a_ptr, j)); \ |
457 | 8.40k | for (j = a->cols; j < hfz + a->cols; j++7.80k ) \ |
458 | 31.2k | for (k = 0; 7.80k k < ch; k++23.4k ) \ |
459 | 7.80k | _for_set_b600 (buf, j * ch + hfz * ch + k, _for_get_a(a_ptr, (a->cols - 1) * ch + k)); \ |
460 | 1.44M | for (j = 0; j < a->cols * ch; j++1.44M ) \ |
461 | 1.44M | { \ |
462 | 1.44M | _for_type sum = 0; \ |
463 | 40.3M | for (k = 0; k < fsz; k++38.8M ) \ |
464 | 38.8M | sum += _for_get_b(buf, k * ch + j) * _for_get_b1.44M (filter, k); \ |
465 | 1.44M | _for_set_b(buf, j, sum, 8); \ |
466 | 1.44M | } \ |
467 | 1.44M | for (j = 0; j < a->cols * ch; j++1.44M ) \ |
468 | 600 | _for_set_a(b_ptr, j, _for_get_b(buf, j)); \ |
469 | 600 | a_ptr += a->step; \ |
470 | 600 | b_ptr += db->step; \ |
471 | 600 | } |
472 | 1 | ccv_matrix_typeof_setter_getter(no_8u_type, ccv_matrix_setter, db->type, ccv_matrix_getter, a->type, for_block); |
473 | 1 | #undef for_block |
474 | | /* vertical */ |
475 | 1 | b_ptr = db->data.u8; |
476 | 1 | #define for_block(_for_type, _for_set_b, _for_get_b, _for_set_a, _for_get_a) \ |
477 | 2.40k | for (i = 0; 1 i < a->cols * ch; i++2.40k ) \ |
478 | 2.40k | { \ |
479 | 33.6k | for (j = 0; j < hfz; j++31.2k ) \ |
480 | 2.40k | _for_set_b(buf, j, _for_get_a(b_ptr, i)); \ |
481 | 1.44M | for (j = 0; j < a->rows; j++1.44M ) \ |
482 | 2.40k | _for_set_b(buf, j + hfz, _for_get_a(b_ptr + j * db->step, i)); \ |
483 | 33.6k | for (j = a->rows; j < hfz + a->rows; j++31.2k ) \ |
484 | 2.40k | _for_set_b(buf, j + hfz, _for_get_a(b_ptr + (a->rows - 1) * db->step, i)); \ |
485 | 1.44M | for (j = 0; j < a->rows; j++1.44M ) \ |
486 | 1.44M | { \ |
487 | 1.44M | _for_type sum = 0; \ |
488 | 40.3M | for (k = 0; k < fsz; k++38.8M ) \ |
489 | 38.8M | sum += _for_get_b(buf, k + j) * _for_get_b1.44M (filter, k); \ |
490 | 1.44M | _for_set_b(buf, j, sum, 8); \ |
491 | 1.44M | } \ |
492 | 1.44M | for (j = 0; j < a->rows; j++1.44M ) \ |
493 | 2.40k | _for_set_a(b_ptr + j * db->step, i, _for_get_b(buf, j)); \ |
494 | 2.40k | } |
495 | 1 | ccv_matrix_typeof_setter_getter(no_8u_type, ccv_matrix_setter_getter, db->type, for_block); |
496 | 1 | #undef for_block |
497 | 1 | } |
498 | | |
499 | | void ccv_erode(ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, int type, int fsz) |
500 | 2 | { |
501 | 2 | ccv_declare_derived_signature(sig, a->sig != 0, ccv_sign_with_format(64, "ccv_erode(%d)", fsz), a->sig, CCV_EOF_SIGN); |
502 | 2 | type = (type == 0) ? CCV_GET_DATA_TYPE(a->type) | CCV_GET_CHANNEL(a->type) : CCV_GET_DATA_TYPE0 (type) | 0 CCV_GET_CHANNEL0 (a->type); |
503 | 2 | ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, a->rows, a->cols, CCV_GET_CHANNEL(a->type) | CCV_ALL_DATA_TYPE, type, sig); |
504 | 2 | ccv_object_return_if_cached(, db); |
505 | 2 | int i, j, k, c, ch = CCV_GET_CHANNEL(a->type); |
506 | 2 | unsigned char* buf = (unsigned char*)alloca(sizeof(double) * ch * (fsz + ccv_max(a->rows, a->cols))); |
507 | 2 | const int hfz = fsz / 2; |
508 | 2 | unsigned char* a_ptr = a->data.u8; |
509 | 2 | unsigned char* b_ptr = db->data.u8; |
510 | 2 | #define for_block(_for_get, _for_type_b, _for_set_b, _for_get_b) \ |
511 | 1.00k | for (i = 0; 2 i < a->rows; i++1.00k ) \ |
512 | 1.00k | { \ |
513 | 2.50k | for (j = 0; j < hfz; j++1.50k ) \ |
514 | 6.00k | for (k = 0; 1.50k k < ch; k++4.50k ) \ |
515 | 1.50k | _for_set_b1.00k (buf, j * ch + k, _for_get(a_ptr, k)); \ |
516 | 501k | for (j = 0; j < a->cols; j++500k ) \ |
517 | 2.00M | for (k = 0; 500k k < ch; k++1.50M ) \ |
518 | 500k | _for_set_b1.00k (buf, (j + hfz) * ch + k, _for_get(a_ptr, j * ch + k)); \ |
519 | 2.50k | for (j = a->cols; j < a->cols + hfz; j++1.50k ) \ |
520 | 6.00k | for (k = 0; 1.50k k < ch; k++4.50k ) \ |
521 | 1.50k | _for_set_b1.00k (buf, (j + hfz) * ch + k, _for_get(a_ptr, (a->cols - 1) * ch + k)); \ |
522 | 501k | for (j = 0; j < a->cols; j++500k ) \ |
523 | 500k | { \ |
524 | 2.00M | for (c = 0; c < ch; c++1.50M ) \ |
525 | 1.50M | { \ |
526 | 1.50M | _for_type_b minimum = _for_get_b(buf, j * ch + c); \ |
527 | 6.00M | for (k = 1; k < fsz; k++4.50M ) \ |
528 | 4.50M | minimum = ccv_min(minimum, _for_get_b(buf, (j + k) * ch + c)); \ |
529 | 1.50M | _for_set_b(b_ptr, j * ch + c, minimum); \ |
530 | 1.50M | } \ |
531 | 500k | } \ |
532 | 1.00k | a_ptr += a->step; \ |
533 | 1.00k | b_ptr += db->step; \ |
534 | 1.00k | } \ |
535 | 2 | b_ptr = db->data.u8; \ |
536 | 1.00k | for (i = 0; i < a->cols; i++1.00k ) \ |
537 | 1.00k | { \ |
538 | 2.50k | for (j = 0; j < hfz; j++1.50k ) \ |
539 | 6.00k | for (k = 0; 1.50k k < ch; k++4.50k ) \ |
540 | 1.50k | _for_set_b1.00k (buf, j * ch + k, _for_get_b(b_ptr, i * ch + k)); \ |
541 | 501k | for (j = 0; j < a->rows; j++500k ) \ |
542 | 2.00M | for (k = 0; 500k k < ch; k++1.50M ) \ |
543 | 500k | _for_set_b1.00k (buf, (j + hfz) * ch + k, _for_get_b(b_ptr + j * db->step, i * ch + k)); \ |
544 | 2.50k | for (j = a->rows; j < a->rows + hfz; j++1.50k ) \ |
545 | 6.00k | for (k = 0; 1.50k k < ch; k++4.50k ) \ |
546 | 1.50k | _for_set_b1.00k (buf, (j + hfz) * ch + k, _for_get_b(b_ptr + (a->rows - 1) * db->step, i * ch + k)); \ |
547 | 501k | for (j = 0; j < a->rows; j++500k ) \ |
548 | 500k | { \ |
549 | 2.00M | for (c = 0; c < ch; c++1.50M ) \ |
550 | 1.50M | { \ |
551 | 1.50M | _for_type_b minimum = _for_get_b(buf, j * ch + c); \ |
552 | 6.00M | for (k = 1; k < fsz; k++4.50M ) \ |
553 | 4.50M | minimum = ccv_min(minimum, _for_get_b(buf, (j + k) * ch + c)); \ |
554 | 1.50M | _for_set_b(b_ptr + j * db->step, i * ch + c, minimum); \ |
555 | 1.50M | } \ |
556 | 500k | } \ |
557 | 1.00k | } |
558 | 2 | ccv_matrix_getter(a->type, ccv_matrix_typeof_setter_getter, db->type, for_block); |
559 | 2 | #undef for_block |
560 | 2 | } |
561 | | |
562 | | void ccv_dilate(ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, int type, int fsz) |
563 | 2 | { |
564 | 2 | ccv_declare_derived_signature(sig, a->sig != 0, ccv_sign_with_format(64, "ccv_dilate(%d)", fsz), a->sig, CCV_EOF_SIGN); |
565 | 2 | type = (type == 0) ? CCV_GET_DATA_TYPE(a->type) | CCV_GET_CHANNEL(a->type) : CCV_GET_DATA_TYPE0 (type) | 0 CCV_GET_CHANNEL0 (a->type); |
566 | 2 | ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, a->rows, a->cols, CCV_GET_CHANNEL(a->type) | CCV_ALL_DATA_TYPE, type, sig); |
567 | 2 | ccv_object_return_if_cached(, db); |
568 | 2 | int i, j, k, c, ch = CCV_GET_CHANNEL(a->type); |
569 | 2 | unsigned char* buf = (unsigned char*)alloca(sizeof(double) * ch * (fsz + ccv_max(a->rows, a->cols))); |
570 | 2 | const int hfz = fsz / 2; |
571 | 2 | unsigned char* a_ptr = a->data.u8; |
572 | 2 | unsigned char* b_ptr = db->data.u8; |
573 | 2 | #define for_block(_for_get, _for_type_b, _for_set_b, _for_get_b) \ |
574 | 1.00k | for (i = 0; 2 i < a->rows; i++1.00k ) \ |
575 | 1.00k | { \ |
576 | 2.50k | for (j = 0; j < hfz; j++1.50k ) \ |
577 | 6.00k | for (k = 0; 1.50k k < ch; k++4.50k ) \ |
578 | 1.50k | _for_set_b1.00k (buf, j * ch + k, _for_get(a_ptr, k)); \ |
579 | 501k | for (j = 0; j < a->cols; j++500k ) \ |
580 | 2.00M | for (k = 0; 500k k < ch; k++1.50M ) \ |
581 | 500k | _for_set_b1.00k (buf, (j + hfz) * ch + k, _for_get(a_ptr, j * ch + k)); \ |
582 | 2.50k | for (j = a->cols; j < a->cols + hfz; j++1.50k ) \ |
583 | 6.00k | for (k = 0; 1.50k k < ch; k++4.50k ) \ |
584 | 1.50k | _for_set_b1.00k (buf, (j + hfz) * ch + k, _for_get(a_ptr, (a->cols - 1) * ch + k)); \ |
585 | 501k | for (j = 0; j < a->cols; j++500k ) \ |
586 | 500k | { \ |
587 | 2.00M | for (c = 0; c < ch; c++1.50M ) \ |
588 | 1.50M | { \ |
589 | 1.50M | _for_type_b maximum = _for_get_b(buf, j * ch + c); \ |
590 | 6.00M | for (k = 1; k < fsz; k++4.50M ) \ |
591 | 4.50M | maximum = ccv_max(maximum, _for_get_b(buf, (j + k) * ch + c)); \ |
592 | 1.50M | _for_set_b(b_ptr, j * ch + c, maximum); \ |
593 | 1.50M | } \ |
594 | 500k | } \ |
595 | 1.00k | a_ptr += a->step; \ |
596 | 1.00k | b_ptr += db->step; \ |
597 | 1.00k | } \ |
598 | 2 | b_ptr = db->data.u8; \ |
599 | 1.00k | for (i = 0; i < a->cols; i++1.00k ) \ |
600 | 1.00k | { \ |
601 | 2.50k | for (j = 0; j < hfz; j++1.50k ) \ |
602 | 6.00k | for (k = 0; 1.50k k < ch; k++4.50k ) \ |
603 | 1.50k | _for_set_b1.00k (buf, j * ch + k, _for_get_b(b_ptr, i * ch + k)); \ |
604 | 501k | for (j = 0; j < a->rows; j++500k ) \ |
605 | 2.00M | for (k = 0; 500k k < ch; k++1.50M ) \ |
606 | 500k | _for_set_b1.00k (buf, (j + hfz) * ch + k, _for_get_b(b_ptr + j * db->step, i * ch + k)); \ |
607 | 2.50k | for (j = a->rows; j < a->rows + hfz; j++1.50k ) \ |
608 | 6.00k | for (k = 0; 1.50k k < ch; k++4.50k ) \ |
609 | 1.50k | _for_set_b1.00k (buf, (j + hfz) * ch + k, _for_get_b(b_ptr + (a->rows - 1) * db->step, i * ch + k)); \ |
610 | 501k | for (j = 0; j < a->rows; j++500k ) \ |
611 | 500k | { \ |
612 | 2.00M | for (c = 0; c < ch; c++1.50M ) \ |
613 | 1.50M | { \ |
614 | 1.50M | _for_type_b maximum = _for_get_b(buf, j * ch + c); \ |
615 | 6.00M | for (k = 1; k < fsz; k++4.50M ) \ |
616 | 4.50M | maximum = ccv_max(maximum, _for_get_b(buf, (j + k) * ch + c)); \ |
617 | 1.50M | _for_set_b(b_ptr + j * db->step, i * ch + c, maximum); \ |
618 | 1.50M | } \ |
619 | 500k | } \ |
620 | 1.00k | } |
621 | 2 | ccv_matrix_getter(a->type, ccv_matrix_typeof_setter_getter, db->type, for_block); |
622 | 2 | #undef for_block |
623 | 2 | } |