Coverage Report

Created: 2019-07-03 22:50

/home/liu/buildslave/linux-x64-runtests/build/lib/ccv_basic.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#if defined(HAVE_SSE2)
4
#include <xmmintrin.h>
5
#elif defined(HAVE_NEON)
6
#include <arm_neon.h>
7
#endif
8
9
/* sobel filter is fundamental to many other high-level algorithms,
10
 * here includes 2 special case impl (for 1x3/3x1, 3x3) and one general impl */
11
void ccv_sobel(ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, int type, int dx, int dy)
12
10
{
13
10
  ccv_declare_derived_signature(sig, a->sig != 0, ccv_sign_with_format(64, "ccv_sobel(%d,%d)", dx, dy), a->sig, CCV_EOF_SIGN);
14
10
  type = (type == 0) ? CCV_32S | CCV_GET_CHANNEL(a->type) : 
CCV_GET_DATA_TYPE0
(type) | 0
CCV_GET_CHANNEL0
(a->type);
15
10
  ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, a->rows, a->cols, CCV_GET_CHANNEL(a->type) | CCV_ALL_DATA_TYPE, type, sig);
16
10
  ccv_object_return_if_cached(, db);
17
10
  int i, j, k, c, ch = CCV_GET_CHANNEL(a->type);
18
10
  unsigned char* a_ptr = a->data.u8;
19
10
  unsigned char* b_ptr = db->data.u8;
20
10
  if (dx == 1 && 
dy == 02
)
21
1
  {
22
1
    assert(a->cols >= 3);
23
1
    /* special case 1: 1x3 or 3x1 window */
24
1
#define for_block(_for_get, _for_set) \
25
501
    
for (i = 0; 1
i < a->rows;
i++500
) \
26
500
    { \
27
1.00k
      for (k = 0; k < ch; 
k++500
) \
28
500
        _for_set(b_ptr, k, 2 * (_for_get(a_ptr, ch + k, 0) - _for_get(a_ptr, k, 0)), 0); \
29
249k
      for (j = 1; j < a->cols - 1; 
j++249k
) \
30
498k
        
for (k = 0; 249k
k < ch;
k++249k
) \
31
249k
          _for_set(b_ptr, j * ch + k, _for_get(a_ptr, (j + 1) * ch + k, 0) - _for_get(a_ptr, (j - 1) * ch + k, 0), 0); \
32
1.00k
      for (k = 0; k < ch; 
k++500
) \
33
500
        _for_set(b_ptr, (a->cols - 1) * ch + k, 2 * (_for_get(a_ptr, (a->cols - 1) * ch + k, 0) - _for_get(a_ptr, (a->cols - 2) * ch + k, 0)), 0); \
34
500
      b_ptr += db->step; \
35
500
      a_ptr += a->step; \
36
500
    }
37
1
    ccv_matrix_getter(a->type, ccv_matrix_setter, db->type, for_block);
38
1
#undef for_block
39
9
  } else if (dx == 0 && 
dy == 14
) {
40
1
    assert(a->rows >= 3);
41
1
    /* special case 1: 1x3 or 3x1 window */
42
1
#define for_block(_for_get, _for_set) \
43
501
    
for (j = 0; 1
j < a->cols;
j++500
) \
44
1.00k
      
for (k = 0; 500
k < ch;
k++500
) \
45
500
        _for_set(b_ptr, j * ch + k, 2 * (_for_get(a_ptr + a->step, j * ch + k, 0) - _for_get(a_ptr, j * ch + k, 0)), 0); \
46
1
    a_ptr += a->step; \
47
1
    b_ptr += db->step; \
48
499
    for (i = 1; i < a->rows - 1; 
i++498
) \
49
498
    { \
50
249k
      for (j = 0; j < a->cols; 
j++249k
) \
51
498k
        
for (k = 0; 249k
k < ch;
k++249k
) \
52
249k
          _for_set(b_ptr, j * ch + k, _for_get(a_ptr + a->step, j * ch + k, 0) - _for_get(a_ptr - a->step, j * ch + k, 0), 0); \
53
498
      a_ptr += a->step; \
54
498
      b_ptr += db->step; \
55
498
    } \
56
501
    for (j = 0; j < a->cols; 
j++500
) \
57
1.00k
      
for (k = 0; 500
k < ch;
k++500
) \
58
500
        _for_set(b_ptr, j * ch + k, 2 * (_for_get(a_ptr, j * ch + k, 0) - _for_get(a_ptr - a->step, j * ch + k, 0)), 0);
59
1
    ccv_matrix_getter(a->type, ccv_matrix_setter, db->type, for_block);
60
1
#undef for_block
61
8
  } else if ((dx == 1 && 
dy == 11
) ||
(7
dx == -17
&&
dy == -11
)) {
62
1
    /* special case 2: 3x3 window with diagonal direction */
63
1
    assert(a->rows >= 3 && a->cols >= 3);
64
1
#define for_block(_for_get, _for_set) \
65
500
    
for (j = 0; 1
j < a->cols - 1;
j++499
) \
66
998
      
for (k = 0; 499
k < ch;
k++499
) \
67
499
        _for_set(b_ptr, j * ch + k, 2 * (_for_get(a_ptr + a->step, (j + 1) * ch + k, 0) - _for_get(a_ptr, j * ch + k, 0)), 0); \
68
2
    for (k = 0; k < ch; 
k++1
) \
69
1
      _for_set(b_ptr, (a->cols - 1) * ch + k, 2 * (_for_get(a_ptr + a->step, (a->cols - 1) * ch + k, 0) - _for_get(a_ptr, (a->cols - 1) * ch + k, 0)), 0); \
70
1
    a_ptr += a->step; \
71
1
    b_ptr += db->step; \
72
499
    for (i = 1; i < a->rows - 1; 
i++498
) \
73
498
    { \
74
996
      for (k = 0; k < ch; 
k++498
) \
75
498
        _for_set(b_ptr, k, 2 * (_for_get(a_ptr + a->step, ch + k, 0) - _for_get(a_ptr, k, 0)), 0); \
76
248k
      for (j = 1; j < a->cols - 1; 
j++248k
) \
77
496k
        
for (k = 0; 248k
k < ch;
k++248k
) \
78
248k
          _for_set(b_ptr, j * ch + k, _for_get(a_ptr + a->step, (j + 1) * ch + k, 0) - _for_get(a_ptr - a->step, (j - 1) * ch + k, 0), 0); \
79
996
      for (k = 0; k < ch; 
k++498
) \
80
498
        _for_set(b_ptr, (a->cols - 1) * ch + k, 2 * (_for_get(a_ptr, (a->cols - 1) * ch + k, 0) - _for_get(a_ptr - a->step, (a->cols - 2) * ch + k, 0)), 0); \
81
498
      a_ptr += a->step; \
82
498
      b_ptr += db->step; \
83
498
    } \
84
2
    for (k = 0; k < ch; 
k++1
) \
85
1
      _for_set(b_ptr, k, 2 * (_for_get(a_ptr, k, 0) - _for_get(a_ptr - a->step, k, 0)), 0); \
86
500
    for (j = 1; j < a->cols; 
j++499
) \
87
998
      
for (k = 0; 499
k < ch;
k++499
) \
88
499
        _for_set(b_ptr, j * ch + k, 2 * (_for_get(a_ptr, j * ch + k, 0) - _for_get(a_ptr - a->step, (j - 1) * ch + k, 0)), 0);
89
1
    ccv_matrix_getter(a->type, ccv_matrix_setter, db->type, for_block);
90
1
#undef for_block
91
7
  } else if ((dx == 1 && 
dy == -10
) || (dx == -1 &&
dy == 11
)) {
92
1
    /* special case 2: 3x3 window with diagonal direction */
93
1
    assert(a->rows >= 3 && a->cols >= 3);
94
1
#define for_block(_for_get, _for_set) \
95
2
    
for (k = 0; 1
k < ch;
k++1
) \
96
1
      _for_set(b_ptr, k, 2 * (_for_get(a_ptr + a->step, k, 0) - _for_get(a_ptr, k, 0)), 0); \
97
500
    for (j = 1; j < a->cols; 
j++499
) \
98
998
      
for (k = 0; 499
k < ch;
k++499
) \
99
499
        _for_set(b_ptr, j * ch + k, 2 * (_for_get(a_ptr + a->step, (j - 1) * ch + k, 0) - _for_get(a_ptr, j * ch + k, 0)), 0); \
100
1
    a_ptr += a->step; \
101
1
    b_ptr += db->step; \
102
499
    for (i = 1; i < a->rows - 1; 
i++498
) \
103
498
    { \
104
996
      for (k = 0; k < ch; 
k++498
) \
105
498
        _for_set(b_ptr, k, 2 * (_for_get(a_ptr, k, 0) - _for_get(a_ptr - a->step, ch + k, 0)), 0); \
106
248k
      for (j = 1; j < a->cols - 1; 
j++248k
) \
107
496k
        
for (k = 0; 248k
k < ch;
k++248k
) \
108
248k
          _for_set(b_ptr, j * ch + k, _for_get(a_ptr + a->step, (j - 1) * ch + k, 0) - _for_get(a_ptr - a->step, (j + 1) * ch + k, 0), 0); \
109
996
      for (k = 0; k < ch; 
k++498
) \
110
498
        _for_set(b_ptr, (a->cols - 1) * ch + k, 2 * (_for_get(a_ptr + a->step, (a->cols - 2) * ch + k, 0) - _for_get(a_ptr, (a->cols - 1) * ch + k, 0)), 0); \
111
498
      a_ptr += a->step; \
112
498
      b_ptr += db->step; \
113
498
    } \
114
500
    for (j = 0; j < a->cols - 1; 
j++499
) \
115
998
      
for (k = 0; 499
k < ch;
k++499
) \
116
499
        _for_set(b_ptr, j * ch + k, 2 * (_for_get(a_ptr, j * ch + k, 0) - _for_get(a_ptr - a->step, (j + 1) * ch + k, 0)), 0); \
117
2
    for (k = 0; k < ch; 
k++1
) \
118
1
      _for_set(b_ptr, (a->cols - 1) * ch + k, 2 * (_for_get(a_ptr, (a->cols - 1) * ch + k, 0) - _for_get(a_ptr - a->step, (a->cols - 1) * ch + k, 0)), 0);
119
1
    ccv_matrix_getter(a->type, ccv_matrix_setter, db->type, for_block);
120
1
#undef for_block
121
6
  } else if (dx == 3 && 
dy == 02
) {
122
2
    assert(a->rows >= 3 && a->cols >= 3);
123
2
    /* special case 3: 3x3 window, corresponding sigma = 0.85 */
124
2
    unsigned char* buf = (unsigned char*)alloca(db->step);
125
2
#define for_block(_for_get, _for_set_b, _for_get_b) \
126
1.00k
    
for (j = 0; 2
j < a->cols;
j++1.00k
) \
127
2.00k
      
for (k = 0; 1.00k
k < ch;
k++1.00k
) \
128
1.00k
        _for_set_b(b_ptr, j * ch + k, _for_get(a_ptr + a->step, j * ch + k, 0) + 3 * _for_get(a_ptr, j * ch + k, 0), 0); \
129
2
    a_ptr += a->step; \
130
2
    b_ptr += db->step; \
131
998
    for (i = 1; i < a->rows - 1; 
i++996
) \
132
996
    { \
133
498k
      for (j = 0; j < a->cols; 
j++498k
) \
134
996k
        
for (k = 0; 498k
k < ch;
k++498k
) \
135
498k
          _for_set_b(b_ptr, j * ch + k, _for_get(a_ptr + a->step, j * ch + k, 0) + 2 * _for_get(a_ptr, j * ch + k, 0) + _for_get(a_ptr - a->step, j * ch + k, 0), 0); \
136
996
      a_ptr += a->step; \
137
996
      b_ptr += db->step; \
138
996
    } \
139
1.00k
    for (j = 0; j < a->cols; 
j++1.00k
) \
140
2.00k
      
for (k = 0; 1.00k
k < ch;
k++1.00k
) \
141
1.00k
        _for_set_b(b_ptr, j * ch + k, 3 * _for_get(a_ptr, j * ch + k, 0) + _for_get(a_ptr - a->step, j * ch + k, 0), 0); \
142
2
    b_ptr = db->data.u8; \
143
1.00k
    for (i = 0; i < a->rows; 
i++1.00k
) \
144
1.00k
    { \
145
2.00k
      for (k = 0; k < ch; 
k++1.00k
) \
146
1.00k
        _for_set_b(buf, k, _for_get_b(b_ptr, ch + k, 0) - _for_get_b(b_ptr, k, 0), 0); \
147
499k
      for (j = 1; j < a->cols - 1; 
j++498k
) \
148
996k
        
for (k = 0; 498k
k < ch;
k++498k
) \
149
498k
          _for_set_b(buf, j * ch + k, _for_get_b(b_ptr, (j + 1) * ch + k, 0) - _for_get_b(b_ptr, (j - 1) * ch + k, 0), 0); \
150
2.00k
      for (k = 0; k < ch; 
k++1.00k
) \
151
1.00k
        _for_set_b(buf, (a->cols - 1) * ch + k, _for_get_b(b_ptr, (a->cols - 1) * ch + k, 0) - _for_get_b(b_ptr, (a->cols - 2) * ch + k, 0), 0); \
152
1.00k
      memcpy(b_ptr, buf, db->step); \
153
1.00k
      b_ptr += db->step; \
154
1.00k
    }
155
2
    ccv_matrix_getter(a->type, ccv_matrix_setter_getter, db->type, for_block);
156
2
#undef for_block
157
4
  } else if (dx == 0 && 
dy == 33
) {
158
2
    assert(a->rows >= 3 && a->cols >= 3);
159
2
    /* special case 3: 3x3 window, corresponding sigma = 0.85 */
160
2
    unsigned char* buf = (unsigned char*)alloca(db->step);
161
2
#define for_block(_for_get, _for_set_b, _for_get_b) \
162
1.00k
    
for (j = 0; 2
j < a->cols;
j++1.00k
) \
163
2.00k
      
for (k = 0; 1.00k
k < ch;
k++1.00k
) \
164
1.00k
        _for_set_b(b_ptr, j * ch + k, _for_get(a_ptr + a->step, j * ch + k, 0) - _for_get(a_ptr, j * ch + k, 0), 0); \
165
2
    a_ptr += a->step; \
166
2
    b_ptr += db->step; \
167
998
    for (i = 1; i < a->rows - 1; 
i++996
) \
168
996
    { \
169
498k
      for (j = 0; j < a->cols; 
j++498k
) \
170
996k
        
for (k = 0; 498k
k < ch;
k++498k
) \
171
498k
          _for_set_b(b_ptr, j * ch + k, _for_get(a_ptr + a->step, j * ch + k, 0) - _for_get(a_ptr - a->step, j * ch + k, 0), 0); \
172
996
      a_ptr += a->step; \
173
996
      b_ptr += db->step; \
174
996
    } \
175
1.00k
    for (j = 0; j < a->cols; 
j++1.00k
) \
176
2.00k
      
for (k = 0; 1.00k
k < ch;
k++1.00k
) \
177
1.00k
        _for_set_b(b_ptr, j * ch + k, _for_get(a_ptr, j * ch + k, 0) - _for_get(a_ptr - a->step, j * ch + k, 0), 0); \
178
2
    b_ptr = db->data.u8; \
179
1.00k
    for (i = 0; i < a->rows; 
i++1.00k
) \
180
1.00k
    { \
181
2.00k
      for (k = 0; k < ch; 
k++1.00k
) \
182
1.00k
        _for_set_b(buf, k, _for_get_b(b_ptr, ch + k, 0) + 3 * _for_get_b(b_ptr, k, 0), 0); \
183
499k
      for (j = 1; j < a->cols - 1; 
j++498k
) \
184
996k
        
for (k = 0; 498k
k < ch;
k++498k
) \
185
498k
          _for_set_b(buf, j * ch + k, _for_get_b(b_ptr, (j + 1) * ch + k, 0) + 2 * _for_get_b(b_ptr, j * ch + k, 0) + _for_get_b(b_ptr, (j - 1) * ch + k, 0), 0); \
186
2.00k
      for (k = 0; k < ch; 
k++1.00k
) \
187
1.00k
        _for_set_b(buf, (a->cols - 1) * ch + k, _for_get_b(b_ptr, (a->cols - 2) * ch + k, 0) + 3 * _for_get_b(b_ptr, (a->cols - 1) * ch + k, 0), 0); \
188
1.00k
      memcpy(b_ptr, buf, db->step); \
189
1.00k
      b_ptr += db->step; \
190
1.00k
    }
191
2
    ccv_matrix_getter(a->type, ccv_matrix_setter_getter, db->type, for_block);
192
2
#undef for_block
193
2
  } else {
194
2
    /* general case: in this case, I will generate a separable filter, and do the convolution */
195
2
    int fsz = ccv_max(dx, dy);
196
2
    assert(fsz % 2 == 1);
197
2
    int hfz = fsz / 2;
198
2
    unsigned char* df = (unsigned char*)alloca(sizeof(double) * fsz);
199
2
    unsigned char* gf = (unsigned char*)alloca(sizeof(double) * fsz);
200
2
    /* the sigma calculation is linear derviation of 3x3 - 0.85, 5x5 - 1.32 */
201
2
    double sigma = ((fsz - 1) / 2) * 0.47 + 0.38;
202
2
    double sigma2 = (2.0 * sigma * sigma);
203
2
    /* 2.5 is the factor to make the kernel "visible" in integer setting */
204
2
    double psigma3 = 2.5 / sqrt(sqrt(2 * CCV_PI) * sigma * sigma * sigma);
205
12
    for (i = 0; i < fsz; 
i++10
)
206
10
    {
207
10
      ((double*)df)[i] = (i - hfz) * exp(-((i - hfz) * (i - hfz)) / sigma2) * psigma3;
208
10
      ((double*)gf)[i] = exp(-((i - hfz) * (i - hfz)) / sigma2) * psigma3;
209
10
    }
210
2
    if (db->type & CCV_32S)
211
2
    {
212
12
      for (i = 0; i < fsz; 
i++10
)
213
10
      {
214
10
        // df could be negative, thus, (int)(x + 0.5) shortcut will not work
215
10
        ((int*)df)[i] = (int)round(((double*)df)[i] * 256.0);
216
10
        ((int*)gf)[i] = (int)(((double*)gf)[i] * 256.0 + 0.5);
217
10
      }
218
2
    } else {
219
0
      for (i = 0; i < fsz; i++)
220
0
      {
221
0
        ccv_set_value(db->type, df, i, ((double*)df)[i], 0);
222
0
        ccv_set_value(db->type, gf, i, ((double*)gf)[i], 0);
223
0
      }
224
0
    }
225
2
    if (dx < dy)
226
1
    {
227
1
      unsigned char* tf = df;
228
1
      df = gf;
229
1
      gf = tf;
230
1
    }
231
2
    unsigned char* buf = (unsigned char*)alloca(sizeof(double) * ch * (fsz + ccv_max(a->rows, a->cols)));
232
2
#define for_block(_for_get, _for_type_b, _for_set_b, _for_get_b) \
233
1.00k
    
for (i = 0; 2
i < a->rows;
i++1.00k
) \
234
1.00k
    { \
235
3.00k
      for (j = 0; j < hfz; 
j++2.00k
) \
236
4.00k
        
for (k = 0; 2.00k
k < ch;
k++2.00k
) \
237
2.00k
          _for_set_b(buf, j * ch + k, _for_get(a_ptr, k, 0), 0); \
238
501k
      for (j = 0; j < a->cols; 
j++500k
) \
239
1.00M
        
for (k = 0; 500k
k < ch;
k++500k
) \
240
500k
          _for_set_b(buf, (j + hfz) * ch + k, _for_get(a_ptr, j * ch + k, 0), 0); \
241
3.00k
      for (j = a->cols; j < a->cols + hfz; 
j++2.00k
) \
242
4.00k
        
for (k = 0; 2.00k
k < ch;
k++2.00k
) \
243
2.00k
          _for_set_b(buf, (j + hfz) * ch + k, _for_get(a_ptr, (a->cols - 1) * ch + k, 0), 0); \
244
501k
      for (j = 0; j < a->cols; 
j++500k
) \
245
500k
      { \
246
1.00M
        for (c = 0; c < ch; 
c++500k
) \
247
500k
        { \
248
500k
          _for_type_b sum = 0; \
249
3.00M
          for (k = 0; k < fsz; 
k++2.50M
) \
250
2.50M
            sum += _for_get_b(buf, (j + k) * ch + c, 0) * _for_get_b(df, k, 0); \
251
500k
          _for_set_b(b_ptr, j * ch + c, sum, 8); \
252
500k
        } \
253
500k
      } \
254
1.00k
      a_ptr += a->step; \
255
1.00k
      b_ptr += db->step; \
256
1.00k
    } \
257
2
    b_ptr = db->data.u8; \
258
1.00k
    for (i = 0; i < a->cols; 
i++1.00k
) \
259
1.00k
    { \
260
3.00k
      for (j = 0; j < hfz; 
j++2.00k
) \
261
4.00k
        
for (k = 0; 2.00k
k < ch;
k++2.00k
) \
262
2.00k
          _for_set_b(buf, j * ch + k, _for_get_b(b_ptr, i * ch + k, 0), 0); \
263
501k
      for (j = 0; j < a->rows; 
j++500k
) \
264
1.00M
        
for (k = 0; 500k
k < ch;
k++500k
) \
265
500k
          _for_set_b(buf, (j + hfz) * ch + k, _for_get_b(b_ptr + j * db->step, i * ch + k, 0), 0); \
266
3.00k
      for (j = a->rows; j < a->rows + hfz; 
j++2.00k
) \
267
4.00k
        
for (k = 0; 2.00k
k < ch;
k++2.00k
) \
268
2.00k
          _for_set_b(buf, (j + hfz) * ch + k, _for_get_b(b_ptr + (a->rows - 1) * db->step, i * ch + k, 0), 0); \
269
501k
      for (j = 0; j < a->rows; 
j++500k
) \
270
500k
      { \
271
1.00M
        for (c = 0; c < ch; 
c++500k
) \
272
500k
        { \
273
500k
          _for_type_b sum = 0; \
274
3.00M
          for (k = 0; k < fsz; 
k++2.50M
) \
275
2.50M
            sum += _for_get_b(buf, (j + k) * ch + c, 0) * _for_get_b(gf, k, 0); \
276
500k
          _for_set_b(b_ptr + j * db->step, i * ch + c, sum, 8); \
277
500k
        } \
278
500k
      } \
279
1.00k
    }
280
2
    ccv_matrix_getter(a->type, ccv_matrix_typeof_setter_getter, db->type, for_block);
281
2
#undef for_block
282
2
  }
283
10
}
284
285
/* the fast arctan function adopted from OpenCV */
286
static void _ccv_atan2(float* x, float* y, float* angle, float* mag, int len)
287
0
{
288
0
  int i = 0;
289
0
  float scale = (float)(180.0 / CCV_PI);
290
0
#ifdef HAVE_SSE2
291
0
#ifndef _WIN32
292
0
  union { int i; float fl; } iabsmask; iabsmask.i = 0x7fffffff;
293
0
  __m128 eps = _mm_set1_ps((float)1e-6), absmask = _mm_set1_ps(iabsmask.fl);
294
0
  __m128 _90 = _mm_set1_ps((float)(3.141592654 * 0.5)), _180 = _mm_set1_ps((float)3.141592654), _360 = _mm_set1_ps((float)(3.141592654 * 2));
295
0
  __m128 zero = _mm_setzero_ps(), _0_28 = _mm_set1_ps(0.28f), scale4 = _mm_set1_ps(scale);
296
0
  
297
0
  for(; i <= len - 4; i += 4)
298
0
  {
299
0
    __m128 x4 = _mm_loadu_ps(x + i), y4 = _mm_loadu_ps(y + i);
300
0
    __m128 xq4 = _mm_mul_ps(x4, x4), yq4 = _mm_mul_ps(y4, y4);
301
0
    __m128 xly = _mm_cmplt_ps(xq4, yq4);
302
0
    __m128 z4 = _mm_div_ps(_mm_mul_ps(x4, y4), _mm_add_ps(_mm_add_ps(_mm_max_ps(xq4, yq4), _mm_mul_ps(_mm_min_ps(xq4, yq4), _0_28)), eps));
303
0
304
0
    // a4 <- x < y ? 90 : 0;
305
0
    __m128 a4 = _mm_and_ps(xly, _90);
306
0
    // a4 <- (y < 0 ? 360 - a4 : a4) == ((x < y ? y < 0 ? 270 : 90) : (y < 0 ? 360 : 0))
307
0
    __m128 mask = _mm_cmplt_ps(y4, zero);
308
0
    a4 = _mm_or_ps(_mm_and_ps(_mm_sub_ps(_360, a4), mask), _mm_andnot_ps(mask, a4));
309
0
    // a4 <- (x < 0 && !(x < y) ? 180 : a4)
310
0
    mask = _mm_andnot_ps(xly, _mm_cmplt_ps(x4, zero));
311
0
    a4 = _mm_or_ps(_mm_and_ps(_180, mask), _mm_andnot_ps(mask, a4));
312
0
    
313
0
    // a4 <- (x < y ? a4 - z4 : a4 + z4)
314
0
    a4 = _mm_mul_ps(_mm_add_ps(_mm_xor_ps(z4, _mm_andnot_ps(absmask, xly)), a4), scale4);
315
0
    __m128 m4 = _mm_sqrt_ps(_mm_add_ps(xq4, yq4));
316
0
    _mm_storeu_ps(angle + i, a4);
317
0
    _mm_storeu_ps(mag + i, m4);
318
0
  }
319
0
#endif
320
0
#endif
321
0
  for(; i < len; i++)
322
0
  {
323
0
    float xf = x[i], yf = y[i];
324
0
    float a, x2 = xf * xf, y2 = yf * yf;
325
0
    if(y2 <= x2)
326
0
      a = xf * yf / (x2 + 0.28f * y2 + (float)1e-6) + (float)(xf < 0 ? CCV_PI : yf >= 0 ? 0 : CCV_PI * 2);
327
0
    else
328
0
      a = (float)(yf >= 0 ? CCV_PI * 0.5 : CCV_PI * 1.5) - xf * yf / (y2 + 0.28f * x2 + (float)1e-6);
329
0
    angle[i] = a * scale;
330
0
    mag[i] = sqrtf(x2 + y2);
331
0
  }
332
0
}
333
334
void ccv_gradient(ccv_dense_matrix_t* a, ccv_dense_matrix_t** theta, int ttype, ccv_dense_matrix_t** m, int mtype, int dx, int dy)
335
0
{
336
0
  ccv_declare_derived_signature(tsig, a->sig != 0, ccv_sign_with_format(64, "ccv_gradient(theta,%d,%d)", dx, dy), a->sig, CCV_EOF_SIGN);
337
0
  ccv_declare_derived_signature(msig, a->sig != 0, ccv_sign_with_format(64, "ccv_gradient(m,%d,%d)", dx, dy), a->sig, CCV_EOF_SIGN);
338
0
  int ch = CCV_GET_CHANNEL(a->type);
339
0
  ccv_dense_matrix_t* dtheta = *theta = ccv_dense_matrix_renew(*theta, a->rows, a->cols, CCV_32F | ch, CCV_32F | ch, tsig);
340
0
  ccv_dense_matrix_t* dm = *m = ccv_dense_matrix_renew(*m, a->rows, a->cols, CCV_32F | ch, CCV_32F | ch, msig);
341
0
  assert(dtheta && dm);
342
0
  ccv_object_return_if_cached(, dtheta, dm);
343
0
  ccv_revive_object_if_cached(dtheta, dm);
344
0
  ccv_dense_matrix_t* tx = 0;
345
0
  ccv_dense_matrix_t* ty = 0;
346
0
  ccv_sobel(a, &tx, CCV_32F | ch, dx, 0);
347
0
  ccv_sobel(a, &ty, CCV_32F | ch, 0, dy);
348
0
  _ccv_atan2(tx->data.f32, ty->data.f32, dtheta->data.f32, dm->data.f32, ch * a->rows * a->cols);
349
0
  ccv_matrix_free(tx);
350
0
  ccv_matrix_free(ty);
351
0
}
352
353
static void _ccv_flip_y_self(ccv_dense_matrix_t* a)
354
2
{
355
2
  int i;
356
2
  unsigned char* buffer = (unsigned char*)alloca(a->step);
357
2
  unsigned char* a_ptr = a->data.u8;
358
2
  unsigned char* b_ptr = a->data.u8 + (a->rows - 1) * a->step;
359
502
  for (i = 0; i < a->rows / 2; 
i++500
)
360
500
  {
361
500
    memcpy(buffer, a_ptr, a->step);
362
500
    memcpy(a_ptr, b_ptr, a->step);
363
500
    memcpy(b_ptr, buffer, a->step);
364
500
    a_ptr += a->step;
365
500
    b_ptr -= a->step;
366
500
  }
367
2
}
368
369
static void _ccv_flip_x_self(ccv_dense_matrix_t* a)
370
1.74M
{
371
1.74M
  int i, j;
372
1.74M
  int len = CCV_GET_DATA_TYPE_SIZE(a->type) * CCV_GET_CHANNEL(a->type);
373
1.74M
  unsigned char* buffer = (unsigned char*)alloca(len);
374
1.74M
  unsigned char* a_ptr = a->data.u8;
375
56.5M
  for (i = 0; i < a->rows; 
i++54.8M
)
376
54.8M
  {
377
820M
    for (j = 0; j < a->cols / 2; 
j++765M
)
378
765M
    {
379
765M
      memcpy(buffer, a_ptr + j * len, len);
380
765M
      memcpy(a_ptr + j * len, a_ptr + (a->cols - 1 - j) * len, len);
381
765M
      memcpy(a_ptr + (a->cols - 1 - j) * len, buffer, len);
382
765M
    }
383
54.8M
    a_ptr += a->step;
384
54.8M
  }
385
1.74M
}
386
387
void ccv_flip(ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, int btype, int type)
388
1.74M
{
389
1.74M
  /* this is the special case where ccv_declare_derived_signature_* macros cannot handle properly */
390
1.74M
  uint64_t sig = a->sig;
391
1.74M
  if (type & CCV_FLIP_Y)
392
2
    sig = (a->sig == 0) ? 
00
: ccv_cache_generate_signature("ccv_flip_y", 10, sig, CCV_EOF_SIGN);
393
1.74M
  if (type & CCV_FLIP_X)
394
1.74M
    sig = (a->sig == 0) ? 
01.74M
:
ccv_cache_generate_signature("ccv_flip_x", 10, sig, 611
CCV_EOF_SIGN611
);
395
1.74M
  ccv_dense_matrix_t* db;
396
1.74M
  if (b == 0)
397
0
  {
398
0
    db = a;
399
0
    if (a->sig != 0)
400
0
    {
401
0
      btype = CCV_GET_DATA_TYPE(a->type) | CCV_GET_CHANNEL(a->type);
402
0
      sig = ccv_cache_generate_signature((const char*)&btype, sizeof(int), sig, CCV_EOF_SIGN);
403
0
      a->sig = sig;
404
0
    }
405
1.74M
  } else {
406
1.74M
    btype = CCV_GET_DATA_TYPE(a->type) | CCV_GET_CHANNEL(a->type);
407
1.74M
    *b = db = ccv_dense_matrix_renew(*b, a->rows, a->cols, btype, btype, sig);
408
1.74M
    ccv_object_return_if_cached(, db);
409
1.74M
    if (a->data.u8 != db->data.u8)
410
3
      memcpy(db->data.u8, a->data.u8, a->rows * a->step);
411
1.74M
  }
412
1.74M
  if (type & CCV_FLIP_Y)
413
2
    _ccv_flip_y_self(db);
414
1.74M
  if (type & CCV_FLIP_X)
415
1.74M
    _ccv_flip_x_self(db);
416
1.74M
}
417
418
void ccv_blur(ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, int type, double sigma)
419
1
{
420
1
  ccv_declare_derived_signature(sig, a->sig != 0, ccv_sign_with_format(64, "ccv_blur(%la)", sigma), a->sig, CCV_EOF_SIGN);
421
1
  type = (type == 0) ? CCV_GET_DATA_TYPE(a->type) | CCV_GET_CHANNEL(a->type) : 
CCV_GET_DATA_TYPE0
(type) | 0
CCV_GET_CHANNEL0
(a->type);
422
1
  ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, a->rows, a->cols, CCV_ALL_DATA_TYPE | CCV_GET_CHANNEL(a->type), type, sig);
423
1
  ccv_object_return_if_cached(, db);
424
1
  int fsz = ccv_max(1, (int)(4.0 * sigma + 1.0 - 1e-8)) * 2 + 1;
425
1
  int hfz = fsz / 2;
426
1
  assert(hfz > 0);
427
1
  unsigned char* buf = (unsigned char*)alloca(sizeof(double) * ccv_max(hfz * 2 + a->rows, (hfz * 2 + a->cols) * CCV_GET_CHANNEL(a->type)));
428
1
  unsigned char* filter = (unsigned char*)alloca(sizeof(double) * fsz);
429
1
  double tw = 0;
430
1
  int i, j, k, ch = CCV_GET_CHANNEL(a->type);
431
1
  assert(fsz > 0);
432
28
  
for (i = 0; 1
i < fsz;
i++27
)
433
27
    tw += ((double*)filter)[i] = exp(-((i - hfz) * (i - hfz)) / (2.0 * sigma * sigma));
434
1
  int no_8u_type = (db->type & CCV_8U) ? CCV_32S : 
db->type0
;
435
1
  if (no_8u_type & CCV_32S)
436
1
  {
437
1
    tw = 256.0 / tw;
438
28
    for (i = 0; i < fsz; 
i++27
)
439
27
      ((int*)filter)[i] = (int)(((double*)filter)[i] * tw + 0.5);
440
1
  } else {
441
0
    tw = 1.0 / tw;
442
0
    for (i = 0; i < fsz; i++)
443
0
      ccv_set_value(no_8u_type, filter, i, ((double*)filter)[i] * tw, 0);
444
0
  }
445
1
  /* horizontal */
446
1
  unsigned char* a_ptr = a->data.u8;
447
1
  unsigned char* b_ptr = db->data.u8;
448
1
  assert(ch > 0);
449
1
#define for_block(_for_type, _for_set_b, _for_get_b, _for_set_a, _for_get_a) \
450
601
  
for (i = 0; 1
i < a->rows;
i++600
) \
451
600
  { \
452
8.40k
    for (j = 0; j < hfz; 
j++7.80k
) \
453
31.2k
      
for (k = 0; 7.80k
k < ch;
k++23.4k
) \
454
23.4k
        _for_set_b(buf, j * ch + k, _for_get_a(a_ptr, k, 0), 0); \
455
1.44M
    for (j = 0; j < a->cols * ch; 
j++1.44M
) \
456
1.44M
      _for_set_b(buf, j + hfz * ch, _for_get_a(a_ptr, j, 0), 0); \
457
8.40k
    for (j = a->cols; j < hfz + a->cols; 
j++7.80k
) \
458
31.2k
      
for (k = 0; 7.80k
k < ch;
k++23.4k
) \
459
23.4k
        _for_set_b(buf, j * ch + hfz * ch + k, _for_get_a(a_ptr, (a->cols - 1) * ch + k, 0), 0); \
460
1.44M
    for (j = 0; j < a->cols * ch; 
j++1.44M
) \
461
1.44M
    { \
462
1.44M
      _for_type sum = 0; \
463
40.3M
      for (k = 0; k < fsz; 
k++38.8M
) \
464
38.8M
        sum += _for_get_b(buf, k * ch + j, 0) * _for_get_b(filter, k, 0); \
465
1.44M
      _for_set_b(buf, j, sum, 8); \
466
1.44M
    } \
467
1.44M
    for (j = 0; j < a->cols * ch; 
j++1.44M
) \
468
1.44M
      _for_set_a(b_ptr, j, _for_get_b(buf, j, 0), 0); \
469
600
    a_ptr += a->step; \
470
600
    b_ptr += db->step; \
471
600
  }
472
1
  ccv_matrix_typeof_setter_getter(no_8u_type, ccv_matrix_setter, db->type, ccv_matrix_getter, a->type, for_block);
473
1
#undef for_block
474
1
  /* vertical */
475
1
  b_ptr = db->data.u8;
476
1
#define for_block(_for_type, _for_set_b, _for_get_b, _for_set_a, _for_get_a) \
477
2.40k
  
for (i = 0; 1
i < a->cols * ch;
i++2.40k
) \
478
2.40k
  { \
479
33.6k
    for (j = 0; j < hfz; 
j++31.2k
) \
480
31.2k
      _for_set_b(buf, j, _for_get_a(b_ptr, i, 0), 0); \
481
1.44M
    for (j = 0; j < a->rows; 
j++1.44M
) \
482
1.44M
      _for_set_b(buf, j + hfz, _for_get_a(b_ptr + j * db->step, i, 0), 0); \
483
33.6k
    for (j = a->rows; j < hfz + a->rows; 
j++31.2k
) \
484
31.2k
      _for_set_b(buf, j + hfz, _for_get_a(b_ptr + (a->rows - 1) * db->step, i, 0), 0); \
485
1.44M
    for (j = 0; j < a->rows; 
j++1.44M
) \
486
1.44M
    { \
487
1.44M
      _for_type sum = 0; \
488
40.3M
      for (k = 0; k < fsz; 
k++38.8M
) \
489
38.8M
        sum += _for_get_b(buf, k + j, 0) * _for_get_b(filter, k, 0); \
490
1.44M
      _for_set_b(buf, j, sum, 8); \
491
1.44M
    } \
492
1.44M
    for (j = 0; j < a->rows; 
j++1.44M
) \
493
1.44M
      _for_set_a(b_ptr + j * db->step, i, _for_get_b(buf, j, 0), 0); \
494
2.40k
  }
495
1
  ccv_matrix_typeof_setter_getter(no_8u_type, ccv_matrix_setter_getter, db->type, for_block);
496
1
#undef for_block
497
1
}