Coverage Report

Created: 2024-08-19 11:27

/home/liu/actions-runner/_work/ccv/ccv/lib/ccv_resample.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
4
/* area interpolation resample is adopted from OpenCV */
5
6
typedef struct {
7
  int si, di;
8
  unsigned int alpha;
9
} ccv_int_alpha;
10
11
static void _ccv_resample_area_8u(ccv_dense_matrix_t* a, ccv_dense_matrix_t* b, double rows_scale, double cols_scale)
12
1
{
13
1
  assert(a->cols > 0 && b->cols > 0);
14
1
  int ch = ccv_clamp(CCV_GET_CHANNEL(a->type), 1, 4);
15
1
  unsigned char* workspace = 0;
16
1
  ccv_int_alpha* xofs;
17
1
  unsigned int* buf;
18
1
  unsigned int* sum;
19
1
  if (a->cols > 1024 || b->cols * ch > 1024)
20
0
  {
21
0
    workspace = (unsigned char*)ccmalloc(sizeof(ccv_int_alpha) * a->cols * 2 + 2 * b->cols * ch * sizeof(unsigned int));
22
0
    xofs = (ccv_int_alpha*)workspace;
23
0
    buf = (unsigned int*)(workspace + sizeof(ccv_int_alpha) * a->cols * 2);
24
0
    sum = buf + b->cols * ch;
25
1
  } else {
26
1
    xofs = (ccv_int_alpha*)alloca(sizeof(ccv_int_alpha) * a->cols * 2);
27
1
    buf = (unsigned int*)alloca(b->cols * ch * sizeof(unsigned int));
28
1
    sum = (unsigned int*)alloca(b->cols * ch * sizeof(unsigned int));
29
1
  }
30
1
  double scale_x = (double)1 / cols_scale;
31
1
  double scale_y = (double)1 / rows_scale;
32
  // double scale = 1.f / (scale_x * scale_y);
33
1
  unsigned int inv_scale_256 = (int)(scale_x * scale_y * 0x10000);
34
1
  int dx, dy, sx, sy, i, k;
35
101
  for (dx = 0, k = 0; dx < b->cols; 
dx++100
)
36
100
  {
37
100
    double fsx1 = dx * scale_x, fsx2 = fsx1 + scale_x;
38
100
    int sx1 = (int)(fsx1 + 1.0 - 1e-6), sx2 = (int)(fsx2);
39
40
100
    if (sx1 > fsx1)
41
0
    {
42
0
      xofs[k].di = dx * ch;
43
0
      xofs[k].si = ccv_min(sx1 - 1, a->cols - 1) * ch;
44
0
      xofs[k++].alpha = (unsigned int)((sx1 - fsx1) * 0x100);
45
0
    }
46
47
600
    for (sx = sx1; sx < sx2; 
sx++500
)
48
500
    {
49
500
      xofs[k].di = dx * ch;
50
500
      xofs[k].si = ccv_min(sx, a->cols - 1) * ch;
51
500
      xofs[k++].alpha = 256;
52
500
    }
53
54
100
    if (fsx2 - sx2 > 1e-3)
55
0
    {
56
0
      xofs[k].di = dx * ch;
57
0
      xofs[k].si = ccv_min(sx2, a->cols - 1) * ch;
58
0
      xofs[k++].alpha = (unsigned int)((fsx2 - sx2) * 256);
59
0
    }
60
100
  }
61
1
  int xofs_count = k;
62
301
  for (dx = 0; dx < b->cols * ch; 
dx++300
)
63
300
    buf[dx] = sum[dx] = 0;
64
1
  dy = 0;
65
1
  int dy_weight_256 = 0;
66
501
  for (sy = 0; sy < a->rows; 
sy++500
)
67
500
  {
68
500
    unsigned char* a_ptr = a->data.u8 + a->step * sy;
69
250k
    for (k = 0; k < xofs_count; 
k++250k
)
70
250k
    {
71
250k
      int dxn = xofs[k].di;
72
250k
      unsigned int alpha = xofs[k].alpha;
73
1.00M
      for (i = 0; i < ch; 
i++750k
)
74
750k
        buf[dxn + i] += a_ptr[xofs[k].si + i] * alpha;
75
250k
    }
76
500
    if ((dy + 1) * scale_y <= sy + 1)
77
100
    {
78
100
      unsigned int beta = (int)(ccv_max(sy + 1 - (dy + 1) * scale_y, 0.f) * 256);
79
100
      unsigned int beta1 = 256 - beta;
80
100
      unsigned char* b_ptr = b->data.u8 + b->step * dy;
81
100
      if (sy == a->rows - 1)
82
1
        beta = (int)(scale_y * 256);
83
99
      else
84
99
        dy_weight_256 = beta;
85
100
      if (beta <= 0)
86
99
      {
87
29.7k
        for (dx = 0; dx < b->cols * ch; 
dx++29.7k
)
88
29.7k
        {
89
29.7k
          b_ptr[dx] = ccv_clamp((sum[dx] + buf[dx] * 256) / inv_scale_256, 0, 255);
90
29.7k
          sum[dx] = buf[dx] = 0;
91
29.7k
        }
92
99
      } else {
93
301
        for (dx = 0; dx < b->cols * ch; 
dx++300
)
94
300
        {
95
300
          b_ptr[dx] = ccv_clamp((sum[dx] + buf[dx] * beta1) / inv_scale_256, 0, 255);
96
300
          sum[dx] = buf[dx] * beta;
97
300
          buf[dx] = 0;
98
300
        }
99
1
      }
100
100
      dy++;
101
400
    } else {
102
400
      if (sy == a->rows - 1)
103
0
      {
104
0
        dy_weight_256 = (int)(scale_y * 256) - dy_weight_256;
105
0
        for(dx = 0; dx < b->cols * ch; dx++)
106
0
        {
107
0
          sum[dx] += buf[dx] * dy_weight_256;
108
0
          buf[dx] = 0;
109
0
        }
110
400
      } else {
111
400
        dy_weight_256 += 256;
112
120k
        for(dx = 0; dx < b->cols * ch; 
dx++120k
)
113
120k
        {
114
120k
          sum[dx] += buf[dx] * 256;
115
120k
          buf[dx] = 0;
116
120k
        }
117
400
      }
118
400
    }
119
500
  }
120
1
  for (; dy < b->rows; 
dy++0
)
121
0
  {
122
0
    unsigned char* b_ptr = b->data.u8 + b->step * dy;
123
0
    for (dx = 0; dx < b->cols * ch; dx++)
124
0
      b_ptr[dx] = ccv_clamp(sum[dx] / inv_scale_256, 0, 255);
125
0
  }
126
1
  if (workspace)
127
0
    ccfree(workspace);
128
1
}
129
130
typedef struct {
131
  int si, di;
132
  float alpha;
133
} ccv_area_alpha_t;
134
135
static void _ccv_resample_area(ccv_dense_matrix_t* a, ccv_dense_matrix_t* b, double rows_scale, double cols_scale)
136
3
{
137
3
  assert(a->cols > 0 && b->cols > 0);
138
3
  int ch = CCV_GET_CHANNEL(a->type);
139
3
  unsigned char* workspace = 0;
140
3
  ccv_area_alpha_t* xofs;
141
3
  float* buf;
142
3
  float* sum;
143
3
  if (a->cols > 1024 || b->cols * ch > 1024)
144
0
  {
145
0
    workspace = (unsigned char*)ccmalloc(sizeof(ccv_area_alpha_t) * a->cols * 2 + 2 * b->cols * ch * sizeof(float));
146
0
    xofs = (ccv_area_alpha_t*)workspace;
147
0
    buf = (float*)(workspace + sizeof(ccv_area_alpha_t) * a->cols * 2);
148
0
    sum = buf + b->cols * ch;
149
3
  } else {
150
3
    xofs = (ccv_area_alpha_t*)alloca(sizeof(ccv_area_alpha_t) * a->cols * 2);
151
3
    buf = (float*)alloca(b->cols * ch * sizeof(float));
152
3
    sum = (float*)alloca(b->cols * ch * sizeof(float));
153
3
  }
154
3
  double scale_x = (double)1 / cols_scale;
155
3
  double scale_y = (double)1 / rows_scale;
156
3
  double scale = 1.f / (scale_x * scale_y);
157
3
  int dx, dy, sx, sy, i, k;
158
801
  for (dx = 0, k = 0; dx < b->cols; 
dx++798
)
159
798
  {
160
798
    double fsx1 = dx * scale_x, fsx2 = fsx1 + scale_x;
161
798
    int sx1 = (int)(fsx1 + 1.0 - 1e-6), sx2 = (int)(fsx2);
162
163
798
    if (sx1 > fsx1)
164
792
    {
165
792
      xofs[k].di = dx * ch;
166
792
      xofs[k].si = ccv_min(sx1 - 1, a->cols - 1) * ch;
167
792
      xofs[k++].alpha = (float)((sx1 - fsx1) * scale);
168
792
    }
169
170
1.76k
    for (sx = sx1; sx < sx2; 
sx++968
)
171
968
    {
172
968
      xofs[k].di = dx * ch;
173
968
      xofs[k].si = ccv_min(sx, a->cols - 1) * ch;
174
968
      xofs[k++].alpha = (float)scale;
175
968
    }
176
177
798
    if (fsx2 - sx2 > 1e-3)
178
792
    {
179
792
      xofs[k].di = dx * ch;
180
792
      xofs[k].si = ccv_min(sx2, a->cols - 1) * ch;
181
792
      xofs[k++].alpha = (float)((fsx2 - sx2) * scale);
182
792
    }
183
798
  }
184
3
  int xofs_count = k;
185
2.39k
  for (dx = 0; dx < b->cols * ch; 
dx++2.39k
)
186
2.39k
    buf[dx] = sum[dx] = 0;
187
3
  dy = 0;
188
3
  float dy_weight = 0;
189
3
#define for_block(_for_get, _for_set) \
190
1.88k
  
for (sy = 0; 3
sy < a->rows;
sy++1.88k
) \
191
1.88k
  { \
192
1.88k
    unsigned char* a_ptr = a->data.u8 + a->step * sy; \
193
1.59M
    for (k = 0; k < xofs_count; 
k++1.59M
) \
194
1.59M
    { \
195
1.59M
      int dxn = xofs[k].di; \
196
1.59M
      float alpha = xofs[k].alpha; \
197
6.36M
      for (i = 0; i < ch; 
i++4.77M
) \
198
4.77M
        buf[dxn + i] += _for_get(a_ptr, xofs[k].si + i) * alpha; \
199
1.59M
    } \
200
1.88k
    if ((dy + 1) * scale_y <= sy + 1) \
201
1.88k
    { \
202
874
      float beta = ccv_max(sy + 1 - (dy + 1) * scale_y, 0.f); \
203
874
      float beta1 = 1 - beta; \
204
874
      unsigned char* b_ptr = b->data.u8 + b->step * dy; \
205
874
      if (sy == a->rows - 1) \
206
874
        
beta = scale_y3
; /* Such that if there are any residue, we will scale it up. */ \
207
874
      else \
208
874
        
dy_weight = beta871
; \
209
874
      if (fabs(beta) < 1e-3) \
210
874
      { \
211
2.55k
        for (dx = 0; dx < b->cols * ch; 
dx++2.55k
) \
212
2.55k
        { \
213
2.55k
          _for_set(b_ptr, dx, sum[dx] + buf[dx]); \
214
2.55k
          sum[dx] = buf[dx] = 0; \
215
2.55k
        } \
216
871
      } else { \
217
687k
        for (dx = 0; dx < b->cols * ch; 
dx++686k
) \
218
686k
        { \
219
686k
          _for_set(b_ptr, dx, sum[dx] + buf[dx] * beta1); \
220
686k
          sum[dx] = buf[dx] * beta; \
221
686k
          buf[dx] = 0; \
222
686k
        } \
223
871
      } \
224
874
      dy++; \
225
1.00k
    } else { \
226
1.00k
      if (sy == a->rows - 1) \
227
1.00k
      { \
228
0
        dy_weight = scale_y - dy_weight; \
229
0
        for(dx = 0; dx < b->cols * ch; dx++) \
230
0
        { \
231
0
          sum[dx] += buf[dx] * dy_weight; \
232
0
          buf[dx] = 0; \
233
0
        } \
234
1.00k
      } else { \
235
1.00k
        dy_weight += 1; \
236
810k
        for(dx = 0; dx < b->cols * ch; 
dx++808k
) \
237
808k
        { \
238
808k
          sum[dx] += buf[dx]; \
239
808k
          buf[dx] = 0; \
240
808k
        } \
241
1.00k
      } \
242
1.00k
    } \
243
1.88k
  } \
244
3
  for (; dy < b->rows; 
dy++0
) \
245
3
  { \
246
0
    unsigned char* b_ptr = b->data.u8 + b->step * dy; \
247
0
    for (dx = 0; dx < b->cols * ch; dx++) \
248
0
      _for_set(b_ptr, dx, sum[dx]); \
249
0
  }
250
3
  ccv_matrix_getter(a->type, ccv_matrix_setter, b->type, for_block);
251
3
#undef for_block
252
3
  if (workspace)
253
0
    ccfree(workspace);
254
3
}
255
256
typedef struct {
257
  int si[4];
258
  float coeffs[4];
259
} ccv_cubic_coeffs_t;
260
261
typedef struct {
262
  int si[4];
263
  int coeffs[4];
264
} ccv_cubic_integer_coeffs_t;
265
266
static void _ccv_init_cubic_coeffs(int si, int sz, float s, ccv_cubic_coeffs_t* coeff)
267
0
{
268
0
  const float A = -0.75f;
269
0
  coeff->si[0] = ccv_min(ccv_max(si - 1, 0), sz - 1);
270
0
  coeff->si[1] = ccv_min(ccv_max(si, 0), sz - 1);
271
0
  coeff->si[2] = ccv_min(ccv_max(si + 1, 0), sz - 1);
272
0
  coeff->si[3] = ccv_min(ccv_max(si + 2, 0), sz - 1);
273
0
  float x = s - si;
274
0
  coeff->coeffs[0] = ((A * (x + 1) - 5 * A) * (x + 1) + 8 * A) * (x + 1) - 4 * A;
275
0
  coeff->coeffs[1] = ((A + 2) * x - (A + 3)) * x * x + 1;
276
0
  coeff->coeffs[2] = ((A + 2) * (1 - x) - (A + 3)) * (1 - x) * (1 - x) + 1;
277
0
  coeff->coeffs[3] = 1.f - coeff->coeffs[0] - coeff->coeffs[1] - coeff->coeffs[2];
278
0
}
279
280
static void _ccv_resample_cubic_float_only(ccv_dense_matrix_t* a, ccv_dense_matrix_t* b, double rows_scale, double cols_scale)
281
0
{
282
0
  assert(CCV_GET_DATA_TYPE(b->type) == CCV_32F || CCV_GET_DATA_TYPE(b->type) == CCV_64F);
283
0
  int i, j, k, ch = CCV_GET_CHANNEL(a->type);
284
0
  assert(b->cols > 0 && b->step > 0);
285
0
  unsigned char* workspace = 0;
286
0
  ccv_cubic_coeffs_t* xofs;
287
0
  unsigned char* buf;
288
0
  if (b->step > 2048)
289
0
  {
290
0
    workspace = (unsigned char*)ccmalloc(sizeof(ccv_cubic_coeffs_t) * b->cols + b->step * 4);
291
0
    xofs = (ccv_cubic_coeffs_t*)workspace;
292
0
    buf = (unsigned char*)(workspace + sizeof(ccv_cubic_coeffs_t) * b->cols);
293
0
  } else {
294
0
    xofs = (ccv_cubic_coeffs_t*)alloca(sizeof(ccv_cubic_coeffs_t) * b->cols);
295
0
    buf = (unsigned char*)alloca(b->step * 4);
296
0
  }
297
0
  double scale_x = (double)1 / cols_scale;
298
0
  for (i = 0; i < b->cols; i++)
299
0
  {
300
0
    double sx = (i + 0.5) * scale_x - 0.5;
301
0
    _ccv_init_cubic_coeffs((int)sx, a->cols, (float)sx, xofs + i);
302
0
  }
303
0
  double scale_y = (double)1 / rows_scale;
304
#ifdef __clang_analyzer__
305
  memset(buf, 0, b->step * 4);
306
#endif
307
0
  unsigned char* a_ptr = a->data.u8;
308
0
  unsigned char* b_ptr = b->data.u8;
309
0
  int psi = -1, siy = 0;
310
0
#define for_block(_for_get, _for_set_b, _for_get_b) \
311
0
  for (i = 0; i < b->rows; i++) \
312
0
  { \
313
0
    ccv_cubic_coeffs_t yofs; \
314
0
    double sy = (i + 0.5) * scale_y - 0.5; \
315
0
    _ccv_init_cubic_coeffs((int)sy, a->rows, (float)sy, &yofs); \
316
0
    if (yofs.si[3] > psi) \
317
0
    { \
318
0
      for (; siy <= yofs.si[3]; siy++) \
319
0
      { \
320
0
        unsigned char* row = buf + (siy & 0x3) * b->step; \
321
0
        for (j = 0; j < b->cols; j++) \
322
0
          for (k = 0; k < ch; k++) \
323
0
            _for_set_b(row, j * ch + k, _for_get(a_ptr, xofs[j].si[0] * ch + k) * xofs[j].coeffs[0] + \
324
0
                          _for_get(a_ptr, xofs[j].si[1] * ch + k) * xofs[j].coeffs[1] + \
325
0
                          _for_get(a_ptr, xofs[j].si[2] * ch + k) * xofs[j].coeffs[2] + \
326
0
                          _for_get(a_ptr, xofs[j].si[3] * ch + k) * xofs[j].coeffs[3]); \
327
0
        a_ptr += a->step; \
328
0
      } \
329
0
      psi = yofs.si[3]; \
330
0
    } \
331
0
    unsigned char* row[4] = { \
332
0
      buf + (yofs.si[0] & 0x3) * b->step, \
333
0
      buf + (yofs.si[1] & 0x3) * b->step, \
334
0
      buf + (yofs.si[2] & 0x3) * b->step, \
335
0
      buf + (yofs.si[3] & 0x3) * b->step, \
336
0
    }; \
337
0
    for (j = 0; j < b->cols * ch; j++) \
338
0
      _for_set_b(b_ptr, j, _for_get_b(row[0], j, 0) * yofs.coeffs[0] + _for_get_b(row[1], j) * yofs.coeffs[1] + \
339
0
                 _for_get_b(row[2], j, 0) * yofs.coeffs[2] + _for_get_b(row[3], j) * yofs.coeffs[3]); \
340
0
    b_ptr += b->step; \
341
0
  }
342
0
  ccv_matrix_getter(a->type, ccv_matrix_setter_getter_float_only, b->type, for_block);
343
0
#undef for_block
344
0
  if (workspace)
345
0
    ccfree(workspace);
346
0
}
347
348
static void _ccv_init_cubic_integer_coeffs(int si, int sz, float s, ccv_cubic_integer_coeffs_t* coeff)
349
0
{
350
0
  const float A = -0.75f;
351
0
  coeff->si[0] = ccv_min(ccv_max(si - 1, 0), sz - 1);
352
0
  coeff->si[1] = ccv_min(ccv_max(si, 0), sz - 1);
353
0
  coeff->si[2] = ccv_min(ccv_max(si + 1, 0), sz - 1);
354
0
  coeff->si[3] = ccv_min(ccv_max(si + 2, 0), sz - 1);
355
0
  float x = s - si;
356
0
  const int W_BITS = 1 << 6;
357
0
  coeff->coeffs[0] = (int)((((A * (x + 1) - 5 * A) * (x + 1) + 8 * A) * (x + 1) - 4 * A) * W_BITS + 0.5);
358
0
  coeff->coeffs[1] = (int)((((A + 2) * x - (A + 3)) * x * x + 1) * W_BITS + 0.5);
359
0
  coeff->coeffs[2] = (int)((((A + 2) * (1 - x) - (A + 3)) * (1 - x) * (1 - x) + 1) * W_BITS + 0.5);
360
0
  coeff->coeffs[3] = W_BITS - coeff->coeffs[0] - coeff->coeffs[1] - coeff->coeffs[2];
361
0
}
362
363
static void _ccv_resample_cubic_integer_only(ccv_dense_matrix_t* a, ccv_dense_matrix_t* b, double rows_scale, double cols_scale)
364
0
{
365
0
  assert(CCV_GET_DATA_TYPE(b->type) == CCV_8U || CCV_GET_DATA_TYPE(b->type) == CCV_32S || CCV_GET_DATA_TYPE(b->type) == CCV_64S);
366
0
  int i, j, k, ch = CCV_GET_CHANNEL(a->type);
367
0
  int no_8u_type = (b->type & CCV_8U) ? CCV_32S : b->type;
368
0
  assert(b->cols > 0);
369
0
  unsigned char* workspace = 0;
370
0
  ccv_cubic_integer_coeffs_t* xofs;
371
0
  unsigned char* buf;
372
0
  int bufstep = b->cols * ch * CCV_GET_DATA_TYPE_SIZE(no_8u_type);
373
0
  if (b->step > 2048)
374
0
  {
375
0
    workspace = (unsigned char*)ccmalloc(sizeof(ccv_cubic_integer_coeffs_t) * b->cols + bufstep * 4);
376
0
    xofs = (ccv_cubic_integer_coeffs_t*)workspace;
377
0
    buf = (unsigned char*)(workspace + sizeof(ccv_cubic_integer_coeffs_t) * b->cols);
378
0
  } else {
379
0
    xofs = (ccv_cubic_integer_coeffs_t*)alloca(sizeof(ccv_cubic_integer_coeffs_t) * b->cols);
380
0
    buf = (unsigned char*)alloca(bufstep * 4);
381
0
  }
382
0
  double scale_x = (double)1 / cols_scale;
383
0
  for (i = 0; i < b->cols; i++)
384
0
  {
385
0
    double sx = (i + 0.5) * scale_x - 0.5;
386
0
    _ccv_init_cubic_integer_coeffs((int)sx, a->cols, (float)sx, xofs + i);
387
0
  }
388
0
  double scale_y = (double)1 / rows_scale;
389
#ifdef __clang_analyzer__
390
  memset(buf, 0, bufstep * 4);
391
#endif
392
0
  unsigned char* a_ptr = a->data.u8;
393
0
  unsigned char* b_ptr = b->data.u8;
394
0
  int psi = -1, siy = 0;
395
0
#define for_block(_for_get_a, _for_set, _for_get, _for_set_b) \
396
0
  for (i = 0; i < b->rows; i++) \
397
0
  { \
398
0
    ccv_cubic_integer_coeffs_t yofs; \
399
0
    double sy = (i + 0.5) * scale_y - 0.5; \
400
0
    _ccv_init_cubic_integer_coeffs((int)sy, a->rows, (float)sy, &yofs); \
401
0
    if (yofs.si[3] > psi) \
402
0
    { \
403
0
      for (; siy <= yofs.si[3]; siy++) \
404
0
      { \
405
0
        unsigned char* row = buf + (siy & 0x3) * bufstep; \
406
0
        for (j = 0; j < b->cols; j++) \
407
0
          for (k = 0; k < ch; k++) \
408
0
            _for_set(row, j * ch + k, _for_get_a(a_ptr, xofs[j].si[0] * ch + k) * xofs[j].coeffs[0] + \
409
0
                          _for_get_a(a_ptr, xofs[j].si[1] * ch + k) * xofs[j].coeffs[1] + \
410
0
                          _for_get_a(a_ptr, xofs[j].si[2] * ch + k) * xofs[j].coeffs[2] + \
411
0
                          _for_get_a(a_ptr, xofs[j].si[3] * ch + k) * xofs[j].coeffs[3]); \
412
0
        a_ptr += a->step; \
413
0
      } \
414
0
      psi = yofs.si[3]; \
415
0
    } \
416
0
    unsigned char* row[4] = { \
417
0
      buf + (yofs.si[0] & 0x3) * bufstep, \
418
0
      buf + (yofs.si[1] & 0x3) * bufstep, \
419
0
      buf + (yofs.si[2] & 0x3) * bufstep, \
420
0
      buf + (yofs.si[3] & 0x3) * bufstep, \
421
0
    }; \
422
0
    for (j = 0; j < b->cols * ch; j++) \
423
0
      _for_set_b(b_ptr, j, ccv_descale(_for_get(row[0], j) * yofs.coeffs[0] + _for_get(row[1], j) * yofs.coeffs[1] + \
424
0
                       _for_get(row[2], j) * yofs.coeffs[2] + _for_get(row[3], j) * yofs.coeffs[3], 12)); \
425
0
    b_ptr += b->step; \
426
0
  }
427
0
  ccv_matrix_getter(a->type, ccv_matrix_setter_getter_integer_only, no_8u_type, ccv_matrix_setter_integer_only, b->type, for_block);
428
0
#undef for_block
429
0
  if (workspace)
430
0
    ccfree(workspace);
431
0
}
432
433
void ccv_resample(ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, int btype, double rows_scale, double cols_scale, int type)
434
150k
{
435
150k
  assert(rows_scale > 0 && cols_scale > 0);
436
150k
  ccv_declare_derived_signature(sig, a->sig != 0, ccv_sign_with_format(64, "ccv_resample(%f,%f,%d)", rows_scale, cols_scale, type), a->sig, CCV_EOF_SIGN);
437
150k
  btype = (btype == 0) ? 
CCV_GET_DATA_TYPE1
(a->type) | 1
CCV_GET_CHANNEL1
(a->type) :
CCV_GET_DATA_TYPE150k
(btype) | 150k
CCV_GET_CHANNEL150k
(a->type);
438
150k
  int rows, cols;
439
150k
  if (*b)
440
0
  {
441
0
    rows = (*b)->rows;
442
0
    cols = (*b)->cols;
443
150k
  } else {
444
150k
    rows = (int)(a->rows * rows_scale + 0.5);
445
150k
    cols = (int)(a->cols * cols_scale + 0.5);
446
150k
  }
447
150k
  ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, rows, cols, CCV_ALL_DATA_TYPE | CCV_GET_CHANNEL(a->type), btype, sig);
448
150k
  ccv_object_return_if_cached(, db);
449
150k
  if (a->rows == db->rows && 
a->cols == db->cols150k
)
450
150k
  {
451
150k
    if (CCV_GET_CHANNEL(a->type) == CCV_GET_CHANNEL(db->type) && CCV_GET_DATA_TYPE(db->type) == CCV_GET_DATA_TYPE(a->type))
452
150k
      memcpy(db->data.u8, a->data.u8, a->rows * a->step);
453
0
    else {
454
0
      ccv_shift(a, (ccv_matrix_t**)&db, 0, 0, 0);
455
0
    }
456
150k
    return;
457
150k
  }
458
4
  if ((type & CCV_INTER_AREA) && a->rows >= db->rows && a->cols >= db->cols)
459
4
  {
460
    /* using the fast alternative (fix point scale, 0x100 to avoid overflow) */
461
4
    if (CCV_GET_DATA_TYPE(a->type) == CCV_8U && CCV_GET_DATA_TYPE(db->type) == CCV_8U && 
a->rows * a->cols / (db->rows * db->cols) < 0x1001
)
462
1
      _ccv_resample_area_8u(a, db, rows_scale, cols_scale);
463
3
    else
464
3
      _ccv_resample_area(a, db, rows_scale, cols_scale);
465
4
  } else 
if (0
type & CCV_INTER_CUBIC0
) {
466
0
    if (CCV_GET_DATA_TYPE(db->type) == CCV_32F || CCV_GET_DATA_TYPE(db->type) == CCV_64F)
467
0
      _ccv_resample_cubic_float_only(a, db, rows_scale, cols_scale);
468
0
    else
469
0
      _ccv_resample_cubic_integer_only(a, db, rows_scale, cols_scale);
470
0
  } else if (type & CCV_INTER_LINEAR) {
471
0
    assert(0 && "CCV_INTER_LINEAR is not implemented");
472
0
  } else if (type & CCV_INTER_LANCZOS) {
473
0
    assert(0 && "CCV_INTER_LANCZOS is not implemented");
474
0
  } else {
475
0
    assert(0 && "Not implemented");
476
0
  }
477
4
}
478
479
/* the following code is adopted from OpenCV cvPyrDown */
480
void ccv_sample_down(ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, int type, int src_x, int src_y)
481
1
{
482
1
  assert(src_x >= 0 && src_y >= 0);
483
1
  ccv_declare_derived_signature(sig, a->sig != 0, ccv_sign_with_format(64, "ccv_sample_down(%d,%d)", src_x, src_y), a->sig, CCV_EOF_SIGN);
484
1
  type = (type == 0) ? CCV_GET_DATA_TYPE(a->type) | CCV_GET_CHANNEL(a->type) : 
CCV_GET_DATA_TYPE0
(type) | 0
CCV_GET_CHANNEL0
(a->type);
485
1
  ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, a->rows / 2, a->cols / 2, CCV_ALL_DATA_TYPE | CCV_GET_CHANNEL(a->type), type, sig);
486
1
  ccv_object_return_if_cached(, db);
487
1
  int ch = CCV_GET_CHANNEL(a->type);
488
1
  int cols0 = db->cols - 1 - src_x;
489
1
  int dy, sy = -2 + src_y, sx = src_x * ch, dx, k;
490
1
  unsigned char* workspace = 0;
491
1
  int* tab;
492
1
  unsigned char* buf;
493
1
  if (a->cols + src_x + 2 > 1024 || db->cols * ch > 1024)
494
0
  {
495
0
    workspace = (unsigned char*)ccmalloc((a->cols + src_x + 2) * ch * sizeof(int) + 5 * db->cols * ch * ccv_max(CCV_GET_DATA_TYPE_SIZE(db->type), sizeof(int)));
496
0
    tab = (int*)workspace;
497
0
    buf = (unsigned char*)(workspace + (a->cols + src_x + 2) * ch * sizeof(int));
498
1
  } else {
499
1
    tab = (int*)alloca((a->cols + src_x + 2) * ch * sizeof(int));
500
1
    buf = (unsigned char*)alloca(5 * db->cols * ch * ccv_max(CCV_GET_DATA_TYPE_SIZE(db->type), sizeof(int)));
501
1
  }
502
513
  for (dx = 0; dx < a->cols + src_x + 2; 
dx++512
)
503
2.04k
    
for (k = 0; 512
k < ch;
k++1.53k
)
504
1.53k
      tab[dx * ch + k] = ((dx >= a->cols) ? 
a->cols * 2 - 1 - dx36
:
dx1.50k
) * ch + k;
505
1
  int bufstep = db->cols * ch * ccv_max(CCV_GET_DATA_TYPE_SIZE(db->type), sizeof(int));
506
#ifdef __clang_analyzer__
507
  memset(buf, 0, 5 * bufstep);
508
#endif
509
1
  unsigned char* b_ptr = db->data.u8;
510
  /* why is src_y * 4 in computing the offset of row?
511
   * Essentially, it means sy - src_y but in a manner that doesn't result negative number.
512
   * notice that we added src_y before when computing sy in the first place, however,
513
   * it is not desirable to have that offset when we try to wrap it into our 5-row buffer (
514
   * because in later rearrangement, we have no src_y to backup the arrangement). In
515
   * such micro scope, we managed to stripe 5 addition into one shift and addition. */
516
1
#define for_block(_for_get_a, _for_set, _for_get, _for_set_b) \
517
251
  
for (dy = 0; 1
dy < db->rows;
dy++250
) \
518
250
  { \
519
753
    for(; sy <= dy * 2 + 2 + src_y; 
sy++503
) \
520
503
    { \
521
503
      unsigned char* row = buf + ((sy + src_y * 4 + 2) % 5) * bufstep; \
522
503
      int _sy = (sy < 0) ? 
-1 - sy0
: (sy >= a->rows) ?
a->rows * 2 - 1 - sy11
:
sy492
; \
523
503
      unsigned char* a_ptr = a->data.u8 + a->step * _sy; \
524
2.01k
      for (k = 0; k < ch; 
k++1.50k
) \
525
503
        _for_set(row, k, _for_get_a(a_ptr, sx + k) * 10 + _for_get_a(a_ptr, ch + sx + k) * 5 + _for_get_a(a_ptr, 2 * ch + sx + k)); \
526
120k
      for(dx = ch; dx < cols0 * ch; 
dx += ch119k
) \
527
478k
        
for (k = 0; 119k
k < ch;
k++359k
) \
528
119k
          
_for_set503
(row, dx + k, _for_get_a(a_ptr, dx * 2 + sx + k) * 6 + (_for_get_a(a_ptr, dx * 2 + sx + k - ch) + _for_get_a(a_ptr, dx * 2 + sx + k + ch)) * 4 + _for_get_a(a_ptr, dx * 2 + sx + k - ch * 2) + _for_get_a(a_ptr, dx * 2 + sx + k + ch * 2)); \
529
503
      x_block(_for_get_a, _for_set, _for_get, _for_set_b); \
530
503
    } \
531
250
    unsigned char* rows[5]; \
532
1.50k
    for(k = 0; k < 5; 
k++1.25k
) \
533
1.25k
      rows[k] = buf + ((dy * 2 + k) % 5) * bufstep; \
534
187k
    for(dx = 0; dx < db->cols * ch; 
dx++187k
) \
535
250
      _for_set_b(b_ptr, dx, (_for_get(rows[2], dx) * 6 + (_for_get(rows[1], dx) + _for_get(rows[3], dx)) * 4 + _for_get(rows[0], dx) + _for_get(rows[4], dx)) / 256); \
536
250
    b_ptr += db->step; \
537
250
  }
538
1
  int no_8u_type = (a->type & CCV_8U) ? CCV_32S : 
a->type0
;
539
1
  if (src_x > 0)
540
1
  {
541
1
#define x_block(_for_get_a, _for_set, _for_get, _for_set_b) \
542
6.03k
    
for (dx = cols0 * ch; 503
dx < db->cols * ch;
dx += ch5.53k
) \
543
22.1k
      
for (k = 0; 5.53k
k < ch;
k++16.5k
) \
544
5.53k
        
_for_set503
(row, dx + k, _for_get_a(a_ptr, tab[dx * 2 + sx + k]) * 6 + (_for_get_a(a_ptr, tab[dx * 2 + sx + k - ch]) + _for_get_a(a_ptr, tab[dx * 2 + sx + k + ch])) * 4 + _for_get_a(a_ptr, tab[dx * 2 + sx + k - ch * 2]) + _for_get_a(a_ptr, tab[dx * 2 + sx + k + ch * 2]));
545
1
    ccv_matrix_getter_a(a->type, ccv_matrix_setter_getter, no_8u_type, ccv_matrix_setter_b, db->type, for_block);
546
1
#undef x_block
547
1
  } else {
548
0
#define x_block(_for_get_a, _for_set, _for_get, _for_set_b) \
549
0
    for (k = 0; k < ch; k++) \
550
0
      _for_set(row, (db->cols - 1) * ch + k, _for_get_a(a_ptr, a->cols * ch + sx - ch + k) * 10 + _for_get_a(a_ptr, (a->cols - 2) * ch + sx + k) * 5 + _for_get_a(a_ptr, (a->cols - 3) * ch + sx + k));
551
0
    ccv_matrix_getter_a(a->type, ccv_matrix_setter_getter, no_8u_type, ccv_matrix_setter_b, db->type, for_block);
552
0
#undef x_block
553
0
  }
554
1
#undef for_block
555
1
  if (workspace)
556
0
    ccfree(workspace);
557
1
}
558
559
void ccv_sample_up(ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, int type, int src_x, int src_y)
560
1
{
561
1
  assert(src_x >= 0 && src_y >= 0);
562
1
  ccv_declare_derived_signature(sig, a->sig != 0, ccv_sign_with_format(64, "ccv_sample_up(%d,%d)", src_x, src_y), a->sig, CCV_EOF_SIGN);
563
1
  type = (type == 0) ? CCV_GET_DATA_TYPE(a->type) | CCV_GET_CHANNEL(a->type) : 
CCV_GET_DATA_TYPE0
(type) | 0
CCV_GET_CHANNEL0
(a->type);
564
1
  ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, a->rows * 2, a->cols * 2, CCV_ALL_DATA_TYPE | CCV_GET_CHANNEL(a->type), type, sig);
565
1
  ccv_object_return_if_cached(, db);
566
1
  int ch = CCV_GET_CHANNEL(a->type);
567
1
  int cols0 = a->cols - 1 - src_x;
568
1
  assert(a->cols > 0 && cols0 > 0);
569
1
  int y, x, sy = -1 + src_y, sx = src_x * ch, k;
570
1
  unsigned char* workspace = 0;
571
1
  int* tab;
572
1
  unsigned char* buf;
573
1
  if (a->cols + src_x + 2 > 1024 || db->cols * ch > 1024)
574
1
  {
575
1
    workspace = (unsigned char*)ccmalloc((a->cols + src_x + 2) * ch * sizeof(int) + 3 * db->cols * ch * ccv_max(CCV_GET_DATA_TYPE_SIZE(db->type), sizeof(int)));
576
1
    tab = (int*)workspace;
577
1
    buf = (unsigned char*)(workspace + (a->cols + src_x + 2) * ch * sizeof(int));
578
1
  } else {
579
0
    tab = (int*)alloca((a->cols + src_x + 2) * ch * sizeof(int));
580
0
    buf = (unsigned char*)alloca(3 * db->cols * ch * ccv_max(CCV_GET_DATA_TYPE_SIZE(db->type), sizeof(int)));
581
0
  }
582
513
  for (x = 0; x < a->cols + src_x + 2; 
x++512
)
583
2.04k
    
for (k = 0; 512
k < ch;
k++1.53k
)
584
1.53k
      tab[x * ch + k] = ((x >= a->cols) ? 
a->cols * 2 - 1 - x36
:
x1.50k
) * ch + k;
585
1
  int bufstep = db->cols * ch * ccv_max(CCV_GET_DATA_TYPE_SIZE(db->type), sizeof(int));
586
#ifdef __clang_analyzer__
587
  memset(buf, 0, 3 * bufstep);
588
#endif
589
1
  unsigned char* b_ptr = db->data.u8;
590
  /* why src_y * 2: the same argument as in ccv_sample_down */
591
1
#define for_block(_for_get_a, _for_set, _for_get, _for_set_b) \
592
501
  
for (y = 0; 1
y < a->rows;
y++500
) \
593
500
  { \
594
1.00k
    for (; sy <= y + 1 + src_y; 
sy++502
) \
595
502
    { \
596
502
      unsigned char* row = buf + ((sy + src_y * 2 + 1) % 3) * bufstep; \
597
502
      int _sy = (sy < 0) ? 
-1 - sy0
: (sy >= a->rows) ?
a->rows * 2 - 1 - sy11
:
sy491
; \
598
502
      unsigned char* a_ptr = a->data.u8 + a->step * _sy; \
599
502
      if (a->cols == 1) \
600
502
      { \
601
0
        for (k = 0; k < ch; k++) \
602
0
        { \
603
0
          _for_set(row, k, _for_get_a(a_ptr, k) * (G025 + G075 + G125)); \
604
0
          _for_set(row, k + ch, _for_get_a(a_ptr, k) * (G025 + G075 + G125)); \
605
0
        } \
606
0
        continue; \
607
0
      } \
608
502
      if (sx == 0) \
609
502
      { \
610
0
        for (k = 0; k < ch; k++) \
611
0
        { \
612
0
          _for_set(row, k, _for_get_a(a_ptr, k + sx) * (G025 + G075) + _for_get_a(a_ptr, k + sx + ch) * G125); \
613
0
          _for_set(row, k + ch, _for_get_a(a_ptr, k + sx) * (G125 + G025) + _for_get_a(a_ptr, k + sx + ch) * G075); \
614
0
        } \
615
0
      } \
616
      /* some serious flaw in computing Gaussian weighting in previous version
617
       * specially, we are doing perfect upsampling (2x) so, it concerns a grid like:
618
       * XXYY
619
       * XXYY
620
       * in this case, to upsampling, the weight should be from distance 0.25 and 1.25, and 0.25 and 0.75
621
       * previously, it was mistakingly be 0.0 1.0, 0.5 0.5 (imperfect upsampling (2x - 1)) */ \
622
245k
      for (x = 
(sx == 0)502
?
ch0
:
0502
; x < cols0 * ch;
x += ch245k
) \
623
245k
      { \
624
981k
        for (k = 0; k < ch; 
k++736k
) \
625
736k
        { \
626
736k
          _for_set(row, x * 2 + k, _for_get_a(a_ptr, x + sx - ch + k) * G075 + _for_get_a(a_ptr, x + sx + k) * G025 + _for_get_a(a_ptr, x + sx + ch + k) * G125); \
627
736k
          _for_set(row, x * 2 + ch + k, _for_get_a(a_ptr, x + sx - ch + k) * G125 + _for_get_a(a_ptr, x + sx + k) * G025 + _for_get_a(a_ptr, x + sx + ch + k) * G075); \
628
736k
        } \
629
245k
      } \
630
502
      x_block(_for_get_a, _for_set, _for_get, _for_set_b); \
631
502
    } \
632
500
    unsigned char* rows[3]; \
633
2.00k
    for (k = 0; k < 3; 
k++1.50k
) \
634
1.50k
      rows[k] = buf + ((y + k) % 3) * bufstep; \
635
1.50M
    for (x = 0; x < db->cols * ch; 
x++1.50M
) \
636
1.50M
    { \
637
1.50M
      _for_set_b(b_ptr, x, (_for_get(rows[0], x) * G075 + _for_get(rows[1], x) * G025 + _for_get(rows[2], x) * G125) / GALL); \
638
1.50M
      _for_set_b(b_ptr + db->step, x, (_for_get(rows[0], x) * G125 + _for_get(rows[1], x) * G025 + _for_get(rows[2], x) * G075) / GALL); \
639
1.50M
    } \
640
500
    b_ptr += 2 * db->step; \
641
500
  }
642
1
  int no_8u_type = (a->type & CCV_8U) ? CCV_32S : 
a->type0
;
643
  /* unswitch if condition in manual way */
644
1
  if ((a->type & CCV_8U) || 
(a->type & CCV_32S)0
||
(a->type & CCV_64S)0
)
645
1
  {
646
1
#define G025 (23)
647
1
#define G075 (8)
648
1
#define G125 (1)
649
1
#define GALL (1024)
650
1
    if (src_x > 0)
651
1
    {
652
1
#define x_block(_for_get_a, _for_set, _for_get, _for_set_b) \
653
6.02k
      
for (x = cols0 * ch; 502
x < a->cols * ch;
x += ch5.52k
) \
654
22.0k
        
for (k = 0; 5.52k
k < ch;
k++16.5k
) \
655
16.5k
        { \
656
16.5k
          _for_set(row, x * 2 + k, _for_get_a(a_ptr, tab[x + sx - ch + k]) * G075 + _for_get_a(a_ptr, tab[x + sx + k]) * G025 + _for_get_a(a_ptr, tab[x + sx + ch + k]) * G125); \
657
16.5k
          _for_set(row, x * 2 + ch + k, _for_get_a(a_ptr, tab[x + sx - ch + k]) * G125 + _for_get_a(a_ptr, tab[x + sx + k]) * G025 + _for_get_a(a_ptr, tab[x + sx + ch + k], 0) * G075); \
658
16.5k
      }
659
1
      ccv_matrix_getter_integer_only(a->type, ccv_matrix_setter_getter_integer_only, no_8u_type, ccv_matrix_setter_b, db->type, for_block);
660
1
#undef x_block
661
1
    } else {
662
0
#define x_block(_for_get_a, _for_set, _for_get, _for_set_b) \
663
0
      for (k = 0; k < ch; k++) \
664
0
      { \
665
0
        _for_set(row, (a->cols - 1) * 2 * ch + k, _for_get_a(a_ptr, (a->cols - 2) * ch + k) * G075 + _for_get_a(a_ptr, (a->cols - 1) * ch + k) * (G025 + G125)); \
666
0
        _for_set(row, (a->cols - 1) * 2 * ch + ch + k, _for_get_a(a_ptr, (a->cols - 2) * ch + k) * G125 + _for_get_a(a_ptr, (a->cols - 1) * ch + k) * (G025 + G075)); \
667
0
      }
668
0
      ccv_matrix_getter_integer_only(a->type, ccv_matrix_setter_getter_integer_only, no_8u_type, ccv_matrix_setter_b, db->type, for_block);
669
0
#undef x_block
670
0
    }
671
1
#undef GALL
672
1
#undef G125
673
1
#undef G075
674
1
#undef G025
675
1
  } else {
676
0
#define G025 (0.705385)
677
0
#define G075 (0.259496)
678
0
#define G125 (0.035119)
679
0
#define GALL (1)
680
0
    if (src_x > 0)
681
0
    {
682
0
#define x_block(_for_get_a, _for_set, _for_get, _for_set_b) \
683
0
      for (x = cols0 * ch; x < a->cols * ch; x += ch) \
684
0
        for (k = 0; k < ch; k++) \
685
0
        { \
686
0
          _for_set(row, x * 2 + k, _for_get_a(a_ptr, tab[x + sx - ch + k]) * G075 + _for_get_a(a_ptr, tab[x + sx + k]) * G025 + _for_get_a(a_ptr, tab[x + sx + ch + k]) * G125); \
687
0
          _for_set(row, x * 2 + ch + k, _for_get_a(a_ptr, tab[x + sx - ch + k]) * G125 + _for_get_a(a_ptr, tab[x + sx + k]) * G025 + _for_get_a(a_ptr, tab[x + sx + ch + k]) * G075); \
688
0
      }
689
0
      ccv_matrix_getter_float_only(a->type, ccv_matrix_setter_getter_float_only, no_8u_type, ccv_matrix_setter_b, db->type, for_block);
690
0
#undef x_block
691
0
    } else {
692
0
#define x_block(_for_get_a, _for_set, _for_get, _for_set_b) \
693
0
      for (k = 0; k < ch; k++) \
694
0
      { \
695
0
        _for_set(row, (a->cols - 1) * 2 * ch + k, _for_get_a(a_ptr, (a->cols - 2) * ch + k) * G075 + _for_get_a(a_ptr, (a->cols - 1) * ch + k) * (G025 + G125)); \
696
0
        _for_set(row, (a->cols - 1) * 2 * ch + ch + k, _for_get_a(a_ptr, (a->cols - 2) * ch + k) * G125 + _for_get_a(a_ptr, (a->cols - 1) * ch + k) * (G025 + G075)); \
697
0
      }
698
0
      ccv_matrix_getter_float_only(a->type, ccv_matrix_setter_getter_float_only, no_8u_type, ccv_matrix_setter_b, db->type, for_block);
699
0
#undef x_block
700
0
    }
701
0
#undef GALL
702
0
#undef G125
703
0
#undef G075
704
0
#undef G025
705
0
  }
706
1
#undef for_block
707
1
  if (workspace)
708
1
    ccfree(workspace);
709
1
}