Coverage Report

Created: 2021-09-21 22:26

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/upsample/ccv_nnc_upsample_cpu_ref.c
Line
Count
Source
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
// Shared methods.
14
#include "../_ccv_nnc_cpu_ref.h"
15
16
typedef struct {
	int si[2]; // Source indices of the two taps along one axis; si[1] is clamped to ss - 1.
	float sc[2]; // Blend weights for the two taps; sc[0] + sc[1] == 1.
} ccv_nnc_bi_coeffs_t;

// Precompute per-output-pixel bilinear sampling coefficients for one axis.
// ss: source extent, sz: destination extent, s: ratio ss / sz (<= 1 for upsampling).
// Uses the half-pixel mapping xs = (i + 0.5) * s - 0.5 and clamps it at 0 so the
// first destination pixel(s) replicate the border sample. Without the clamp,
// (int)xs truncates a negative xs to 0 while sc[1] = xs - 0 stays negative,
// which extrapolates past the border (sc[0] > 1) instead of clamping — the
// standard convention (e.g. PyTorch/OpenCV bilinear resize) clamps.
static void _ccv_nnc_init_bi_coeffs(const int ss, const int sz, const float s, ccv_nnc_bi_coeffs_t* const coeff)
{
	int i;
	for (i = 0; i < sz; i++)
	{
		const float xs0 = (i + 0.5) * s - 0.5;
		const float xs = xs0 > 0 ? xs0 : 0; // Clamp to the first source sample at the border.
		coeff[i].si[0] = (int)xs;
		// Second tap is the next source sample, clamped to the last valid index.
		coeff[i].si[1] = coeff[i].si[0] + 1 < ss ? coeff[i].si[0] + 1 : ss - 1;
		coeff[i].sc[1] = xs - coeff[i].si[0];
		coeff[i].sc[0] = 1.0 - coeff[i].sc[1];
	}
}
33
34
// Bilinear upsampling, forward pass (CPU reference implementation).
// inputs[0] is the source tensor view, outputs[0] the enlarged destination;
// both must share the same tensor format and are assumed to hold float32
// (only f32 data pointers are read/written). Only enlargement is supported:
// the rheight/rwidth <= 1 asserts require each spatial dim of the output to
// be at least as large as the input's. Sampling coefficients per axis are
// precomputed into a stream-context workspace by _ccv_nnc_init_bi_coeffs.
static int _ccv_nnc_upsample_bilinear_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	assert(input_size >= 1);
	assert(output_size >= 1);
	ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
	ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0];
	assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
	assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
	// Assuming this is float 32.
	int adim[CCV_NNC_MAX_DIM_ALLOC];
	int bdim[CCV_NNC_MAX_DIM_ALLOC];
	ccv_nnc_tensor_view_get_dim(a, adim);
	ccv_nnc_tensor_view_get_dim(b, bdim);
	// "inc" are per-dimension increments (strides); for tensor views they may
	// exceed the corresponding dim, hence the (inc - dim) skip terms below.
	int ainc[CCV_NNC_MAX_DIM_ALLOC];
	int binc[CCV_NNC_MAX_DIM_ALLOC];
	assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
	ccv_nnc_tensor_view_get_inc(a, ainc);
	ccv_nnc_tensor_view_get_inc(b, binc);
	int i[CCV_NNC_MAX_DIM + 2];
	int xd, yd, cd;
	const float* ap = a->data.f32;
	float* bp = b->data.f32;
	assert(a->info.format == b->info.format);
	if (a->info.format == CCV_TENSOR_FORMAT_NCHW)
	{
		// NCHW: spatial dims are adim[2] (height) and adim[3] (width).
		const float rheight = (float)adim[2] / bdim[2];
		const float rwidth = (float)adim[3] / bdim[3];
		assert(rheight <= 1);
		assert(rwidth <= 1);
		// One workspace holds y coefficients followed by x coefficients.
		ccv_nnc_bi_coeffs_t* const ycoeff = (ccv_nnc_bi_coeffs_t*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(ccv_nnc_bi_coeffs_t) * (bdim[2] + bdim[3]), CCV_TENSOR_CPU_MEMORY);
		ccv_nnc_bi_coeffs_t* const xcoeff = ycoeff + bdim[2];
		_ccv_nnc_init_bi_coeffs(adim[2], bdim[2], rheight, ycoeff);
		_ccv_nnc_init_bi_coeffs(adim[3], bdim[3], rwidth, xcoeff);
		assert(adim[0] == bdim[0]);
		assert(adim[1] == bdim[1]);
		for (i[0] = 0; i[0] < adim[0]; i[0]++)
		{
			for (i[1] = 0; i[1] < adim[1]; i[1]++)
			{
				// ap0 walks down source rows incrementally; pysi0 remembers the
				// source row ap0 currently points at so we only advance the delta.
				int pysi0 = 0;
				const float* ap0 = ap;
				for (yd = 0; yd < bdim[2]; yd++)
				{
					const int ysi0 = ycoeff[yd].si[0];
					// ysi1 is a 0/1 flag: step to the next source row unless ysi0
					// is already the last row (then both y taps read the same row).
					const int ysi1 = (ysi0 + 1 < adim[2]) ? 1 : 0;
					const float ysc0 = ycoeff[yd].sc[0];
					const float ysc1 = ycoeff[yd].sc[1];
					if (pysi0 < ysi0) // Move to ay1 line.
					{
						ap0 += (ysi0 - pysi0) * ainc[3];
						pysi0 = ysi0;
					}
					for (xd = 0; xd < bdim[3]; xd++)
					{
						// Weighted sum of the 4 neighboring source pixels.
						const ccv_nnc_bi_coeffs_t cof = xcoeff[xd];
						bp[xd] = ap0[cof.si[0]] * cof.sc[0] * ysc0 + ap0[cof.si[1]] * cof.sc[1] * ysc0 +
							ap0[cof.si[0] + ainc[3] * ysi1] * cof.sc[0] * ysc1 + ap0[cof.si[1] + ainc[3] * ysi1] * cof.sc[1] * ysc1;
					}
					bp += binc[3]; // Next destination row.
				}
				ap += ainc[2] * ainc[3]; // Next source channel plane.
				bp += (binc[2] - bdim[2]) * binc[3]; // Skip view padding rows in b.
			}
			ap += (ainc[1] - adim[1]) * ainc[2] * ainc[3];
			bp += (binc[1] - bdim[1]) * binc[2] * binc[3];
		}
	} else {
		// Any case, this is either NHWC or CHWN
		assert(a->info.format == CCV_TENSOR_FORMAT_NHWC || a->info.format == CCV_TENSOR_FORMAT_CHWN);
		// Here spatial dims are adim[1] (height) and adim[2] (width); adim[3] is
		// the innermost contiguous axis, interpolated with shared weights.
		const float rheight = (float)adim[1] / bdim[1];
		const float rwidth = (float)adim[2] / bdim[2];
		assert(rheight <= 1);
		assert(rwidth <= 1);
		ccv_nnc_bi_coeffs_t* const ycoeff = (ccv_nnc_bi_coeffs_t*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(ccv_nnc_bi_coeffs_t) * (bdim[1] + bdim[2]), CCV_TENSOR_CPU_MEMORY);
		ccv_nnc_bi_coeffs_t* const xcoeff = ycoeff + bdim[1];
		_ccv_nnc_init_bi_coeffs(adim[1], bdim[1], rheight, ycoeff);
		_ccv_nnc_init_bi_coeffs(adim[2], bdim[2], rwidth, xcoeff);
		assert(adim[0] == bdim[0]);
		assert(adim[3] == bdim[3]);
		for (i[0] = 0; i[0] < adim[0]; i[0]++)
		{
			int pysi0 = 0;
			const float* ap0 = ap;
			for (yd = 0; yd < bdim[1]; yd++)
			{
				const int ysi0 = ycoeff[yd].si[0];
				const int ysi1 = (ysi0 + 1 < adim[1]) ? 1 : 0; // 0/1 row-step flag, as above.
				const float ysc0 = ycoeff[yd].sc[0];
				const float ysc1 = ycoeff[yd].sc[1];
				if (pysi0 < ysi0) // Move to ay1 line.
				{
					ap0 += (ysi0 - pysi0) * ainc[2] * ainc[3];
					pysi0 = ysi0;
				}
				for (xd = 0; xd < bdim[2]; xd++)
				{
					const ccv_nnc_bi_coeffs_t cof = xcoeff[xd];
					// Hoist the 4 bilinear weights; they are shared across the
					// innermost (channel-like) axis.
					const float c00 = cof.sc[0] * ysc0;
					const float c01 = cof.sc[1] * ysc0;
					const float c10 = cof.sc[0] * ysc1;
					const float c11 = cof.sc[1] * ysc1;
					const float* const ap00 = ap0 + cof.si[0] * ainc[3];
					const float* const ap01 = ap0 + cof.si[1] * ainc[3];
					const float* const ap10 = ap0 + (cof.si[0] + ysi1 * ainc[2]) * ainc[3];
					const float* const ap11 = ap0 + (cof.si[1] + ysi1 * ainc[2]) * ainc[3];
					for (cd = 0; cd < bdim[3]; cd++)
						bp[cd] = ap00[cd] * c00 + ap01[cd] * c01 +
							ap10[cd] * c10 + ap11[cd] * c11;
					bp += binc[3];
				}
				bp += (binc[2] - bdim[2]) * binc[3]; // Skip view padding in b.
			}
			ap += ainc[1] * ainc[2] * ainc[3];
			bp += (binc[1] - bdim[1]) * binc[2] * binc[3];
		}
	}
	return CCV_NNC_EXEC_SUCCESS;
}
152
153
// Bilinear upsampling, backward pass (CPU reference implementation).
// inputs[0] (b) is the incoming gradient w.r.t. the enlarged output;
// outputs[0] (a) receives the gradient w.r.t. the smaller input. The gradient
// is zeroed first, then each output-gradient pixel is scatter-added into the
// 4 source positions it was interpolated from, weighted by the same bilinear
// coefficients as the forward pass (exact transpose of the forward op).
static int _ccv_nnc_upsample_bilinear_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	assert(input_size >= 1);
	assert(output_size >= 1);
	ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[0];
	ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)outputs[0];
	assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
	assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
	// Assuming this is float 32.
	int adim[CCV_NNC_MAX_DIM_ALLOC];
	int bdim[CCV_NNC_MAX_DIM_ALLOC];
	ccv_nnc_tensor_view_get_dim(a, adim);
	ccv_nnc_tensor_view_get_dim(b, bdim);
	// Increments (strides) may exceed dims for tensor views; see forward pass.
	int ainc[CCV_NNC_MAX_DIM_ALLOC];
	int binc[CCV_NNC_MAX_DIM_ALLOC];
	assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
	ccv_nnc_tensor_view_get_inc(a, ainc);
	ccv_nnc_tensor_view_get_inc(b, binc);
	int i[CCV_NNC_MAX_DIM + 2];
	int xd, yd, cd;
	// Zero the input gradient before accumulation.
	_ccv_nnc_tensor_set_cpu_ref(a, 0);
	float* ap = a->data.f32;
	const float* bp = b->data.f32;
	assert(a->info.format == b->info.format);
	if (a->info.format == CCV_TENSOR_FORMAT_NCHW)
	{
		// NCHW: spatial dims are adim[2] (height) and adim[3] (width).
		const float rheight = (float)adim[2] / bdim[2];
		const float rwidth = (float)adim[3] / bdim[3];
		assert(rheight <= 1);
		assert(rwidth <= 1);
		// One workspace holds y coefficients followed by x coefficients.
		ccv_nnc_bi_coeffs_t* const ycoeff = (ccv_nnc_bi_coeffs_t*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(ccv_nnc_bi_coeffs_t) * (bdim[2] + bdim[3]), CCV_TENSOR_CPU_MEMORY);
		ccv_nnc_bi_coeffs_t* const xcoeff = ycoeff + bdim[2];
		_ccv_nnc_init_bi_coeffs(adim[2], bdim[2], rheight, ycoeff);
		_ccv_nnc_init_bi_coeffs(adim[3], bdim[3], rwidth, xcoeff);
		assert(adim[0] == bdim[0]);
		assert(adim[1] == bdim[1]);
		for (i[0] = 0; i[0] < adim[0]; i[0]++)
		{
			for (i[1] = 0; i[1] < adim[1]; i[1]++)
			{
				// ap0 walks down destination-gradient rows incrementally; pysi0
				// tracks the row ap0 currently points at.
				int pysi0 = 0;
				float* ap0 = ap;
				for (yd = 0; yd < bdim[2]; yd++)
				{
					const int ysi0 = ycoeff[yd].si[0];
					// 0/1 flag: accumulate into the next row unless ysi0 is the last.
					const int ysi1 = (ysi0 + 1 < adim[2]) ? 1 : 0;
					const float ysc0 = ycoeff[yd].sc[0];
					const float ysc1 = ycoeff[yd].sc[1];
					if (pysi0 < ysi0) // Move to ay1 line.
					{
						ap0 += (ysi0 - pysi0) * ainc[3];
						pysi0 = ysi0;
					}
					for (xd = 0; xd < bdim[3]; xd++)
					{
						// Scatter-add this gradient pixel into its 4 source taps.
						const ccv_nnc_bi_coeffs_t cof = xcoeff[xd];
						ap0[cof.si[0]] += bp[xd] * ysc0 * cof.sc[0];
						ap0[cof.si[1]] += bp[xd] * ysc0 * cof.sc[1];
						ap0[cof.si[0] + ainc[3] * ysi1] += bp[xd] * ysc1 * cof.sc[0];
						ap0[cof.si[1] + ainc[3] * ysi1] += bp[xd] * ysc1 * cof.sc[1];
					}
					bp += binc[3]; // Next output-gradient row.
				}
				ap += ainc[2] * ainc[3]; // Next channel plane.
				bp += (binc[2] - bdim[2]) * binc[3]; // Skip view padding rows in b.
			}
			ap += (ainc[1] - adim[1]) * ainc[2] * ainc[3];
			bp += (binc[1] - bdim[1]) * binc[2] * binc[3];
		}
	} else {
		// Any case, this is either NHWC or CHWN
		assert(a->info.format == CCV_TENSOR_FORMAT_NHWC || a->info.format == CCV_TENSOR_FORMAT_CHWN);
		// Spatial dims are adim[1] (height) and adim[2] (width); adim[3] is the
		// innermost contiguous axis, accumulated with shared weights.
		const float rheight = (float)adim[1] / bdim[1];
		const float rwidth = (float)adim[2] / bdim[2];
		assert(rheight <= 1);
		assert(rwidth <= 1);
		ccv_nnc_bi_coeffs_t* const ycoeff = (ccv_nnc_bi_coeffs_t*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(ccv_nnc_bi_coeffs_t) * (bdim[1] + bdim[2]), CCV_TENSOR_CPU_MEMORY);
		ccv_nnc_bi_coeffs_t* const xcoeff = ycoeff + bdim[1];
		_ccv_nnc_init_bi_coeffs(adim[1], bdim[1], rheight, ycoeff);
		_ccv_nnc_init_bi_coeffs(adim[2], bdim[2], rwidth, xcoeff);
		assert(adim[0] == bdim[0]);
		assert(adim[3] == bdim[3]);
		for (i[0] = 0; i[0] < adim[0]; i[0]++)
		{
			int pysi0 = 0;
			float* ap0 = ap;
			for (yd = 0; yd < bdim[1]; yd++)
			{
				const int ysi0 = ycoeff[yd].si[0];
				const int ysi1 = (ysi0 + 1 < adim[1]) ? 1 : 0; // 0/1 row-step flag.
				const float ysc0 = ycoeff[yd].sc[0];
				const float ysc1 = ycoeff[yd].sc[1];
				if (pysi0 < ysi0) // Move to ay1 line.
				{
					ap0 += (ysi0 - pysi0) * ainc[2] * ainc[3];
					pysi0 = ysi0;
				}
				for (xd = 0; xd < bdim[2]; xd++)
				{
					const ccv_nnc_bi_coeffs_t cof = xcoeff[xd];
					// Hoist the 4 bilinear weights shared across the inner axis.
					const float c00 = cof.sc[0] * ysc0;
					const float c01 = cof.sc[1] * ysc0;
					const float c10 = cof.sc[0] * ysc1;
					const float c11 = cof.sc[1] * ysc1;
					float* const ap00 = ap0 + cof.si[0] * ainc[3];
					float* const ap01 = ap0 + cof.si[1] * ainc[3];
					float* const ap10 = ap0 + (cof.si[0] + ysi1 * ainc[2]) * ainc[3];
					float* const ap11 = ap0 + (cof.si[1] + ysi1 * ainc[2]) * ainc[3];
					for (cd = 0; cd < bdim[3]; cd++)
					{
						ap00[cd] += bp[cd] * c00;
						ap01[cd] += bp[cd] * c01;
						ap10[cd] += bp[cd] * c10;
						ap11[cd] += bp[cd] * c11;
					}
					bp += binc[3];
				}
				bp += (binc[2] - bdim[2]) * binc[3]; // Skip view padding in b.
			}
			ap += ainc[1] * ainc[2] * ainc[3];
			bp += (binc[1] - bdim[1]) * binc[2] * binc[3];
		}
	}
	return CCV_NNC_EXEC_SUCCESS;
}
278
279
REGISTER_COMMAND_BACKEND(CCV_NNC_UPSAMPLE_BILINEAR_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	// Reference CPU backend for bilinear upsample forward:
	// float32 only, CPU memory, all three supported tensor layouts.
	registry->exec = _ccv_nnc_upsample_bilinear_forw;
	registry->algorithms = 1;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
}
287
288
REGISTER_COMMAND_BACKEND(CCV_NNC_UPSAMPLE_BILINEAR_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	// Reference CPU backend for bilinear upsample backward:
	// float32 only, CPU memory, all three supported tensor layouts.
	registry->exec = _ccv_nnc_upsample_bilinear_back;
	registry->algorithms = 1;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
}