Coverage Report

Created: 2021-04-07 03:47

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/compression/ccv_nnc_lssc_cpu_ref.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
/*
 * Forward (compress) pass of the LSSC command, CPU reference implementation.
 *
 * For every pair (inputs[n], outputs[n]) with n < output_size, the half-precision
 * input tensor `a` is processed one 4x4 tile at a time. Each tile is written to
 * the output tensor `b` as four 16-bit words:
 *   word 0: tile minimum (half precision)
 *   word 1: tile maximum (half precision)
 *   word 2: 2-bit level indices for tile elements 0..7
 *   word 3: 2-bit level indices for tile elements 8..15
 * A level index is the nearest of 4 evenly spaced levels between min and max.
 *
 * cmd, hint, flags and stream_context are not used by this implementation.
 * Always returns CCV_NNC_EXEC_SUCCESS; shape requirements are enforced by assert only.
 */
static int _ccv_nnc_lssc_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  assert(output_size <= input_size);
  int n;
  ccv_float16_t a16[16]; // One 4x4 tile gathered from the input, row-major.
  float a32[16]; // The same tile converted to single precision.
  float bm[2]; // { min, max } of the tile.
  for (n = 0; n < output_size; n++)
  {
    const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[n];
    ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[n];
    int i[CCV_NNC_MAX_DIM]; // Tile coordinates: i[0] = output row, i[1] = tile column.
    int j[CCV_NNC_MAX_DIM]; // Element coordinates inside the current tile.
    int c, k;
    const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
    assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2);
    // Skip the leading dimension when there is one extra, so that adim[1] and
    // adim[CCV_NNC_MAX_DIM] are the two innermost (row / column) extents.
    const int* adim = (a_nd == CCV_NNC_MAX_DIM + 1) ? a->info.dim : a->info.dim + 1;
    const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
    assert(b_nd == CCV_NNC_MAX_DIM + 1 || b_nd == CCV_NNC_MAX_DIM + 2);
    const int* bdim = (b_nd == CCV_NNC_MAX_DIM + 1) ? b->info.dim : b->info.dim + 1;
    ccv_float16_t* ap = a->data.f16;
    // Tensor views carry their own increments; plain tensors use their dimensions.
    const int* ainc = CCV_IS_TENSOR_VIEW(a) ? ((a_nd == CCV_NNC_MAX_DIM + 1) ? a->inc : a->inc + 1) : adim;
    ccv_float16_t* bp = b->data.f16;
    const int* binc = CCV_IS_TENSOR_VIEW(b) ? ((b_nd == CCV_NNC_MAX_DIM + 1) ? b->inc : b->inc + 1) : bdim;
    // Number of 2-D planes to process (batch count times channel count).
    const int nxc = ccv_nnc_tensor_get_n(a->info) * ccv_nnc_tensor_get_c(a->info);
    assert(nxc == ccv_nnc_tensor_get_n(b->info) * ccv_nnc_tensor_get_c(b->info));
    for (k = 0; k < nxc; k++)
    {
      for (i[0] = 0; i[0] < bdim[1]; i[0]++)
      {
        // Every tile occupies 4 half-words in an output row.
        assert(bdim[CCV_NNC_MAX_DIM] % 4 == 0);
        const int bw = bdim[CCV_NNC_MAX_DIM] / 4;
        for (i[1] = 0; i[1] < bw; i[1]++)
        {
          // Top-left element of the 4x4 input tile.
          ccv_float16_t* apz = ap + i[0] * 4 * ainc[CCV_NNC_MAX_DIM] + i[1] * 4;
          // Valid extent of the tile; smaller than 4 when it crosses the input edge.
          const int h = ccv_min(i[0] * 4 + 4, adim[1]) - i[0] * 4;
          const int w = ccv_min(i[1] * 4 + 4, adim[CCV_NNC_MAX_DIM]) - i[1] * 4;
          // Pre-fill with the top-left value so that out-of-range slots hold a
          // value that is already in the tile and cannot widen min / max.
          for (c = 0; c < 16; c++)
            a16[c] = apz[0];
          for (j[0] = 0; j[0] < h; j[0]++)
            for (j[1] = 0; j[1] < w; j[1]++)
              a16[j[0] * 4 + j[1]] = apz[j[0] * ainc[CCV_NNC_MAX_DIM] + j[1]];
          ccv_half_precision_to_float((uint16_t*)a16, a32, 16);
          float amax = a32[0];
          float amin = a32[0];
          for (c = 1; c < 16; c++)
            amax = ccv_max(a32[c], amax), amin = ccv_min(a32[c], amin);
          bm[0] = amin;
          bm[1] = amax;
          ccv_float16_t* bpz = bp + i[0] * binc[CCV_NNC_MAX_DIM] + i[1] * 4;
          uint16_t* const bpz16 = (uint16_t*)bpz;
          // Words 0 and 1: min and max in half precision.
          ccv_float_to_half_precision(bm, bpz16, 2);
          // abottom = amin - (amax - amin) / 6, i.e. half a level below amin, so the
          // truncating int cast below rounds to the nearest of the 4 levels.
          const float abottom = amin * 7 / 6 - amax / 6;
          // Guard against a zero range (constant tile).
          const float ascale = 3 / ccv_max(amax - amin, 1e-6);
          // Word 2: indices of elements 0..7, two bits each, element c at bit 2 * c.
          bpz16[2] = 0;
          for (c = 0; c < 8; c++)
            bpz16[2] |= ((ccv_clamp((int)((a32[c] - abottom) * ascale), 0, 3)) << (c << 1));
          // Word 3: indices of elements 8..15, same packing.
          bpz16[3] = 0;
          for (c = 0; c < 8; c++)
            bpz16[3] |= ((ccv_clamp((int)((a32[8 + c] - abottom) * ascale), 0, 3)) << (c << 1));
        }
      }
      // Advance both pointers to the next plane.
      bp += binc[CCV_NNC_MAX_DIM - 1] * binc[CCV_NNC_MAX_DIM];
      ap += ainc[CCV_NNC_MAX_DIM - 1] * ainc[CCV_NNC_MAX_DIM];
    }
  }
  return CCV_NNC_EXEC_SUCCESS;
}
81
82
/*
 * Backward (decompress) pass of the LSSC command, CPU reference implementation.
 *
 * For every pair (inputs[n], outputs[n]) with n < output_size, the compressed
 * tensor `b` is read four 16-bit words at a time and expanded into one 4x4 tile
 * of the half-precision output tensor `a`:
 *   word 0: tile minimum (half precision)
 *   word 1: tile maximum (half precision)
 *   word 2: 2-bit level indices for tile elements 0..7
 *   word 3: 2-bit level indices for tile elements 8..15
 * Each index selects one of 4 levels: min, min + range / 3, min + 2 * range / 3, max.
 *
 * cmd, hint, flags and stream_context are not used by this implementation.
 * Always returns CCV_NNC_EXEC_SUCCESS; shape requirements are enforced by assert only.
 */
static int _ccv_nnc_lssc_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  assert(output_size <= input_size);
  int n;
  ccv_float16_t a16[16]; // One decoded 4x4 tile in half precision, row-major.
  float a32[16]; // The same tile in single precision.
  float bm[4]; // The 4 reconstruction levels, ascending from min to max.
  for (n = 0; n < output_size; n++)
  {
    const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[n];
    ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)outputs[n];
    int i[CCV_NNC_MAX_DIM]; // Tile coordinates: i[0] = compressed row, i[1] = tile column.
    int j[CCV_NNC_MAX_DIM]; // Element coordinates inside the current tile.
    int c, k;
    const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
    assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2);
    // Skip the leading dimension when there is one extra, so that adim[1] and
    // adim[CCV_NNC_MAX_DIM] are the two innermost (row / column) extents.
    const int* adim = (a_nd == CCV_NNC_MAX_DIM + 1) ? a->info.dim : a->info.dim + 1;
    const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
    assert(b_nd == CCV_NNC_MAX_DIM + 1 || b_nd == CCV_NNC_MAX_DIM + 2);
    const int* bdim = (b_nd == CCV_NNC_MAX_DIM + 1) ? b->info.dim : b->info.dim + 1;
    ccv_float16_t* ap = a->data.f16;
    // Tensor views carry their own increments; plain tensors use their dimensions.
    const int* ainc = CCV_IS_TENSOR_VIEW(a) ? ((a_nd == CCV_NNC_MAX_DIM + 1) ? a->inc : a->inc + 1) : adim;
    ccv_float16_t* bp = b->data.f16;
    const int* binc = CCV_IS_TENSOR_VIEW(b) ? ((b_nd == CCV_NNC_MAX_DIM + 1) ? b->inc : b->inc + 1) : bdim;
    // Number of 2-D planes to process (batch count times channel count).
    const int nxc = ccv_nnc_tensor_get_n(a->info) * ccv_nnc_tensor_get_c(a->info);
    assert(nxc == ccv_nnc_tensor_get_n(b->info) * ccv_nnc_tensor_get_c(b->info));
    for (k = 0; k < nxc; k++)
    {
      for (i[0] = 0; i[0] < bdim[1]; i[0]++)
      {
        // Every tile occupies 4 half-words in a compressed row.
        assert(bdim[CCV_NNC_MAX_DIM] % 4 == 0);
        const int bw = bdim[CCV_NNC_MAX_DIM] / 4;
        for (i[1] = 0; i[1] < bw; i[1]++)
        {
          ccv_float16_t* bpz = bp + i[0] * binc[CCV_NNC_MAX_DIM] + i[1] * 4;
          uint16_t* const bpz16 = (uint16_t*)bpz;
          // Words 0 and 1 decode to bm[0] = min and bm[1] = max.
          ccv_half_precision_to_float(bpz16, bm, 2);
          // Move max to the last slot, then fill the two interior levels.
          bm[3] = bm[1];
          bm[1] = bm[3] / 3 + bm[0] * 2 / 3;
          bm[2] = bm[3] * 2 / 3 + bm[0] / 3;
          // Word 2 holds the indices of elements 0..7, element c at bit 2 * c.
          for (c = 0; c < 8; c++)
            a32[c] = bm[((bpz16[2] >> (c << 1)) & 3)];
          // Word 3 holds the indices of elements 8..15, same packing.
          for (c = 0; c < 8; c++)
            a32[8 + c] = bm[((bpz16[3] >> (c << 1)) & 3)];
          ccv_float_to_half_precision(a32, (uint16_t*)a16, 16);
          // Top-left element of the destination 4x4 tile.
          ccv_float16_t* apz = ap + i[0] * 4 * ainc[CCV_NNC_MAX_DIM] + i[1] * 4;
          // Only write the part of the tile that lies inside the output tensor.
          const int h = ccv_min(i[0] * 4 + 4, adim[1]) - i[0] * 4;
          const int w = ccv_min(i[1] * 4 + 4, adim[CCV_NNC_MAX_DIM]) - i[1] * 4;
          for (j[0] = 0; j[0] < h; j[0]++)
            for (j[1] = 0; j[1] < w; j[1]++)
              apz[j[0] * ainc[CCV_NNC_MAX_DIM] + j[1]] = a16[j[0] * 4 + j[1]];
        }
      }
      // Advance both pointers to the next plane.
      bp += binc[CCV_NNC_MAX_DIM - 1] * binc[CCV_NNC_MAX_DIM];
      ap += ainc[CCV_NNC_MAX_DIM - 1] * ainc[CCV_NNC_MAX_DIM];
    }
  }
  return CCV_NNC_EXEC_SUCCESS;
}
142
143
// Registers the CPU reference backend for the LSSC forward (compress) command:
// NCHW layout, half-precision data, CPU memory, a single algorithm.
REGISTER_COMMAND_BACKEND(CCV_NNC_COMPRESSION_LSSC_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW;
  registry->tensor_datatypes = CCV_16F;
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  registry->algorithms = 1;
  registry->exec = _ccv_nnc_lssc_forw;
}
151
152
// Registers the CPU reference backend for the LSSC backward (decompress) command:
// NCHW layout, half-precision data, CPU memory, a single algorithm.
REGISTER_COMMAND_BACKEND(CCV_NNC_COMPRESSION_LSSC_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW;
  registry->tensor_datatypes = CCV_16F;
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  registry->algorithms = 1;
  registry->exec = _ccv_nnc_lssc_back;
}