Coverage Report

Created: 2025-02-24 17:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/compression/ccv_nnc_lssc_cpu_ref.c
Line
Count
Source
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
// Forward LSSC compression (CPU reference): compresses a half-precision tensor
// block-wise. Each 4x4 spatial tile of the input is encoded as 4 half values in
// the output: [min, max] endpoints followed by two 16-bit words holding sixteen
// 2-bit quantization indices (similar in spirit to BC1-style endpoint coding).
// Inputs/outputs are paired by index n; returns CCV_NNC_EXEC_SUCCESS.
static int _ccv_nnc_lssc_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	assert(output_size <= input_size);
	int n;
	ccv_float16_t a16[16]; // staging buffer for one 4x4 tile, half precision
	float a32[16]; // same tile widened to float for min/max and quantization
	float bm[2]; // [min, max] endpoints written to the compressed output
	for (n = 0; n < output_size; n++)
	{
		const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[n];
		ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[n];
		int i[CCV_NNC_MAX_DIM]; // tile coordinates (row, column of 4x4 tiles)
		int j[CCV_NNC_MAX_DIM]; // element coordinates within a tile
		int c, k;
		const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
		assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2);
		// Skip the batch dimension when present so adim/bdim address C,H,W
		// uniformly. NOTE(review): layout assumed NCHW per the backend
		// registration below — confirm against callers.
		const int* adim = (a_nd == CCV_NNC_MAX_DIM + 1) ? a->info.dim : a->info.dim + 1;
		const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
		assert(b_nd == CCV_NNC_MAX_DIM + 1 || b_nd == CCV_NNC_MAX_DIM + 2);
		const int* bdim = (b_nd == CCV_NNC_MAX_DIM + 1) ? b->info.dim : b->info.dim + 1;
		ccv_float16_t* ap = a->data.f16;
		int astride[CCV_NNC_MAX_DIM_ALLOC];
		ccv_nnc_tensor_view_get_stride(a, astride);
		ccv_float16_t* bp = b->data.f16;
		int bstride[CCV_NNC_MAX_DIM_ALLOC];
		ccv_nnc_tensor_view_get_stride(b, bstride);
		// One compression pass per (batch x channel) plane.
		const int nxc = ccv_nnc_tensor_get_n(a->info) * ccv_nnc_tensor_get_c(a->info);
		assert(nxc == ccv_nnc_tensor_get_n(b->info) * ccv_nnc_tensor_get_c(b->info));
		for (k = 0; k < nxc; k++)
		{
			for (i[0] = 0; i[0] < bdim[1]; i[0]++)
			{
				// Each tile occupies exactly 4 half values in the output row.
				assert(bdim[CCV_NNC_MAX_DIM] % 4 == 0);
				const int bw = bdim[CCV_NNC_MAX_DIM] / 4;
				for (i[1] = 0; i[1] < bw; i[1]++)
				{
					ccv_float16_t* apz = ap + i[0] * 4 * astride[CCV_NNC_MAX_DIM] + i[1] * 4;
					// Clip tile extents at the right/bottom edges of the input.
					const int h = ccv_min(i[0] * 4 + 4, adim[1]) - i[0] * 4;
					const int w = ccv_min(i[1] * 4 + 4, adim[CCV_NNC_MAX_DIM]) - i[1] * 4;
					// Pre-fill with the top-left element so partial edge tiles
					// (h < 4 or w < 4) have defined padding values.
					for (c = 0; c < 16; c++)
						a16[c] = apz[0];
					for (j[0] = 0; j[0] < h; j[0]++)
						for (j[1] = 0; j[1] < w; j[1]++)
							a16[j[0] * 4 + j[1]] = apz[j[0] * astride[CCV_NNC_MAX_DIM] + j[1]];
					ccv_half_precision_to_float((uint16_t*)a16, a32, 16);
					float amax = a32[0];
					float amin = a32[0];
					for (c = 1; c < 16; c++)
						amax = ccv_max(a32[c], amax), amin = ccv_min(a32[c], amin);
					bm[0] = amin;
					bm[1] = amax;
					// Output layout per tile: bpz16[0..1] = half(min), half(max);
					// bpz16[2..3] = sixteen 2-bit indices, 8 per word.
					ccv_float16_t* bpz = bp + i[0] * bstride[CCV_NNC_MAX_DIM] + i[1] * 4;
					uint16_t* const bpz16 = (uint16_t*)bpz;
					ccv_float_to_half_precision(bm, bpz16, 2);
					// Quantize to levels {0,1,2,3}. Shifting the bottom by
					// range/6 centers the truncation buckets on the four
					// reconstruction points min, min+r/3, min+2r/3, max.
					const float abottom = amin * 7 / 6 - amax / 6;
					const float ascale = 3 / ccv_max(amax - amin, 1e-6); // guard flat tiles
					bpz16[2] = 0;
					for (c = 0; c < 8; c++)
						bpz16[2] |= ((ccv_clamp((int)((a32[c] - abottom) * ascale), 0, 3)) << (c << 1));
					bpz16[3] = 0;
					for (c = 0; c < 8; c++)
						bpz16[3] |= ((ccv_clamp((int)((a32[8 + c] - abottom) * ascale), 0, 3)) << (c << 1));
				}
			}
			// Advance to the next plane. NOTE(review): assumes stride at
			// CCV_NNC_MAX_DIM - 1 is the per-plane (channel) stride.
			bp += bstride[CCV_NNC_MAX_DIM - 1];
			ap += astride[CCV_NNC_MAX_DIM - 1];
		}
	}
	return CCV_NNC_EXEC_SUCCESS;
}
83
84
// Backward LSSC pass (CPU reference): decompresses tensors produced by
// _ccv_nnc_lssc_forw. Each group of 4 half values in the input encodes one
// 4x4 tile of the output: [min, max] endpoints followed by two 16-bit words
// of 2-bit reconstruction indices. Inputs/outputs are paired by index n;
// returns CCV_NNC_EXEC_SUCCESS.
// Fix: removed a duplicated, unreachable `return CCV_NNC_EXEC_SUCCESS;`
// immediately after the first one (dead code — never executed).
static int _ccv_nnc_lssc_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	assert(output_size <= input_size);
	int n;
	ccv_float16_t a16[16]; // one decoded 4x4 tile, half precision
	float a32[16]; // decoded tile in float before narrowing to half
	float bm[4]; // reconstruction palette: min, min+r/3, min+2r/3, max
	for (n = 0; n < output_size; n++)
	{
		// Note the direction: the compressed tensor is the INPUT here and the
		// reconstructed tensor is the OUTPUT.
		const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[n];
		ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)outputs[n];
		int i[CCV_NNC_MAX_DIM]; // tile coordinates
		int j[CCV_NNC_MAX_DIM]; // element coordinates within a tile
		int c, k;
		const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
		assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2);
		// Skip the batch dimension when present (mirrors the forward pass).
		const int* adim = (a_nd == CCV_NNC_MAX_DIM + 1) ? a->info.dim : a->info.dim + 1;
		const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
		assert(b_nd == CCV_NNC_MAX_DIM + 1 || b_nd == CCV_NNC_MAX_DIM + 2);
		const int* bdim = (b_nd == CCV_NNC_MAX_DIM + 1) ? b->info.dim : b->info.dim + 1;
		ccv_float16_t* ap = a->data.f16;
		int astride[CCV_NNC_MAX_DIM_ALLOC];
		ccv_nnc_tensor_view_get_stride(a, astride);
		ccv_float16_t* bp = b->data.f16;
		int bstride[CCV_NNC_MAX_DIM_ALLOC];
		ccv_nnc_tensor_view_get_stride(b, bstride);
		// One decompression pass per (batch x channel) plane.
		const int nxc = ccv_nnc_tensor_get_n(a->info) * ccv_nnc_tensor_get_c(a->info);
		assert(nxc == ccv_nnc_tensor_get_n(b->info) * ccv_nnc_tensor_get_c(b->info));
		for (k = 0; k < nxc; k++)
		{
			for (i[0] = 0; i[0] < bdim[1]; i[0]++)
			{
				assert(bdim[CCV_NNC_MAX_DIM] % 4 == 0);
				const int bw = bdim[CCV_NNC_MAX_DIM] / 4;
				for (i[1] = 0; i[1] < bw; i[1]++)
				{
					ccv_float16_t* bpz = bp + i[0] * bstride[CCV_NNC_MAX_DIM] + i[1] * 4;
					uint16_t* const bpz16 = (uint16_t*)bpz;
					// bm[0..1] <- half(min), half(max); then expand to the
					// 4-entry palette the 2-bit indices select from.
					ccv_half_precision_to_float(bpz16, bm, 2);
					bm[3] = bm[1];
					bm[1] = bm[3] / 3 + bm[0] * 2 / 3;
					bm[2] = bm[3] * 2 / 3 + bm[0] / 3;
					// Unpack eight 2-bit indices from each of bpz16[2..3].
					for (c = 0; c < 8; c++)
						a32[c] = bm[((bpz16[2] >> (c << 1)) & 3)];
					for (c = 0; c < 8; c++)
						a32[8 + c] = bm[((bpz16[3] >> (c << 1)) & 3)];
					ccv_float_to_half_precision(a32, (uint16_t*)a16, 16);
					ccv_float16_t* apz = ap + i[0] * 4 * astride[CCV_NNC_MAX_DIM] + i[1] * 4;
					// Clip at the right/bottom edges; padding lanes of edge
					// tiles are simply not written back.
					const int h = ccv_min(i[0] * 4 + 4, adim[1]) - i[0] * 4;
					const int w = ccv_min(i[1] * 4 + 4, adim[CCV_NNC_MAX_DIM]) - i[1] * 4;
					for (j[0] = 0; j[0] < h; j[0]++)
						for (j[1] = 0; j[1] < w; j[1]++)
							apz[j[0] * astride[CCV_NNC_MAX_DIM] + j[1]] = a16[j[0] * 4 + j[1]];
				}
			}
			// Advance to the next plane. NOTE(review): assumes stride at
			// CCV_NNC_MAX_DIM - 1 is the per-plane (channel) stride.
			bp += bstride[CCV_NNC_MAX_DIM - 1];
			ap += astride[CCV_NNC_MAX_DIM - 1];
		}
	}
	return CCV_NNC_EXEC_SUCCESS;
}
146
147
REGISTER_COMMAND_BACKEND(CCV_NNC_COMPRESSION_LSSC_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	// Register the CPU reference LSSC compression kernel: CPU memory,
	// half-precision NCHW tensors, a single algorithm variant.
	registry->exec = _ccv_nnc_lssc_forw;
	registry->algorithms = 1;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->tensor_datatypes = CCV_16F;
	registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW;
}
155
156
REGISTER_COMMAND_BACKEND(CCV_NNC_COMPRESSION_LSSC_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	// Register the CPU reference LSSC decompression kernel; capabilities
	// mirror the forward registration (CPU memory, FP16, NCHW, 1 algorithm).
	registry->exec = _ccv_nnc_lssc_back;
	registry->algorithms = 1;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->tensor_datatypes = CCV_16F;
	registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW;
}