Coverage Report

Created: 2025-02-24 17:43

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/pad/ccv_nnc_pad_cpu_ref.c
Line | Count | Source
   1 |       | #include "ccv.h"
   2 |       | #include "ccv_internal.h"
   3 |       | #include "nnc/ccv_nnc.h"
   4 |       | #include "nnc/ccv_nnc_easy.h"
   5 |       | #include "nnc/ccv_nnc_internal.h"
   6 |       | #ifdef USE_OPENMP
   7 |       | #include <omp.h>
   8 |       | #endif
   9 |       | #ifdef USE_DISPATCH
  10 |       | #include <dispatch/dispatch.h>
  11 |       | #endif
  12 |       |
  13 |       | static int _ccv_nnc_pad_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  14 |    11 | {
  15 |    11 |   assert(input_size == 1);
  16 |    11 |   ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
  17 |    11 |   ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0];
  18 |    11 |   assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
  19 |    11 |   assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
  20 |       |   // Assuming this is float 32.
  21 |    11 |   int adim[CCV_NNC_MAX_DIM_ALLOC];
  22 |    11 |   int bdim[CCV_NNC_MAX_DIM_ALLOC];
  23 |    11 |   ccv_nnc_tensor_view_get_dim(a, adim);
  24 |    11 |   ccv_nnc_tensor_view_get_dim(b, bdim);
  25 |    11 |   int astride[CCV_NNC_MAX_DIM_ALLOC];
  26 |    11 |   int bstride[CCV_NNC_MAX_DIM_ALLOC];
  27 |    11 |   assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
  28 |    11 |   ccv_nnc_tensor_view_get_stride(a, astride);
  29 |    11 |   ccv_nnc_tensor_view_get_stride(b, bstride);
  30 |    11 |   int i[CCV_NNC_MAX_DIM + 2];
  31 |    11 |   int x;
  32 |    11 |   float* const ap = a->data.f32;
  33 |    11 |   float* const bp = b->data.f32;
  34 |    11 |   const int nd = ccv_nnc_tensor_nd(a->info.dim);
  35 |    11 |   const int offset = CCV_NNC_MAX_DIM + 2 - nd;
  36 |    11 |   assert(offset >= 0);
  37 |    39 |   for (x = 0; x < nd; x++) // We don't support negative pad.
  38 |    28 |     { assert(cmd.info.size.dim[x] >= 0 && cmd.info.pad.end[x] >= 0); }
  39 |    11 |   int begin[CCV_NNC_MAX_DIM_ALLOC];
  40 |    39 |   for (x = 0; x < nd; x++)
  41 |    28 |     begin[x + offset] = cmd.info.size.dim[x];
  42 |    27 |   for (x = 0; x < offset; x++)
  43 |    16 |     begin[x] = 0;
  44 |       |   // Non-optimal case, need to do skip if needed.
  45 |    11 |   if (cmd.info.pad.type == CCV_NNC_PAD_ZERO)
  46 |     6 |   {
  47 |    16 |     for (i[0] = 0; i[0] < bdim[0]; i[0]++)
  48 |    10 |     {
  49 |    10 |       float* const ap0 = (i[0] >= begin[0] && i[0] < adim[0] + begin[0]) ? ap + (i[0] - begin[0]) * astride[0] : 0;
  50 |    10 |       float* const bp0 = bp + i[0] * bstride[0];
  51 |    42 |       for (i[1] = 0; i[1] < bdim[1]; i[1]++)
  52 |    32 |       {
  53 |    32 |         float* const ap1 = (ap0 && i[1] >= begin[1] && i[1] < adim[1] + begin[1]) ? ap0 + (i[1] - begin[1]) * astride[1] : 0;
  54 |    32 |         float* bp1 = bp0 + i[1] * bstride[1];
  55 |   154 |         for (i[2] = 0; i[2] < bdim[2]; i[2]++)
  56 |   122 |         {
  57 |   122 |           float* const ap2 = (ap1 && i[2] >= begin[2] && i[2] < adim[2] + begin[2]) ? ap1 + (i[2] - begin[2]) * astride[2] : 0;
  58 |   789 |           for (x = 0; x < bdim[3]; x++)
  59 |   667 |             bp1[x] = (ap2 && x >= begin[3] && x < adim[3] + begin[3]) ? ap2[x - begin[3]] : 0;
  60 |   122 |           bp1 += bstride[2];
  61 |   122 |         }
  62 |    32 |       }
  63 |    10 |     }
  64 |     6 |   } else {
  65 |     5 |     assert(cmd.info.pad.type == CCV_NNC_PAD_REPLICATE);
  66 |    14 |     for (i[0] = 0; i[0] < bdim[0]; i[0]++)
  67 |     9 |     {
  68 |     9 |       float* const ap0 = ap + ccv_min(adim[0] - 1, ccv_max(0, i[0] - begin[0])) * astride[0];
  69 |     9 |       float* const bp0 = bp + i[0] * bstride[0];
  70 |    35 |       for (i[1] = 0; i[1] < bdim[1]; i[1]++)
  71 |    26 |       {
  72 |    26 |         float* const ap1 = ap0 + ccv_min(adim[1] - 1, ccv_max(0, i[1] - begin[1])) * astride[1];
  73 |    26 |         float* bp1 = bp0 + i[1] * bstride[1];
  74 |   106 |         for (i[2] = 0; i[2] < bdim[2]; i[2]++)
  75 |    80 |         {
  76 |    80 |           float* const ap2 = ap1 + ccv_min(adim[2] - 1, ccv_max(0, i[2] - begin[2])) * astride[2];
  77 |   290 |           for (x = 0; x < bdim[3]; x++)
  78 |   210 |             bp1[x] = ap2[ccv_min(adim[3] - 1, ccv_max(0, x - begin[3]))];
  79 |    80 |           bp1 += bstride[2];
  80 |    80 |         }
  81 |    26 |       }
  82 |     9 |     }
  83 |     5 |   }
  84 |    11 |   return CCV_NNC_EXEC_SUCCESS;
  85 |    11 | }
  86 |       |
  87 |       | static int _ccv_nnc_pad_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  88 |     4 | {
  89 |     4 |   assert(input_size == 1);
  90 |     4 |   ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
  91 |     4 |   ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0];
  92 |     4 |   assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
  93 |     4 |   assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
  94 |       |   // Assuming this is float 32.
  95 |     4 |   int adim[CCV_NNC_MAX_DIM_ALLOC];
  96 |     4 |   int bdim[CCV_NNC_MAX_DIM_ALLOC];
  97 |     4 |   ccv_nnc_tensor_view_get_dim(a, adim);
  98 |     4 |   ccv_nnc_tensor_view_get_dim(b, bdim);
  99 |     4 |   int astride[CCV_NNC_MAX_DIM_ALLOC];
 100 |     4 |   int bstride[CCV_NNC_MAX_DIM_ALLOC];
 101 |     4 |   assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
 102 |     4 |   ccv_nnc_tensor_view_get_stride(a, astride);
 103 |     4 |   ccv_nnc_tensor_view_get_stride(b, bstride);
 104 |     4 |   int i[CCV_NNC_MAX_DIM + 2];
 105 |     4 |   int x;
 106 |     4 |   float* const ap = a->data.f32;
 107 |     4 |   float* const bp = b->data.f32;
 108 |     4 |   const int nd = ccv_nnc_tensor_nd(a->info.dim);
 109 |     4 |   const int offset = CCV_NNC_MAX_DIM + 2 - nd;
 110 |     4 |   assert(offset >= 0);
 111 |    14 |   for (x = 0; x < nd; x++) // We don't support negative pad.
 112 |    10 |     { assert(cmd.info.size.dim[x] >= 0 && cmd.info.pad.end[x] >= 0); }
 113 |     4 |   int begin[CCV_NNC_MAX_DIM_ALLOC];
 114 |    14 |   for (x = 0; x < nd; x++)
 115 |    10 |     begin[x + offset] = cmd.info.size.dim[x];
 116 |    10 |   for (x = 0; x < offset; x++)
 117 |     6 |     begin[x] = 0;
 118 |       |   // Non-optimal case, need to do skip if needed.
 119 |     9 |   for (i[0] = 0; i[0] < bdim[0]; i[0]++)
 120 |     5 |   {
 121 |     5 |     float* const ap0 = ap + (i[0] + begin[0]) * astride[0];
 122 |     5 |     float* const bp0 = bp + i[0] * bstride[0];
 123 |    12 |     for (i[1] = 0; i[1] < bdim[1]; i[1]++)
 124 |     7 |     {
 125 |     7 |       float* const ap1 = ap0 + (i[1] + begin[1]) * astride[1];
 126 |     7 |       float* bp1 = bp0 + i[1] * bstride[1];
 127 |    20 |       for (i[2] = 0; i[2] < bdim[2]; i[2]++)
 128 |    13 |       {
 129 |    13 |         float* const ap2 = ap1 + (i[2] + begin[2]) * astride[2];
 130 |    39 |         for (x = 0; x < bdim[3]; x++)
 131 |    26 |           bp1[x] = ap2[x + begin[3]];
 132 |    13 |         bp1 += bstride[2];
 133 |    13 |       }
 134 |     7 |     }
 135 |     5 |   }
 136 |     4 |   return CCV_NNC_EXEC_SUCCESS;
 137 |     4 | }
 138 |       |
 139 |       | REGISTER_COMMAND_BACKEND(CCV_NNC_PAD_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 140 |     1 | {
 141 |     1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
 142 |     1 |   registry->tensor_datatypes = CCV_32F;
 143 |     1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 144 |     1 |   registry->algorithms = 1;
 145 |     1 |   registry->exec = _ccv_nnc_pad_forw;
 146 |     1 | }
 147 |       |
 148 |       | REGISTER_COMMAND_BACKEND(CCV_NNC_PAD_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 149 |     1 | {
 150 |     1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
 151 |     1 |   registry->tensor_datatypes = CCV_32F;
 152 |     1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 153 |     1 |   registry->algorithms = 1;
 154 |     1 |   registry->exec = _ccv_nnc_pad_back;
 155 |     1 | }
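
Stripped of the coverage counts, the forward kernel above supports two padding modes over up to CCV_NNC_MAX_DIM + 2 = 4 axes: CCV_NNC_PAD_ZERO writes 0 wherever the output index falls outside the original extent, while CCV_NNC_PAD_REPLICATE clamps the index to the nearest valid element via ccv_min(adim[k] - 1, ccv_max(0, i[k] - begin[k])). The backward kernel is the matching slice: it copies the interior region starting at begin back out of the padded gradient (bp1[x] = ap2[x + begin[3]]). The standalone 1-D sketch below (plain C, no CCV types; begin/end here stand in for cmd.info.size.dim and cmd.info.pad.end) illustrates the same index mapping:

#include <stdio.h>

/* 1-D illustration of the two pad modes implemented by _ccv_nnc_pad_forw.
 * adim is the input length; begin/end are the pad sizes before/after. */
static void pad_1d(const float* a, int adim, int begin, int end, float* b, int zero_pad)
{
	const int bdim = adim + begin + end;
	int i;
	for (i = 0; i < bdim; i++)
		if (zero_pad) {
			/* CCV_NNC_PAD_ZERO: outside the original extent, write 0. */
			b[i] = (i >= begin && i < adim + begin) ? a[i - begin] : 0;
		} else {
			/* CCV_NNC_PAD_REPLICATE: clamp to the nearest valid input index. */
			int j = i - begin;
			j = j < 0 ? 0 : (j > adim - 1 ? adim - 1 : j);
			b[i] = a[j];
		}
}

int main(void)
{
	const float a[2] = { 1, 2 };
	float b[4];
	pad_1d(a, 2, 1, 1, b, 1); /* zero pad:      0 1 2 0 */
	printf("%g %g %g %g\n", b[0], b[1], b[2], b[3]);
	pad_1d(a, 2, 1, 1, b, 0); /* replicate pad: 1 1 2 2 */
	printf("%g %g %g %g\n", b[0], b[1], b[2], b[3]);
	return 0;
}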
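
Both backends are registered for NHWC and NCHW layouts, 32-bit float, CPU memory, with a single algorithm each. For completeness, here is a minimal sketch of how the forward backend could be exercised through the generic command API. It assumes the usual NNC helpers (ccv_nnc_init, ccv_nnc_cmd, ccv_nnc_cmd_exec, ccv_nnc_tensor_new/ccv_nnc_tensor_free, and the CPU_TENSOR_NHWC and TENSOR_LIST convenience macros) and fills the command parameters the way the kernel reads them: size.dim carries the leading pad, pad.end the trailing pad, and pad.type the mode; the concrete shapes and values are made up for illustration.

#include "ccv.h"
#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_easy.h"

int main(void)
{
	ccv_nnc_init(); /* register the command backends, including the CPU reference above */
	/* 2x2 input padded by 1 on every side -> 4x4 output. */
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 4), 0);
	a->data.f32[0] = 1, a->data.f32[1] = 2, a->data.f32[2] = 3, a->data.f32[3] = 4;
	/* The kernel reads size.dim as the pad before each axis, pad.end as the pad after,
	 * and pad.type as the mode; the output tensor's own shape fixes the total size. */
	const ccv_nnc_cmd_param_t params = {
		.size = { .dim = { 1, 1 } },
		.pad = { .type = CCV_NNC_PAD_ZERO, .end = { 1, 1 } },
	};
	ccv_nnc_cmd_exec(ccv_nnc_cmd(CCV_NNC_PAD_FORWARD, 0, params, 0), ccv_nnc_no_hint, 0,
		TENSOR_LIST(a), TENSOR_LIST(b), 0);
	/* b now holds a with a one-element zero border on every side. */
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	return 0;
}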