Coverage Report

Created: 2025-02-24 17:43

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/reduce/ccv_nnc_reduce_norm2_cpu_ref.c
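
The annotated listing below covers the CPU reference backend for the norm2 (L2 norm) reduction. Read from its loop nests, the two kernels compute the Euclidean norm over the broadcast-reduced dimensions and its gradient; the formulas below are a reconstruction from that code (not part of the report itself), with R(j) denoting the set of input positions that fold into output position j:

% Forward (_ccv_nnc_reduce_norm2_forw_cpu_ref): accumulate squares, then take the square root.
b_j = \sqrt{\sum_{i \in R(j)} a_i^2}
% Backward (_ccv_nnc_reduce_norm2_back) with an incoming gradient g:
h_i = g_j \cdot \frac{a_i}{b_j}, \qquad i \in R(j)
% Backward when no gradient tensor is supplied (inputs[0] == 0):
h_i = \frac{a_i}{b_j}, \qquad i \in R(j)

The zero-count block at source lines 114-141 is exactly that no-gradient branch, which the current tests never exercise.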
Line | Count | Source
-----+-------+-------
   1 |       | #include "ccv.h"
   2 |       | #include "ccv_internal.h"
   3 |       | #include "nnc/ccv_nnc.h"
   4 |       | #include "nnc/ccv_nnc_easy.h"
   5 |       | #include "nnc/ccv_nnc_internal.h"
   6 |       | #ifdef USE_OPENMP
   7 |       | #include <omp.h>
   8 |       | #endif
   9 |       | #ifdef USE_DISPATCH
  10 |       | #include <dispatch/dispatch.h>
  11 |       | #endif
  12 |       |
  13 |       | // Shared methods.
  14 |       | #include "../_ccv_nnc_cpu_ref.h"
  15 |       |
  16 |       | void _ccv_nnc_reduce_norm2_forw_cpu_ref(ccv_nnc_tensor_view_t* const a, ccv_nnc_tensor_view_t* const b)
  17 |    10 | {
  18 |    10 |   assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
  19 |    10 |   assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
  20 |       |   // Assuming this is float 32.
  21 |    10 |   int adim[CCV_NNC_MAX_DIM_ALLOC];
  22 |    10 |   int bdim[CCV_NNC_MAX_DIM_ALLOC];
  23 |    10 |   ccv_nnc_tensor_view_get_dim(a, adim);
  24 |    10 |   ccv_nnc_tensor_view_get_dim(b, bdim);
  25 |    10 |   assert(ccv_nnc_tensor_view_check_broadcast_dim(b, adim));
  26 |    10 |   int astride[CCV_NNC_MAX_DIM_ALLOC];
  27 |    10 |   int bstride[CCV_NNC_MAX_DIM_ALLOC];
  28 |    10 |   assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
  29 |    10 |   ccv_nnc_tensor_view_get_stride(a, astride);
  30 |    10 |   ccv_nnc_tensor_view_get_stride(b, bstride);
  31 |    10 |   int i[CCV_NNC_MAX_DIM + 2];
  32 |    10 |   int x;
  33 |    10 |   ccv_nnc_tensor_zero(b);
  34 |    10 |   float* const ap = a->data.f32;
  35 |    10 |   float* const bp = b->data.f32;
  36 |       |   // Non-optimal case, need to do skip if needed.
  37 |    20 |   for (i[0] = 0; i[0] < adim[0]; i[0]++)
  38 |    10 |   {
  39 |    10 |     float* const ap0 = ap + i[0] * astride[0];
  40 |    10 |     float* const bp0 = bdim[0] == 1 ? bp : bp + i[0] * bstride[0];
  41 |    20 |     for (i[1] = 0; i[1] < adim[1]; i[1]++)
  42 |    10 |     {
  43 |    10 |       float* ap1 = ap0 + i[1] * astride[1];
  44 |    10 |       float* const bp1 = bdim[1] == 1 ? bp0 : bp0 + i[1] * bstride[1];
  45 |    32 |       for (i[2] = 0; i[2] < adim[2]; i[2]++)
  46 |    22 |       {
  47 |    22 |         float* const bp2 = bdim[2] == 1 ? bp1 : bp1 + i[2] * bstride[2];
  48 |    22 |         if (bdim[3] == 1)
  49 |    32 |           for (x = 0; x < adim[3]; x++)
  50 |    20 |             bp2[0] += ap1[x] * ap1[x];
  51 |    10 |         else
  52 |    40 |           for (x = 0; x < adim[3]; x++)
  53 |    30 |             bp2[x] += ap1[x] * ap1[x];
  54 |    22 |         ap1 += astride[2];
  55 |    22 |       }
  56 |    10 |     }
  57 |    10 |   }
  58 |    20 |   for (i[0] = 0; i[0] < bdim[0]; i[0]++)
  59 |    10 |   {
  60 |    10 |     float* const bp0 = bp + i[0] * bstride[0];
  61 |    20 |     for (i[1] = 0; i[1] < bdim[1]; i[1]++)
  62 |    10 |     {
  63 |    10 |       float* bp1 = bp0 + i[1] * bstride[1];
  64 |    27 |       for (i[2] = 0; i[2] < bdim[2]; i[2]++)
  65 |    17 |       {
  66 |    44 |         for (x = 0; x < bdim[3]; x++)
  67 |    27 |           bp1[x] = sqrt(bp1[x]);
  68 |    17 |         bp1 += bstride[2];
  69 |    17 |       }
  70 |    10 |     }
  71 |    10 |   }
  72 |    10 | }
  73 |       |
  74 |       | static int _ccv_nnc_reduce_norm2_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  75 |    10 | {
  76 |    10 |   assert(input_size == 1);
  77 |    10 |   ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
  78 |    10 |   ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0];
  79 |    10 |   _ccv_nnc_reduce_norm2_forw_cpu_ref(a, b);
  80 |    10 |   return CCV_NNC_EXEC_SUCCESS;
  81 |    10 | }
  82 |       |
  83 |       | static int _ccv_nnc_reduce_norm2_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  84 |     3 | {
  85 |     3 |   ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
  86 |     3 |   ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
  87 |     3 |   assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
  88 |     3 |   assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
  89 |       |   // Assuming this is float 32.
  90 |     3 |   int adim[CCV_NNC_MAX_DIM_ALLOC];
  91 |     3 |   int bdim[CCV_NNC_MAX_DIM_ALLOC];
  92 |     3 |   ccv_nnc_tensor_view_get_dim(a, adim);
  93 |     3 |   ccv_nnc_tensor_view_get_dim(b, bdim);
  94 |     3 |   assert(ccv_nnc_tensor_view_check_broadcast_dim(b, adim));
  95 |     3 |   int astride[CCV_NNC_MAX_DIM_ALLOC];
  96 |     3 |   int bstride[CCV_NNC_MAX_DIM_ALLOC];
  97 |     3 |   assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
  98 |     3 |   ccv_nnc_tensor_view_get_stride(a, astride);
  99 |     3 |   ccv_nnc_tensor_view_get_stride(b, bstride);
 100 |     3 |   ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0];
 101 |     3 |   assert(ccv_nnc_tensor_view_check_broadcast_dim(h, adim));
 102 |     3 |   assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2);
 103 |       |   // Assuming this is float 32.
 104 |     3 |   int hdim[CCV_NNC_MAX_DIM_ALLOC];
 105 |     3 |   ccv_nnc_tensor_view_get_dim(h, hdim);
 106 |     3 |   int hstride[CCV_NNC_MAX_DIM_ALLOC];
 107 |     3 |   ccv_nnc_tensor_view_get_stride(h, hstride);
 108 |     3 |   int i[CCV_NNC_MAX_DIM + 2];
 109 |     3 |   int x;
 110 |     3 |   float* const hp = h->data.f32;
 111 |     3 |   const float* const ap = a->data.f32;
 112 |     3 |   const float* const bp = b->data.f32;
 113 |     3 |   if (inputs[0] == 0)
 114 |     0 |   {
 115 |       |     // Non-optimal case, need to do skip if needed.
 116 |     0 |     for (i[0] = 0; i[0] < adim[0]; i[0]++)
 117 |     0 |     {
 118 |     0 |       const float* const ap0 = ap + i[0] * astride[0];
 119 |     0 |       float* const hp0 = hp + i[0] * hstride[0];
 120 |     0 |       const float* const bp0 = bdim[0] == 1 ? bp : bp + i[0] * bstride[0];
 121 |     0 |       for (i[1] = 0; i[1] < adim[1]; i[1]++)
 122 |     0 |       {
 123 |     0 |         const float* ap1 = ap0 + i[1] * astride[1];
 124 |     0 |         float* hp1 = hp0 + i[1] * hstride[1];
 125 |     0 |         const float* const bp1 = bdim[1] == 1 ? bp0 : bp0 + i[1] * bstride[1];
 126 |     0 |         for (i[2] = 0; i[2] < adim[2]; i[2]++)
 127 |     0 |         {
 128 |     0 |           const float* const bp2 = bdim[2] == 1 ? bp1 : bp1 + i[2] * bstride[2];
 129 |     0 |           if (bdim[3] == 1)
 130 |     0 |             for (x = 0; x < adim[3]; x++)
 131 |     0 |               hp1[x] = ap1[x] / bp2[0];
 132 |     0 |           else
 133 |     0 |             for (x = 0; x < adim[3]; x++)
 134 |     0 |               hp1[x] = ap1[x] / bp2[x];
 135 |     0 |           ap1 += astride[2];
 136 |     0 |           hp1 += hstride[2];
 137 |     0 |         }
 138 |     0 |       }
 139 |     0 |     }
 140 |     0 |     return CCV_NNC_EXEC_SUCCESS;
 141 |     0 |   }
 142 |     3 |   ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
 143 |     3 |   assert(ccv_nnc_tensor_view_check_broadcast_dim(g, bdim));
 144 |     3 |   assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
 145 |     3 |   int gdim[CCV_NNC_MAX_DIM_ALLOC];
 146 |     3 |   ccv_nnc_tensor_view_get_dim(g, gdim);
 147 |     3 |   int gstride[CCV_NNC_MAX_DIM_ALLOC];
 148 |     3 |   assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
 149 |     3 |   ccv_nnc_tensor_view_get_stride(g, gstride);
 150 |     3 |   float* const gp = g->data.f32;
 151 |       |   // Non-optimal case, need to do skip if needed.
 152 |     6 |   for (i[0] = 0; i[0] < hdim[0]; i[0]++)
 153 |     3 |   {
 154 |     3 |     const float* const ap0 = ap + i[0] * astride[0];
 155 |     3 |     float* const hp0 = hp + i[0] * hstride[0];
 156 |     3 |     float* const gp0 = gdim[0] == 1 ? gp : gp + i[0] * gstride[0];
 157 |     3 |     const float* const bp0 = bdim[0] == 1 ? bp : bp + i[0] * bstride[0];
 158 |     6 |     for (i[1] = 0; i[1] < hdim[1]; i[1]++)
 159 |     3 |     {
 160 |     3 |       const float* ap1 = ap0 + i[1] * astride[1];
 161 |     3 |       float* hp1 = hp0 + i[1] * hstride[1];
 162 |     3 |       float* const gp1 = gdim[1] == 1 ? gp0 : gp0 + i[1] * gstride[1];
 163 |     3 |       const float* const bp1 = bdim[1] == 1 ? bp0 : bp0 + i[1] * bstride[1];
 164 |     9 |       for (i[2] = 0; i[2] < hdim[2]; i[2]++)
 165 |     6 |       {
 166 |     6 |         float* const gp2 = gdim[2] == 1 ? gp1 : gp1 + i[2] * gstride[2];
 167 |     6 |         const float* const bp2 = bdim[2] == 1 ? bp1 : bp1 + i[2] * bstride[2];
 168 |     6 |         if (gdim[3] == 1)
 169 |     8 |           for (x = 0; x < hdim[3]; x++)
 170 |     6 |             hp1[x] = gp2[0] * ap1[x] / bp2[0];
 171 |     4 |         else
 172 |    16 |           for (x = 0; x < hdim[3]; x++)
 173 |    12 |             hp1[x] = gp2[x] * ap1[x] / bp2[x];
 174 |     6 |         ap1 += astride[2];
 175 |     6 |         hp1 += hstride[2];
 176 |     6 |       }
 177 |     3 |     }
 178 |     3 |   }
 179 |     3 |   return CCV_NNC_EXEC_SUCCESS;
 180 |     3 | }
 181 |       |
 182 |       | REGISTER_COMMAND_BACKEND(CCV_NNC_REDUCE_NORM2_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 183 |     1 | {
 184 |     1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
 185 |     1 |   registry->tensor_datatypes = CCV_32F;
 186 |     1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 187 |     1 |   registry->algorithms = 1;
 188 |     1 |   registry->exec = _ccv_nnc_reduce_norm2_forw;
 189 |     1 | }
 190 |       |
 191 |       | REGISTER_COMMAND_BACKEND(CCV_NNC_REDUCE_NORM2_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 192 |     1 | {
 193 |     1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
 194 |     1 |   registry->tensor_datatypes = CCV_32F;
 195 |     1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 196 |     1 |   registry->algorithms = 1;
 197 |     1 |   registry->exec = _ccv_nnc_reduce_norm2_back;
 198 |     1 | }
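
For reference, a minimal driver sketch that exercises the covered forward path by hand. The kernel and its signature are taken from source line 16 of the listing above; the include paths and the ccv_nnc_tensor_new/CPU_TENSOR_NHWC helpers are assumptions about the surrounding ccv_nnc API and build setup, not something this report confirms.

#include <stdio.h>
#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_easy.h"

/* Hand-declared here; in the tree it is visible through ../_ccv_nnc_cpu_ref.h (source line 16). */
void _ccv_nnc_reduce_norm2_forw_cpu_ref(ccv_nnc_tensor_view_t* const a, ccv_nnc_tensor_view_t* const b);

int main(void)
{
  /* A 1x4 input reduced into a 1x1 output; the kernel's broadcast check accepts this shape pair. */
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 4), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 1), 0);
  a->data.f32[0] = 1; a->data.f32[1] = 2; a->data.f32[2] = 2; a->data.f32[3] = 4;
  /* The kernel zeroes b, accumulates squares, then applies sqrt (source lines 33-72). */
  _ccv_nnc_reduce_norm2_forw_cpu_ref((ccv_nnc_tensor_view_t*)a, (ccv_nnc_tensor_view_t*)b);
  printf("norm2 = %g\n", b->data.f32[0]); /* expected: sqrt(1 + 4 + 4 + 16) = 5 */
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  return 0;
}

The cast from ccv_nnc_tensor_t* to ccv_nnc_tensor_view_t* mirrors what the _ccv_nnc_reduce_norm2_forw wrapper does at source lines 77-78.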