Coverage Report

Created: 2025-02-24 17:43

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/loss/ccv_nnc_smooth_l1_cpu_ref.c

Coverage summary: the forward kernel ran 4 times and the backward kernel 3 times; every line was executed except the final else branch of _ccv_nnc_smooth_l1_back (the path taken when no incoming gradient tensor is supplied and cp >= beta_2), which reports a count of 0.

#include "ccv.h"
#include "ccv_internal.h"
#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_easy.h"
#include "nnc/ccv_nnc_internal.h"
#ifdef USE_OPENMP
#include <omp.h>
#endif
#ifdef USE_DISPATCH
#include <dispatch/dispatch.h>
#endif

static int _ccv_nnc_smooth_l1_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  assert(input_size == 2);
  const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
  assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1];
  assert(output_size == 1);
  ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0];
  int dim[CCV_NNC_MAX_DIM_ALLOC];
  int astride[CCV_NNC_MAX_DIM_ALLOC];
  int bstride[CCV_NNC_MAX_DIM_ALLOC];
  int cstride[CCV_NNC_MAX_DIM_ALLOC];
  ccv_nnc_tensor_view_get_dim(a, dim);
  assert(ccv_nnc_tensor_view_check_dim(b, dim));
  ccv_nnc_tensor_view_get_stride(a, astride);
  ccv_nnc_tensor_view_get_stride(b, bstride);
  ccv_nnc_tensor_view_get_stride(c, cstride);
  assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  const int batch_size = dim[CCV_NNC_MAX_DIM];
  assert(ccv_nnc_tensor_count(c->info) == batch_size);
  const int count = dim[CCV_NNC_MAX_DIM + 1];
  const int astep = astride[CCV_NNC_MAX_DIM];
  const int bstep = bstride[CCV_NNC_MAX_DIM];
  const int cstep = ccv_nnc_tensor_nd(c->info.dim) == 1 ? 1 : cstride[CCV_NNC_MAX_DIM];
  const float beta = cmd.info.smooth_l1.beta;
  const float beta_inv_2 = 0.5 / beta;
  const float beta_2 = 0.5 * beta;
  parallel_for(i, batch_size) {
    int j;
    const float* const ap = a->data.f32 + i * astep;
    const float* const bp = b->data.f32 + i * bstep;
    float cp = 0;
    for (j = 0; j < count; j++)
      cp += fabs(bp[j] - ap[j]);
    if (cp < beta)
    {
      cp = 0;
      for (j = 0; j < count; j++)
        cp += (bp[j] - ap[j]) * (bp[j] - ap[j]);
      cp *= beta_inv_2;
    } else
      cp -= beta_2;
    c->data.f32[i * cstep] = cp;
  } parallel_endfor
  return CCV_NNC_EXEC_SUCCESS;
}
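
The forward kernel above reduces each row of the batch to one scalar: it first accumulates the L1 distance between the corresponding rows of b and a, and only if that aggregate distance is below beta does it switch to the scaled quadratic form. A minimal standalone sketch of the per-row math follows (hypothetical helper name, plain C, contiguous rows assumed; it is not part of this file):

  #include <math.h>

  /* Per-row smooth L1 value as computed by _ccv_nnc_smooth_l1_forw:
   * if sum_j |b_j - a_j| < beta, return (0.5 / beta) * sum_j (b_j - a_j)^2,
   * otherwise return sum_j |b_j - a_j| - 0.5 * beta. */
  static float smooth_l1_row(const float* a, const float* b, int count, float beta)
  {
    float l1 = 0;
    int j;
    for (j = 0; j < count; j++)
      l1 += fabsf(b[j] - a[j]);
    if (l1 < beta)
    {
      float l2 = 0;
      for (j = 0; j < count; j++)
        l2 += (b[j] - a[j]) * (b[j] - a[j]);
      return 0.5f / beta * l2;
    }
    return l1 - 0.5f * beta;
  }
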
static int _ccv_nnc_smooth_l1_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  assert(input_size >= 3);
  assert(output_size >= 1);
  const ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
  assert(!g || !CCV_IS_TENSOR_VIEW(g));
  const ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
  const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
  const ccv_nnc_tensor_view_t* const c = (ccv_nnc_tensor_view_t*)inputs[3];
  ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0];
  int dim[CCV_NNC_MAX_DIM_ALLOC];
  int astride[CCV_NNC_MAX_DIM_ALLOC];
  int bstride[CCV_NNC_MAX_DIM_ALLOC];
  int cstride[CCV_NNC_MAX_DIM_ALLOC];
  int hstride[CCV_NNC_MAX_DIM_ALLOC];
  ccv_nnc_tensor_view_get_dim(a, dim);
  assert(ccv_nnc_tensor_view_check_dim(b, dim));
  assert(ccv_nnc_tensor_view_check_dim(h, dim));
  ccv_nnc_tensor_view_get_stride(a, astride);
  ccv_nnc_tensor_view_get_stride(b, bstride);
  ccv_nnc_tensor_view_get_stride(c, cstride);
  ccv_nnc_tensor_view_get_stride(h, hstride);
  assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  const int batch_size = dim[CCV_NNC_MAX_DIM];
  assert(ccv_nnc_tensor_count(c->info) == batch_size);
  const int count = dim[CCV_NNC_MAX_DIM + 1];
  const int astep = astride[CCV_NNC_MAX_DIM];
  const int bstep = bstride[CCV_NNC_MAX_DIM];
  const int hstep = hstride[CCV_NNC_MAX_DIM];
  const int cstep = ccv_nnc_tensor_nd(c->info.dim) == 1 ? 1 : cstride[CCV_NNC_MAX_DIM];
  const float beta = cmd.info.smooth_l1.beta;
  const float beta_2 = 0.5 * beta;
  const float inv_beta = 1.0 / beta;
  if (g)
  {
    int gstride[CCV_NNC_MAX_DIM_ALLOC];
    ccv_nnc_tensor_view_get_stride(g, gstride);
    assert(ccv_nnc_tensor_count(g->info) == batch_size);
    const int gstep = ccv_nnc_tensor_nd(g->info.dim) == 1 ? 1 : gstride[CCV_NNC_MAX_DIM];
    parallel_for(i, batch_size) {
      int j;
      const float cp = c->data.f32[i * cstep];
      const float* const ap = a->data.f32 + i * astep;
      const float* const bp = b->data.f32 + i * bstep;
      float* const hp = h->data.f32 + i * hstep;
      if (cp < beta_2)
      {
        const float gp = inv_beta * g->data.f32[i * gstep];
        for (j = 0; j < count; j++)
          hp[j] = gp * (ap[j] - bp[j]);
      } else {
        const float gp = g->data.f32[i * gstep];
        for (j = 0; j < count; j++)
          hp[j] = ((ap[j] - bp[j]) > 0 ? 1 : -1) * gp;
      }
    } parallel_endfor
  } else {
    parallel_for(i, batch_size) {
      int j;
      const float cp = c->data.f32[i * cstep];
      const float* const ap = a->data.f32 + i * astep;
      const float* const bp = b->data.f32 + i * bstep;
      float* const hp = h->data.f32 + i * hstep;
      if (cp < beta_2)
        for (j = 0; j < count; j++)
          hp[j] = inv_beta * (ap[j] - bp[j]);
      else
        for (j = 0; j < count; j++)
          hp[j] = (ap[j] - bp[j]) > 0 ? 1 : -1;
    } parallel_endfor
  }
  return CCV_NNC_EXEC_SUCCESS;
}
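
A note on the branch test in the backward kernel: instead of recomputing the L1 distance, it reuses the loss value cp stored by the forward pass. When the forward pass took the quadratic branch, cp = (0.5 / beta) * sum_j (b_j - a_j)^2 <= (0.5 / beta) * S^2 < 0.5 * beta, where S = sum_j |b_j - a_j| < beta; when it took the linear branch, cp = S - 0.5 * beta >= 0.5 * beta. Comparing cp against beta_2 therefore recovers which branch produced the stored value. The resulting per-row gradient with respect to a, sketched as a hypothetical standalone helper (contiguous rows assumed, g is the incoming gradient, 1.0 when none is supplied):

  /* Gradient of smooth_l1_row with respect to a, mirroring _ccv_nnc_smooth_l1_back. */
  static void smooth_l1_row_grad(float* h, const float* a, const float* b, int count, float beta, float cp, float g)
  {
    int j;
    if (cp < 0.5f * beta) /* forward pass took the quadratic branch */
      for (j = 0; j < count; j++)
        h[j] = g / beta * (a[j] - b[j]);
    else /* forward pass took the linear branch */
      for (j = 0; j < count; j++)
        h[j] = (a[j] - b[j]) > 0 ? g : -g;
  }
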
REGISTER_COMMAND_BACKEND(CCV_NNC_SMOOTH_L1_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
  registry->tensor_datatypes = CCV_32F;
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  registry->algorithms = 1;
  registry->exec = _ccv_nnc_smooth_l1_forw;
}

REGISTER_COMMAND_BACKEND(CCV_NNC_SMOOTH_L1_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
  registry->tensor_datatypes = CCV_32F;
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  registry->algorithms = 1;
  registry->exec = _ccv_nnc_smooth_l1_back;
}
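
Both registrations expose the kernels as the CPU reference backend for 32-bit float tensors in NHWC or NCHW layout. As a rough, unverified sketch of how the forward command could be driven through the generic command interface (the ccv_nnc_cmd/ccv_nnc_cmd_exec signatures and the CPU_TENSOR_NHWC/TENSOR_LIST convenience macros are assumed from the wider ccv_nnc API and should be checked against nnc/ccv_nnc.h and nnc/ccv_nnc_easy.h):

  /* Assumed usage sketch, not part of this file: smooth L1 over a 10x100 batch, beta = 1. */
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
  ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  const ccv_nnc_cmd_param_t params = { .smooth_l1 = { .beta = 1 } };
  /* ... fill a and b with predictions and targets ... */
  ccv_nnc_cmd_exec(ccv_nnc_cmd(CCV_NNC_SMOOTH_L1_FORWARD, 0, params, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(c);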