Coverage Report

Created: 2022-07-27 23:53

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/loss/ccv_nnc_smooth_l1_cpu_ref.c
#include "ccv.h"
#include "ccv_internal.h"
#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_easy.h"
#include "nnc/ccv_nnc_internal.h"
#ifdef USE_OPENMP
#include <omp.h>
#endif
#ifdef USE_DISPATCH
#include <dispatch/dispatch.h>
#endif

static int _ccv_nnc_smooth_l1_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  assert(input_size == 2);
  const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
  assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1];
  assert(output_size == 1);
  ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0];
  int dim[CCV_NNC_MAX_DIM_ALLOC];
  int ainc[CCV_NNC_MAX_DIM_ALLOC];
  int binc[CCV_NNC_MAX_DIM_ALLOC];
  int cinc[CCV_NNC_MAX_DIM_ALLOC];
  ccv_nnc_tensor_view_get_dim(a, dim);
  assert(ccv_nnc_tensor_view_check_dim(b, dim));
  ccv_nnc_tensor_view_get_inc(a, ainc);
  ccv_nnc_tensor_view_get_inc(b, binc);
  ccv_nnc_tensor_view_get_inc(c, cinc);
  assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  const int batch_size = dim[CCV_NNC_MAX_DIM];
  assert(ccv_nnc_tensor_count(c->info) == batch_size);
  const int count = dim[CCV_NNC_MAX_DIM + 1];
  const int astep = ainc[CCV_NNC_MAX_DIM + 1];
  const int bstep = binc[CCV_NNC_MAX_DIM + 1];
  const int cstep = ccv_nnc_tensor_nd(c->info.dim) == 1 ? 1 : cinc[CCV_NNC_MAX_DIM + 1];
  const float beta = cmd.info.smooth_l1.beta;
  const float beta_inv_2 = 0.5 / beta;
  const float beta_2 = 0.5 * beta;
  parallel_for(i, batch_size) {
    int j;
    const float* const ap = a->data.f32 + i * astep;
    const float* const bp = b->data.f32 + i * bstep;
    float cp = 0;
    // Total L1 distance between the i-th rows of b and a.
    for (j = 0; j < count; j++)
      cp += fabs(bp[j] - ap[j]);
    if (cp < beta)
    {
      // Below the threshold, switch to the scaled quadratic form.
      cp = 0;
      for (j = 0; j < count; j++)
        cp += (bp[j] - ap[j]) * (bp[j] - ap[j]);
      cp *= beta_inv_2;
    } else
      cp -= beta_2;
    c->data.f32[i * cstep] = cp;
  } parallel_endfor
  return CCV_NNC_EXEC_SUCCESS;
}
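In effect, the forward pass computes, for each batch row i, with d_j = b_ij - a_ij and beta = cmd.info.smooth_l1.beta:

\[
c_i =
\begin{cases}
\dfrac{1}{2\beta} \sum_j d_j^2 & \text{if } \sum_j |d_j| < \beta \\[4pt]
\sum_j |d_j| - \dfrac{\beta}{2} & \text{otherwise.}
\end{cases}
\]

Note that the branch is chosen on the total L1 distance of the row, not per element as in the more common elementwise smooth L1 (Huber) formulation.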
static int _ccv_nnc_smooth_l1_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  assert(input_size >= 3);
  assert(output_size >= 1);
  const ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
  assert(!g || !CCV_IS_TENSOR_VIEW(g));
  const ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
  const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
  const ccv_nnc_tensor_view_t* const c = (ccv_nnc_tensor_view_t*)inputs[3];
  ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0];
  int dim[CCV_NNC_MAX_DIM_ALLOC];
  int ainc[CCV_NNC_MAX_DIM_ALLOC];
  int binc[CCV_NNC_MAX_DIM_ALLOC];
  int cinc[CCV_NNC_MAX_DIM_ALLOC];
  int hinc[CCV_NNC_MAX_DIM_ALLOC];
  ccv_nnc_tensor_view_get_dim(a, dim);
  assert(ccv_nnc_tensor_view_check_dim(b, dim));
  assert(ccv_nnc_tensor_view_check_dim(h, dim));
  ccv_nnc_tensor_view_get_inc(a, ainc);
  ccv_nnc_tensor_view_get_inc(b, binc);
  ccv_nnc_tensor_view_get_inc(c, cinc);
  ccv_nnc_tensor_view_get_inc(h, hinc);
  assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  const int batch_size = dim[CCV_NNC_MAX_DIM];
  assert(ccv_nnc_tensor_count(c->info) == batch_size);
  const int count = dim[CCV_NNC_MAX_DIM + 1];
  const int astep = ainc[CCV_NNC_MAX_DIM + 1];
  const int bstep = binc[CCV_NNC_MAX_DIM + 1];
  const int hstep = hinc[CCV_NNC_MAX_DIM + 1];
  const int cstep = ccv_nnc_tensor_nd(c->info.dim) == 1 ? 1 : cinc[CCV_NNC_MAX_DIM + 1];
  const float beta = cmd.info.smooth_l1.beta;
  const float beta_2 = 0.5 * beta;
  const float inv_beta = 1.0 / beta;
  if (g)
  {
    int ginc[CCV_NNC_MAX_DIM_ALLOC];
    ccv_nnc_tensor_view_get_inc(g, ginc);
    assert(ccv_nnc_tensor_count(g->info) == batch_size);
    const int gstep = ccv_nnc_tensor_nd(g->info.dim) == 1 ? 1 : ginc[CCV_NNC_MAX_DIM + 1];
    parallel_for(i, batch_size) {
      int j;
      // The stored forward output identifies which branch was taken: the
      // quadratic branch always stores a loss below beta / 2, the linear
      // branch a loss of at least beta / 2.
      const float cp = c->data.f32[i * cstep];
      const float* const ap = a->data.f32 + i * astep;
      const float* const bp = b->data.f32 + i * bstep;
      float* const hp = h->data.f32 + i * hstep;
      if (cp < beta_2)
      {
        const float gp = inv_beta * g->data.f32[i * gstep];
        for (j = 0; j < count; j++)
          hp[j] = gp * (ap[j] - bp[j]);
      } else {
        const float gp = g->data.f32[i * gstep];
        for (j = 0; j < count; j++)
          hp[j] = ((ap[j] - bp[j]) > 0 ? 1 : -1) * gp;
      }
    } parallel_endfor
  } else {
    parallel_for(i, batch_size) {
      int j;
      const float cp = c->data.f32[i * cstep];
      const float* const ap = a->data.f32 + i * astep;
      const float* const bp = b->data.f32 + i * bstep;
      float* const hp = h->data.f32 + i * hstep;
      if (cp < beta_2)
        for (j = 0; j < count; j++)
          hp[j] = inv_beta * (ap[j] - bp[j]);
      else
        for (j = 0; j < count; j++)
          hp[j] = (ap[j] - bp[j]) > 0 ? 1 : -1; // uncovered in this run (0 hits)
    } parallel_endfor
  }
  return CCV_NNC_EXEC_SUCCESS;
}
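The backward pass recovers the forward branch from the stored loss alone: when \(\sum_j |d_j| < \beta\), we have \(\sum_j d_j^2 \le (\sum_j |d_j|)^2 < \beta^2\), so the quadratic branch always stores \(c_i < \beta/2\), while the linear branch stores \(c_i = \sum_j |d_j| - \beta/2 \ge \beta/2\); this is exactly the cp < beta_2 test. Per element, the gradient written to h is

\[
\frac{\partial c_i}{\partial a_{ij}} =
\begin{cases}
\dfrac{1}{\beta} (a_{ij} - b_{ij}) & \text{if } c_i < \beta/2 \\[4pt]
\operatorname{sign}(a_{ij} - b_{ij}) & \text{otherwise,}
\end{cases}
\]

multiplied by the incoming per-row gradient g_i when inputs[0] is present.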
REGISTER_COMMAND_BACKEND(CCV_NNC_SMOOTH_L1_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
  registry->tensor_datatypes = CCV_32F;
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  registry->algorithms = 1;
  registry->exec = _ccv_nnc_smooth_l1_forw;
}

REGISTER_COMMAND_BACKEND(CCV_NNC_SMOOTH_L1_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
  registry->tensor_datatypes = CCV_32F;
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  registry->algorithms = 1;
  registry->exec = _ccv_nnc_smooth_l1_back;
}
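For context, a minimal sketch of how this backend might be driven through the public nnc API, assuming the usual auto-generated CMD_SMOOTH_L1_FORWARD(beta) macro (inferred from the cmd.info.smooth_l1.beta field above, not confirmed by this file) and the TENSOR_LIST / CPU_TENSOR_NHWC helpers from ccv_nnc_easy.h:

/* Hypothetical usage sketch, not part of this file: two rows of three
 * predictions against targets, one loss value per row, on CPU. */
ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0); // predictions
ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0); // targets
ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0); // per-row loss
/* ... fill a->data.f32 and b->data.f32 ... */
ccv_nnc_cmd_exec(CMD_SMOOTH_L1_FORWARD(1), /* assumed macro, beta = 1 */
  ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
/* c->data.f32[i] now holds the smooth L1 loss of row i. */
ccv_nnc_tensor_free(a);
ccv_nnc_tensor_free(b);
ccv_nnc_tensor_free(c);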