Coverage Report

Created: 2021-09-30 20:21

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/loss/ccv_nnc_smooth_l1_cpu_ref.c
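
For orientation when reading the listing below: per batch sample i, with s_i = \sum_j |b_{ij} - a_{ij}| and \beta = cmd.info.smooth_l1.beta, the forward kernel _ccv_nnc_smooth_l1_forw computes

\[
\ell_i =
\begin{cases}
\dfrac{1}{2\beta}\sum_j (b_{ij}-a_{ij})^2, & \text{if } s_i < \beta \\[4pt]
s_i - \dfrac{\beta}{2}, & \text{otherwise}
\end{cases}
\]

and the backward kernel _ccv_nnc_smooth_l1_back writes, per element (g_i is the incoming gradient, taken as 1 when inputs[0] is absent; the branch is selected by the stored forward value \ell_i, not by s_i):

\[
\frac{\partial \ell_i}{\partial a_{ij}} =
\begin{cases}
\dfrac{g_i}{\beta}\,(a_{ij}-b_{ij}), & \text{if } \ell_i < \dfrac{\beta}{2} \\[4pt]
g_i\,\operatorname{sgn}(a_{ij}-b_{ij}), & \text{otherwise}
\end{cases}
\]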
Line  Count  Source
   1         #include "ccv.h"
   2         #include "ccv_internal.h"
   3         #include "nnc/ccv_nnc.h"
   4         #include "nnc/ccv_nnc_easy.h"
   5         #include "nnc/ccv_nnc_internal.h"
   6         #ifdef USE_OPENMP
   7         #include <omp.h>
   8         #endif
   9         #ifdef USE_DISPATCH
  10         #include <dispatch/dispatch.h>
  11         #endif
  12
  13         static int _ccv_nnc_smooth_l1_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  14      4  {
  15      4    assert(input_size == 2);
  16      4    const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
  17      4    assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  18      4    const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1];
  19      4    assert(output_size == 1);
  20      4    ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0];
  21      4    int dim[CCV_NNC_MAX_DIM_ALLOC];
  22      4    int ainc[CCV_NNC_MAX_DIM_ALLOC];
  23      4    int binc[CCV_NNC_MAX_DIM_ALLOC];
  24      4    int cinc[CCV_NNC_MAX_DIM_ALLOC];
  25      4    ccv_nnc_tensor_view_get_dim(a, dim);
  26      4    assert(ccv_nnc_tensor_view_check_dim(b, dim));
  27      4    ccv_nnc_tensor_view_get_inc(a, ainc);
  28      4    ccv_nnc_tensor_view_get_inc(b, binc);
  29      4    ccv_nnc_tensor_view_get_inc(c, cinc);
  30      4    assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  31      4    const int batch_size = dim[CCV_NNC_MAX_DIM];
  32      4    assert(ccv_nnc_tensor_count(c->info) == batch_size);
  33      4    const int count = dim[CCV_NNC_MAX_DIM + 1];
  34      4    const int astep = ainc[CCV_NNC_MAX_DIM + 1];
  35      4    const int bstep = binc[CCV_NNC_MAX_DIM + 1];
  36      4    const int cstep = ccv_nnc_tensor_nd(c->info.dim) == 1 ? 1 : cinc[CCV_NNC_MAX_DIM + 1];
  37      4    const float beta = cmd.info.smooth_l1.beta;
  38      4    const float beta_inv_2 = 0.5 / beta;
  39      4    const float beta_2 = 0.5 * beta;
  40      4    parallel_for(i, batch_size) {
  41      0      int j;
  42      0      const float* const ap = a->data.f32 + i * astep;
  43      0      const float* const bp = b->data.f32 + i * bstep;
  44      0      float cp = 0;
  45    866      for (j = 0; j < count; j++)
  46    866        cp += fabs(bp[j] - ap[j]);
  47      0      if (cp < beta)
  48     25      {
  49     25        cp = 0;
  50  1.16k        for (j = 0; j < count; j++)
  51  1.14k          cp += (bp[j] - ap[j]) * (bp[j] - ap[j]);
  52     25        cp *= beta_inv_2;
  53     25      } else
  54  18.4E        cp -= beta_2;
  55      0      c->data.f32[i * cstep] = cp;
  56      4    } parallel_endfor
  57      4    return CCV_NNC_EXEC_SUCCESS;
  58      4  }
  59
  60         static int _ccv_nnc_smooth_l1_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  61      3  {
  62      3    assert(input_size >= 3);
  63      3    assert(output_size >= 1);
  64      3    const ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
  65      3    assert(!g || !CCV_IS_TENSOR_VIEW(g));
  66      3    const ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
  67      3    const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
  68      3    const ccv_nnc_tensor_view_t* const c = (ccv_nnc_tensor_view_t*)inputs[3];
  69      3    ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0];
  70      3    int dim[CCV_NNC_MAX_DIM_ALLOC];
  71      3    int ainc[CCV_NNC_MAX_DIM_ALLOC];
  72      3    int binc[CCV_NNC_MAX_DIM_ALLOC];
  73      3    int cinc[CCV_NNC_MAX_DIM_ALLOC];
  74      3    int hinc[CCV_NNC_MAX_DIM_ALLOC];
  75      3    ccv_nnc_tensor_view_get_dim(a, dim);
  76      3    assert(ccv_nnc_tensor_view_check_dim(b, dim));
  77      3    assert(ccv_nnc_tensor_view_check_dim(h, dim));
  78      3    ccv_nnc_tensor_view_get_inc(a, ainc);
  79      3    ccv_nnc_tensor_view_get_inc(b, binc);
  80      3    ccv_nnc_tensor_view_get_inc(c, cinc);
  81      3    ccv_nnc_tensor_view_get_inc(h, hinc);
  82      3    assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  83      3    const int batch_size = dim[CCV_NNC_MAX_DIM];
  84      3    assert(ccv_nnc_tensor_count(c->info) == batch_size);
  85      3    const int count = dim[CCV_NNC_MAX_DIM + 1];
  86      3    const int astep = ainc[CCV_NNC_MAX_DIM + 1];
  87      3    const int bstep = binc[CCV_NNC_MAX_DIM + 1];
  88      3    const int hstep = hinc[CCV_NNC_MAX_DIM + 1];
  89      3    const int cstep = ccv_nnc_tensor_nd(c->info.dim) == 1 ? 1 : cinc[CCV_NNC_MAX_DIM + 1];
  90      3    const float beta = cmd.info.smooth_l1.beta;
  91      3    const float beta_2 = 0.5 * beta;
  92      3    const float inv_beta = 1.0 / beta;
  93      3    if (g)
  94      2    {
  95      2      int ginc[CCV_NNC_MAX_DIM_ALLOC];
  96      2      ccv_nnc_tensor_view_get_inc(g, ginc);
  97      2      assert(ccv_nnc_tensor_count(g->info) == batch_size);
  98      2      const int gstep = ccv_nnc_tensor_nd(g->info.dim) == 1 ? 1 : ginc[CCV_NNC_MAX_DIM + 1];
  99      2      parallel_for(i, batch_size) {
 100      0        int j;
 101      0        const float cp = c->data.f32[i * cstep];
 102      0        const float* const ap = a->data.f32 + i * astep;
 103      0        const float* const bp = b->data.f32 + i * bstep;
 104      0        float* const hp = h->data.f32 + i * hstep;
 105      0        if (cp < beta_2)
 106      8        {
 107      8          const float gp = inv_beta * g->data.f32[i * gstep];
 108    238          for (j = 0; j < count; j++)
 109    230            hp[j] = gp * (ap[j] - bp[j]);
 110  18.4E        } else {
 111  18.4E          const float gp = g->data.f32[i * gstep];
 112  18.4E          for (j = 0; j < count; j++)
 113      3            hp[j] = ((ap[j] - bp[j]) > 0 ? 1 : -1) * gp;
 114  18.4E        }
 115      2      } parallel_endfor
 116      2    } else {
 117      1      parallel_for(i, batch_size) {
 118      0        int j;
 119      0        const float cp = c->data.f32[i * cstep];
 120      0        const float* const ap = a->data.f32 + i * astep;
 121      0        const float* const bp = b->data.f32 + i * bstep;
 122      0        float* const hp = h->data.f32 + i * hstep;
 123      0        if (cp < beta_2)
 124    182          for (j = 0; j < count; j++)
 125    175            hp[j] = inv_beta * (ap[j] - bp[j]);
 126  18.4E        else
 127  18.4E          for (j = 0; j < count; j++)
 128      0            hp[j] = (ap[j] - bp[j]) > 0 ? 1 : -1;
 129      1      } parallel_endfor
 130      1    }
 131      3    return CCV_NNC_EXEC_SUCCESS;
 132      3  }
 133
 134         REGISTER_COMMAND_BACKEND(CCV_NNC_SMOOTH_L1_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 135      1  {
 136      1    registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
 137      1    registry->tensor_datatypes = CCV_32F;
 138      1    registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 139      1    registry->algorithms = 1;
 140      1    registry->exec = _ccv_nnc_smooth_l1_forw;
 141      1  }
 142
 143         REGISTER_COMMAND_BACKEND(CCV_NNC_SMOOTH_L1_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 144      1  {
 145      1    registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
 146      1    registry->tensor_datatypes = CCV_32F;
 147      1    registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 148      1    registry->algorithms = 1;
 149      1    registry->exec = _ccv_nnc_smooth_l1_back;
 150      1  }
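
A minimal standalone sketch of the forward per-sample computation, useful for sanity-checking this backend against a scalar reference; the helper name smooth_l1_reference is hypothetical and not part of ccv:

#include <math.h>

/* Hypothetical reference helper (not part of this file): mirrors the per-sample
 * computation of _ccv_nnc_smooth_l1_forw for one row of length count. */
static float smooth_l1_reference(const float* a, const float* b, const int count, const float beta)
{
  int j;
  float l1 = 0;
  for (j = 0; j < count; j++)
    l1 += fabsf(b[j] - a[j]); /* aggregate L1 distance over the row */
  if (l1 < beta)
  {
    float l2 = 0;
    for (j = 0; j < count; j++)
      l2 += (b[j] - a[j]) * (b[j] - a[j]); /* squared distance in the smooth region */
    return 0.5f / beta * l2;
  }
  return l1 - 0.5f * beta; /* linear region */
}

Each row of the (batch_size x count) inputs a and b maps to one output float, matching the value _ccv_nnc_smooth_l1_forw stores at c->data.f32[i * cstep] in the listing above.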