Coverage Report

Created: 2022-07-27 23:53

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/loss/ccv_nnc_mse_cpu_ref.c
Line | Count | Source
   1 |       | #include "ccv.h"
   2 |       | #include "ccv_internal.h"
   3 |       | #include "nnc/ccv_nnc.h"
   4 |       | #include "nnc/ccv_nnc_easy.h"
   5 |       | #include "nnc/ccv_nnc_internal.h"
   6 |       | #ifdef USE_OPENMP
   7 |       | #include <omp.h>
   8 |       | #endif
   9 |       | #ifdef USE_DISPATCH
  10 |       | #include <dispatch/dispatch.h>
  11 |       | #endif
  12 |       |
  13 |       | static int _ccv_nnc_mse_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  14 |     8 | {
  15 |     8 |   assert(input_size == 2);
  16 |     8 |   const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
  17 |     8 |   assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  18 |     8 |   const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1];
  19 |     8 |   assert(output_size == 1);
  20 |     8 |   ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0];
  21 |     8 |   int dim[CCV_NNC_MAX_DIM_ALLOC];
  22 |     8 |   int ainc[CCV_NNC_MAX_DIM_ALLOC];
  23 |     8 |   int binc[CCV_NNC_MAX_DIM_ALLOC];
  24 |     8 |   int cinc[CCV_NNC_MAX_DIM_ALLOC];
  25 |     8 |   ccv_nnc_tensor_view_get_dim(a, dim);
  26 |     8 |   assert(ccv_nnc_tensor_view_check_dim(b, dim));
  27 |     8 |   ccv_nnc_tensor_view_get_inc(a, ainc);
  28 |     8 |   ccv_nnc_tensor_view_get_inc(b, binc);
  29 |     8 |   ccv_nnc_tensor_view_get_inc(c, cinc);
  30 |     8 |   assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  31 |     8 |   const int batch_size = dim[CCV_NNC_MAX_DIM];
  32 |     8 |   assert(ccv_nnc_tensor_count(c->info) == batch_size);
  33 |     8 |   const int count = dim[CCV_NNC_MAX_DIM + 1];
  34 |     8 |   const int astep = ainc[CCV_NNC_MAX_DIM + 1];
  35 |     8 |   const int bstep = binc[CCV_NNC_MAX_DIM + 1];
  36 |     8 |   const int cstep = ccv_nnc_tensor_nd(c->info.dim) == 1 ? 1 : cinc[CCV_NNC_MAX_DIM + 1];
  37 |     8 |   if (cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_MEAN)
  38 |     4 |   {
  39 |     4 |     const float inv_mean = 1.0 / (float)count;
  40 |   186 |     parallel_for(i, batch_size) {
  41 |   186 |       int j;
  42 |   186 |       const float* const ap = a->data.f32 + i * astep;
  43 |   186 |       const float* const bp = b->data.f32 + i * bstep;
  44 |   186 |       float cp = 0;
  45 |   462 |       for (j = 0; j < count; j++)
  46 |   369 |         cp += (bp[j] - ap[j]) * (bp[j] - ap[j]);
  47 |   186 |       c->data.f32[i * cstep] = cp * inv_mean;
  48 |   186 |     } parallel_endfor
  49 |     4 |   } else {
  50 |     4 |     assert(cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_SUM);
  51 |   160 |     parallel_for(i, batch_size) {
  52 |   160 |       int j;
  53 |   160 |       const float* const ap = a->data.f32 + i * astep;
  54 |   160 |       const float* const bp = b->data.f32 + i * bstep;
  55 |   160 |       float cp = 0;
  56 |   592 |       for (j = 0; j < count; j++)
  57 |   512 |         cp += (bp[j] - ap[j]) * (bp[j] - ap[j]);
  58 |   160 |       c->data.f32[i * cstep] = cp;
  59 |   160 |     } parallel_endfor
  60 |     4 |   }
  61 |     8 |   return CCV_NNC_EXEC_SUCCESS;
  62 |     8 | }
  63 |       |
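For reference, the forward kernel above reduces each batch row to one scalar: with CCV_NNC_MSE_REDUCE_MEAN it computes c[i] = (1/count) * sum_j (b[i][j] - a[i][j])^2, and with CCV_NNC_MSE_REDUCE_SUM it drops the 1/count factor. The sketch below restates that reduction for plain contiguous row-major buffers; the helper name mse_forward_ref and the flat layout are illustrative assumptions, not part of the ccv_nnc API.

/* Hypothetical reference helper (not ccv_nnc): the same per-row reduction as
 * _ccv_nnc_mse_forw, for contiguous float buffers of shape batch_size x count. */
static void mse_forward_ref(const float* a, const float* b, float* c,
  const int batch_size, const int count, const int reduce_mean)
{
  int i, j;
  for (i = 0; i < batch_size; i++)
  {
    float sum = 0;
    for (j = 0; j < count; j++)
    {
      const float d = b[i * count + j] - a[i * count + j];
      sum += d * d; /* squared difference, as in the kernel's inner loop */
    }
    c[i] = reduce_mean ? sum / (float)count : sum; /* mean vs. sum reduction */
  }
}
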
  64 |       | static int _ccv_nnc_mse_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  65 |     4 | {
  66 |     4 |   assert(input_size >= 3);
  67 |     4 |   assert(output_size >= 1);
  68 |     4 |   const ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
  69 |     4 |   assert(!g || !CCV_IS_TENSOR_VIEW(g));
  70 |     4 |   const ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
  71 |     4 |   const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
  72 |     4 |   ccv_nnc_tensor_view_t* const ha = (ccv_nnc_tensor_view_t*)outputs[0];
  73 |     4 |   ccv_nnc_tensor_view_t* const hb = output_size >= 2 ? (ccv_nnc_tensor_view_t*)outputs[1] : 0;
  74 |     4 |   int dim[CCV_NNC_MAX_DIM_ALLOC];
  75 |     4 |   int ainc[CCV_NNC_MAX_DIM_ALLOC];
  76 |     4 |   int binc[CCV_NNC_MAX_DIM_ALLOC];
  77 |     4 |   int hainc[CCV_NNC_MAX_DIM_ALLOC];
  78 |     4 |   int hbinc[CCV_NNC_MAX_DIM_ALLOC];
  79 |     4 |   ccv_nnc_tensor_view_get_dim(a, dim);
  80 |     4 |   assert(ccv_nnc_tensor_view_check_dim(b, dim));
  81 |     4 |   if (ha)
  82 |     4 |     { assert(ccv_nnc_tensor_view_check_dim(ha, dim)); }
  83 |     4 |   if (hb)
  84 |     4 |     { assert(ccv_nnc_tensor_view_check_dim(hb, dim)); }
  85 |     4 |   ccv_nnc_tensor_view_get_inc(a, ainc);
  86 |     4 |   ccv_nnc_tensor_view_get_inc(b, binc);
  87 |     4 |   if (ha)
  88 |     4 |     ccv_nnc_tensor_view_get_inc(ha, hainc);
  89 |     4 |   if (hb)
  90 |     4 |     ccv_nnc_tensor_view_get_inc(hb, hbinc);
  91 |     4 |   assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  92 |     4 |   const int batch_size = dim[CCV_NNC_MAX_DIM];
  93 |     4 |   const int count = dim[CCV_NNC_MAX_DIM + 1];
  94 |     4 |   const float inv_mean_2 = cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_MEAN ? 2.0 / (float)count : 2.0;
  95 |     4 |   assert(cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_MEAN || cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_SUM);
  96 |     4 |   const int astep = ainc[CCV_NNC_MAX_DIM + 1];
  97 |     4 |   const int bstep = binc[CCV_NNC_MAX_DIM + 1];
  98 |     4 |   const int hastep = hainc[CCV_NNC_MAX_DIM + 1];
  99 |     4 |   const int hbstep = hbinc[CCV_NNC_MAX_DIM + 1];
 100 |     4 |   if (g)
 101 |     4 |   {
 102 |     4 |     int ginc[CCV_NNC_MAX_DIM_ALLOC];
 103 |     4 |     ccv_nnc_tensor_view_get_inc(g, ginc);
 104 |     4 |     assert(ccv_nnc_tensor_count(g->info) == batch_size);
 105 |     4 |     const int gstep = ccv_nnc_tensor_nd(g->info.dim) == 1 ? 1 : ginc[CCV_NNC_MAX_DIM + 1];
 106 |     4 |     if (ha)
 107 |     4 |     {
 108 |   152 |       parallel_for(i, batch_size) {
 109 |   152 |         int j;
 110 |   152 |         const float* const ap = a->data.f32 + i * astep;
 111 |   152 |         const float* const bp = b->data.f32 + i * bstep;
 112 |   152 |         float* const hp = ha->data.f32 + i * hastep;
 113 |   152 |         const float gp = inv_mean_2 * g->data.f32[i * gstep];
 114 |   541 |         for (j = 0; j < count; j++)
 115 |   465 |           hp[j] = gp * (ap[j] - bp[j]);
 116 |   152 |       } parallel_endfor
 117 |     4 |     }
 118 |     4 |     if (hb)
 119 |     4 |     {
 120 |   120 |       parallel_for(i, batch_size) {
 121 |   120 |         int j;
 122 |   120 |         const float* const ap = a->data.f32 + i * astep;
 123 |   120 |         const float* const bp = b->data.f32 + i * bstep;
 124 |   120 |         float* const hp = hb->data.f32 + i * hbstep;
 125 |   120 |         const float gp = inv_mean_2 * g->data.f32[i * gstep];
 126 |   533 |         for (j = 0; j < count; j++)
 127 |   473 |           hp[j] = gp * (bp[j] - ap[j]);
 128 |   120 |       } parallel_endfor
 129 |     4 |     }
 130 |     4 |   } else {
 131 |     0 |     if (ha)
 132 |     0 |     {
 133 |     0 |       parallel_for(i, batch_size) {
 134 |     0 |         int j;
 135 |     0 |         const float* const ap = a->data.f32 + i * astep;
 136 |     0 |         const float* const bp = b->data.f32 + i * bstep;
 137 |     0 |         float* const hp = ha->data.f32 + i * hastep;
 138 |     0 |         for (j = 0; j < count; j++)
 139 |     0 |           hp[j] = inv_mean_2 * (ap[j] - bp[j]);
 140 |     0 |       } parallel_endfor
 141 |     0 |     }
 142 |     0 |     if (hb)
 143 |     0 |     {
 144 |     0 |       parallel_for(i, batch_size) {
 145 |     0 |         int j;
 146 |     0 |         const float* const ap = a->data.f32 + i * astep;
 147 |     0 |         const float* const bp = b->data.f32 + i * bstep;
 148 |     0 |         float* const hp = hb->data.f32 + i * hbstep;
 149 |     0 |         for (j = 0; j < count; j++)
 150 |     0 |           hp[j] = inv_mean_2 * (bp[j] - ap[j]);
 151 |     0 |       } parallel_endfor
 152 |     0 |     }
 153 |     0 |   }
 154 |     4 |   return CCV_NNC_EXEC_SUCCESS;
 155 |     4 | }
 156 |       |
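The backward kernel above applies the chain rule per row: the scale factor is 2/count for mean reduction (2 for sum), so d/da[i][j] = factor * g[i] * (a[i][j] - b[i][j]) and d/db[i][j] = factor * g[i] * (b[i][j] - a[i][j]); when no upstream gradient tensor is supplied (the uncovered else branch at source lines 131-153, count 0), the factor is applied directly, i.e. g[i] is treated as 1. Below is a sketch under the same illustrative assumptions as before (hypothetical helper mse_backward_ref, contiguous buffers); it is not the library API.

/* Hypothetical reference helper (not ccv_nnc): mirrors the gradients computed
 * by _ccv_nnc_mse_back for contiguous float buffers. Either output pointer may
 * be NULL; a NULL g corresponds to the uncovered branch where the upstream
 * gradient defaults to 1. */
static void mse_backward_ref(const float* g, const float* a, const float* b,
  float* ha, float* hb, const int batch_size, const int count, const int reduce_mean)
{
  const float factor = reduce_mean ? 2.0f / (float)count : 2.0f;
  int i, j;
  for (i = 0; i < batch_size; i++)
  {
    const float gi = factor * (g ? g[i] : 1.0f);
    for (j = 0; j < count; j++)
    {
      const float diff = a[i * count + j] - b[i * count + j];
      if (ha)
        ha[i * count + j] = gi * diff;  /* gradient w.r.t. a */
      if (hb)
        hb[i * count + j] = -gi * diff; /* gradient w.r.t. b */
    }
  }
}
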
 157 |       | REGISTER_COMMAND_BACKEND(CCV_NNC_MSE_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 158 |     1 | {
 159 |     1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
 160 |     1 |   registry->tensor_datatypes = CCV_32F;
 161 |     1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 162 |     1 |   registry->algorithms = 1;
 163 |     1 |   registry->exec = _ccv_nnc_mse_forw;
 164 |     1 | }
 165 |       |
 166 |       | REGISTER_COMMAND_BACKEND(CCV_NNC_MSE_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 167 |     1 | {
 168 |     1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
 169 |     1 |   registry->tensor_datatypes = CCV_32F;
 170 |     1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 171 |     1 |   registry->algorithms = 1;
 172 |     1 |   registry->exec = _ccv_nnc_mse_back;
 173 |     1 | }