Coverage Report

Created: 2025-02-24 17:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/loss/ccv_nnc_mse_cpu_ref.c
Line
Count
Source
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
// MSE loss forward pass (CPU reference backend).
// inputs[0] (a) and inputs[1] (b) are float32 tensors of identical shape,
// at most 2-d, viewed as [batch_size, count]. outputs[0] (c) holds batch_size
// elements; for each row i it receives sum_j (b[j] - a[j])^2, divided by count
// when cmd.info.mse.reduce_op is CCV_NNC_MSE_REDUCE_MEAN, or left as the raw
// sum for CCV_NNC_MSE_REDUCE_SUM. Returns CCV_NNC_EXEC_SUCCESS.
static int _ccv_nnc_mse_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	assert(input_size == 2);
	const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
	assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
	const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1];
	assert(output_size == 1);
	ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0];
	int dim[CCV_NNC_MAX_DIM_ALLOC];
	int astride[CCV_NNC_MAX_DIM_ALLOC];
	int bstride[CCV_NNC_MAX_DIM_ALLOC];
	int cstride[CCV_NNC_MAX_DIM_ALLOC];
	ccv_nnc_tensor_view_get_dim(a, dim);
	assert(ccv_nnc_tensor_view_check_dim(b, dim));
	ccv_nnc_tensor_view_get_stride(a, astride);
	ccv_nnc_tensor_view_get_stride(b, bstride);
	ccv_nnc_tensor_view_get_stride(c, cstride);
	// Note: the <= 2 nd check on a was asserted twice in the old code; the
	// duplicate is removed here.
	const int batch_size = dim[CCV_NNC_MAX_DIM];
	assert(ccv_nnc_tensor_count(c->info) == batch_size);
	const int count = dim[CCV_NNC_MAX_DIM + 1];
	const int astep = astride[CCV_NNC_MAX_DIM];
	const int bstep = bstride[CCV_NNC_MAX_DIM];
	// A 1-d c is densely packed; otherwise honor the view's row stride.
	const int cstep = ccv_nnc_tensor_nd(c->info.dim) == 1 ? 1 : cstride[CCV_NNC_MAX_DIM];
	if (cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_MEAN)
	{
		const float inv_mean = 1.0 / (float)count;
		parallel_for(i, batch_size) {
			int j;
			const float* const ap = a->data.f32 + i * astep;
			const float* const bp = b->data.f32 + i * bstep;
			float cp = 0;
			for (j = 0; j < count; j++)
			{
				const float diff = bp[j] - ap[j]; // compute the difference once per element
				cp += diff * diff;
			}
			c->data.f32[i * cstep] = cp * inv_mean;
		} parallel_endfor
	} else {
		assert(cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_SUM);
		parallel_for(i, batch_size) {
			int j;
			const float* const ap = a->data.f32 + i * astep;
			const float* const bp = b->data.f32 + i * bstep;
			float cp = 0;
			for (j = 0; j < count; j++)
			{
				const float diff = bp[j] - ap[j];
				cp += diff * diff;
			}
			c->data.f32[i * cstep] = cp;
		} parallel_endfor
	}
	return CCV_NNC_EXEC_SUCCESS;
}
63
64
// MSE loss backward pass (CPU reference backend).
// inputs[0] (g) is the incoming gradient with batch_size elements, or 0 to be
// treated as all-ones; inputs[1] (a) and inputs[2] (b) are the forward inputs
// viewed as [batch_size, count]. outputs[0] (ha) receives d loss / d a (may be
// 0 to skip), outputs[1] (hb, optional) receives d loss / d b. Per element,
// ha gets scale * (a - b) and hb gets scale * (b - a), where scale is
// 2 / count for mean reduction and 2 for sum reduction, multiplied by the
// matching g entry when g is present. Returns CCV_NNC_EXEC_SUCCESS.
static int _ccv_nnc_mse_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	assert(input_size >= 3);
	assert(output_size >= 1);
	const ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
	assert(!g || !CCV_IS_TENSOR_VIEW(g));
	const ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
	const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
	ccv_nnc_tensor_view_t* const ha = (ccv_nnc_tensor_view_t*)outputs[0];
	ccv_nnc_tensor_view_t* const hb = output_size >= 2 ? (ccv_nnc_tensor_view_t*)outputs[1] : 0;
	int dim[CCV_NNC_MAX_DIM_ALLOC];
	int astride[CCV_NNC_MAX_DIM_ALLOC];
	int bstride[CCV_NNC_MAX_DIM_ALLOC];
	int hastride[CCV_NNC_MAX_DIM_ALLOC];
	int hbstride[CCV_NNC_MAX_DIM_ALLOC];
	ccv_nnc_tensor_view_get_dim(a, dim);
	assert(ccv_nnc_tensor_view_check_dim(b, dim));
	if (ha)
		{ assert(ccv_nnc_tensor_view_check_dim(ha, dim)); }
	if (hb)
		{ assert(ccv_nnc_tensor_view_check_dim(hb, dim)); }
	ccv_nnc_tensor_view_get_stride(a, astride);
	ccv_nnc_tensor_view_get_stride(b, bstride);
	if (ha)
		ccv_nnc_tensor_view_get_stride(ha, hastride);
	if (hb)
		ccv_nnc_tensor_view_get_stride(hb, hbstride);
	assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
	const int batch_size = dim[CCV_NNC_MAX_DIM];
	const int count = dim[CCV_NNC_MAX_DIM + 1];
	const float inv_mean_2 = cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_MEAN ? 2.0 / (float)count : 2.0;
	assert(cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_MEAN || cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_SUM);
	const int astep = astride[CCV_NNC_MAX_DIM];
	const int bstep = bstride[CCV_NNC_MAX_DIM];
	// Fix: only read the row steps when the corresponding output exists.
	// hastride / hbstride are filled above only for non-null ha / hb, so the
	// old unconditional reads examined indeterminate stack values whenever one
	// of the outputs was omitted.
	const int hastep = ha ? hastride[CCV_NNC_MAX_DIM] : 0;
	const int hbstep = hb ? hbstride[CCV_NNC_MAX_DIM] : 0;
	if (g)
	{
		int gstride[CCV_NNC_MAX_DIM_ALLOC];
		ccv_nnc_tensor_view_get_stride(g, gstride);
		assert(ccv_nnc_tensor_count(g->info) == batch_size);
		// A 1-d g is densely packed; otherwise honor the view's row stride.
		const int gstep = ccv_nnc_tensor_nd(g->info.dim) == 1 ? 1 : gstride[CCV_NNC_MAX_DIM];
		if (ha)
		{
			parallel_for(i, batch_size) {
				int j;
				const float* const ap = a->data.f32 + i * astep;
				const float* const bp = b->data.f32 + i * bstep;
				float* const hp = ha->data.f32 + i * hastep;
				const float gp = inv_mean_2 * g->data.f32[i * gstep];
				for (j = 0; j < count; j++)
					hp[j] = gp * (ap[j] - bp[j]);
			} parallel_endfor
		}
		if (hb)
		{
			parallel_for(i, batch_size) {
				int j;
				const float* const ap = a->data.f32 + i * astep;
				const float* const bp = b->data.f32 + i * bstep;
				float* const hp = hb->data.f32 + i * hbstep;
				const float gp = inv_mean_2 * g->data.f32[i * gstep];
				for (j = 0; j < count; j++)
					hp[j] = gp * (bp[j] - ap[j]);
			} parallel_endfor
		}
	} else {
		// No incoming gradient: behave as if g were all ones.
		if (ha)
		{
			parallel_for(i, batch_size) {
				int j;
				const float* const ap = a->data.f32 + i * astep;
				const float* const bp = b->data.f32 + i * bstep;
				float* const hp = ha->data.f32 + i * hastep;
				for (j = 0; j < count; j++)
					hp[j] = inv_mean_2 * (ap[j] - bp[j]);
			} parallel_endfor
		}
		if (hb)
		{
			parallel_for(i, batch_size) {
				int j;
				const float* const ap = a->data.f32 + i * astep;
				const float* const bp = b->data.f32 + i * bstep;
				float* const hp = hb->data.f32 + i * hbstep;
				for (j = 0; j < count; j++)
					hp[j] = inv_mean_2 * (bp[j] - ap[j]);
			} parallel_endfor
		}
	}
	return CCV_NNC_EXEC_SUCCESS;
}
156
157
// Registers _ccv_nnc_mse_forw as the CPU reference kernel for MSE forward.
REGISTER_COMMAND_BACKEND(CCV_NNC_MSE_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	registry->exec = _ccv_nnc_mse_forw;
	registry->algorithms = 1; // a single algorithm variant
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; // CPU-resident tensors only
	registry->tensor_datatypes = CCV_32F; // single-precision float only
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW; // both layouts accepted
}
165
166
// Registers _ccv_nnc_mse_back as the CPU reference kernel for MSE backward.
REGISTER_COMMAND_BACKEND(CCV_NNC_MSE_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	registry->exec = _ccv_nnc_mse_back;
	registry->algorithms = 1; // a single algorithm variant
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; // CPU-resident tensors only
	registry->tensor_datatypes = CCV_32F; // single-precision float only
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW; // both layouts accepted
}