Coverage Report

Created: 2024-08-18 16:21

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/loss/ccv_nnc_binary_crossentropy_cpu_ref.c
Line   Count  Source
   1          #include "ccv.h"
   2          #include "ccv_internal.h"
   3          #include "nnc/ccv_nnc.h"
   4          #include "nnc/ccv_nnc_easy.h"
   5          #include "nnc/ccv_nnc_internal.h"
   6          #ifdef USE_OPENMP
   7          #include <omp.h>
   8          #endif
   9          #ifdef USE_DISPATCH
  10          #include <dispatch/dispatch.h>
  11          #endif
  12
  13          static int _ccv_nnc_binary_crossentropy_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  14      14  {
  15      14    assert(input_size == 2);
  16      14    const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
  17      14    assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  18      14    const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1];
  19      14    assert(output_size == 1);
  20      14    ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0];
  21      14    int dim[CCV_NNC_MAX_DIM_ALLOC];
  22      14    int astride[CCV_NNC_MAX_DIM_ALLOC];
  23      14    int bstride[CCV_NNC_MAX_DIM_ALLOC];
  24      14    int cstride[CCV_NNC_MAX_DIM_ALLOC];
  25      14    ccv_nnc_tensor_view_get_dim(a, dim);
  26      14    assert(ccv_nnc_tensor_view_check_dim(b, dim));
  27      14    ccv_nnc_tensor_view_get_stride(a, astride);
  28      14    ccv_nnc_tensor_view_get_stride(b, bstride);
  29      14    ccv_nnc_tensor_view_get_stride(c, cstride);
  30      14    assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  31      14    const int batch_size = dim[CCV_NNC_MAX_DIM];
  32      14    assert(ccv_nnc_tensor_count(c->info) == batch_size);
  33      14    const int count = dim[CCV_NNC_MAX_DIM + 1];
  34      14    const int astep = astride[CCV_NNC_MAX_DIM];
  35      14    const int bstep = bstride[CCV_NNC_MAX_DIM];
  36      14    const int cstep = ccv_nnc_tensor_nd(c->info.dim) == 1 ? 1 : cstride[CCV_NNC_MAX_DIM];
  37      14    const float pos_weight = cmd.info.binary_crossentropy.pos_weight;
  38      14    if (pos_weight == 1)
  39       7    {
  40      62      parallel_for(i, batch_size) {
  41      62        int j;
  42      62        const float* const ap = a->data.f32 + i * astep;
  43      62        const float* const bp = b->data.f32 + i * bstep;
  44      62        float cp = 0;
  45   6.06k        for (j = 0; j < count; j++)
  46   6.00k          cp += (bp[j] - 1) * log(1 - ap[j]) - bp[j] * log(ap[j]);
  47      62        c->data.f32[i * cstep] = cp;
  48      62      } parallel_endfor
  49       7    } else {
  50      62      parallel_for(i, batch_size) {
  51      62        int j;
  52      62        const float* const ap = a->data.f32 + i * astep;
  53      62        const float* const bp = b->data.f32 + i * bstep;
  54      62        float cp1 = 0, cp2 = 0;
  55   6.06k        for (j = 0; j < count; j++)
  56   6.00k          cp1 += (bp[j] - 1) * log(1 - ap[j]), cp2 += bp[j] * log(ap[j]);
  57      62        c->data.f32[i * cstep] = cp1 - cp2 * pos_weight;
  58      62      } parallel_endfor
  59       7    }
  60      14    return CCV_NNC_EXEC_SUCCESS;
  61      14  }
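What the forward loops compute (lines 45-46 and 55-56 above): each batch row i is reduced to a single scalar loss, the standard binary cross-entropy, with the positive term scaled separately when pos_weight != 1. A minimal standalone sketch of the same per-row arithmetic (bce_row is a hypothetical name, not part of ccv; plain C, no ccv dependencies):

    #include <math.h>

    /* Per-row reduction mirroring the loop above:
     * loss = sum_j (b[j] - 1) * log(1 - a[j]) - pos_weight * b[j] * log(a[j]).
     * With pos_weight == 1 this is the usual binary cross-entropy
     * -sum_j b[j] * log(a[j]) + (1 - b[j]) * log(1 - a[j]). */
    static float bce_row(const float* a, const float* b, int count, float pos_weight)
    {
        float cp1 = 0, cp2 = 0;
        int j;
        for (j = 0; j < count; j++)
            cp1 += (b[j] - 1) * log(1 - a[j]), cp2 += b[j] * log(a[j]);
        return cp1 - cp2 * pos_weight;
    }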
  62
  63          static int _ccv_nnc_binary_crossentropy_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  64      10  {
  65      10    assert(input_size >= 3);
  66      10    assert(output_size >= 1);
  67      10    const ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
  68      10    assert(!g || !CCV_IS_TENSOR_VIEW(g));
  69      10    const ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
  70      10    const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
  71      10    ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0];
  72      10    int dim[CCV_NNC_MAX_DIM_ALLOC];
  73      10    int astride[CCV_NNC_MAX_DIM_ALLOC];
  74      10    int bstride[CCV_NNC_MAX_DIM_ALLOC];
  75      10    int hstride[CCV_NNC_MAX_DIM_ALLOC];
  76      10    ccv_nnc_tensor_view_get_dim(a, dim);
  77      10    assert(ccv_nnc_tensor_view_check_dim(b, dim));
  78      10    assert(ccv_nnc_tensor_view_check_dim(h, dim));
  79      10    ccv_nnc_tensor_view_get_stride(a, astride);
  80      10    ccv_nnc_tensor_view_get_stride(b, bstride);
  81      10    ccv_nnc_tensor_view_get_stride(h, hstride);
  82      10    assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  83      10    const int batch_size = dim[CCV_NNC_MAX_DIM];
  84      10    const int count = dim[CCV_NNC_MAX_DIM + 1];
  85      10    const int astep = astride[CCV_NNC_MAX_DIM];
  86      10    const int bstep = bstride[CCV_NNC_MAX_DIM];
  87      10    const int hstep = hstride[CCV_NNC_MAX_DIM];
  88      10    const float pos_weight = cmd.info.binary_crossentropy.pos_weight;
  89      10    if (pos_weight == 1)
  90       5    {
  91       5      if (g)
  92       3      {
  93       3        int gstride[CCV_NNC_MAX_DIM_ALLOC];
  94       3        ccv_nnc_tensor_view_get_stride(g, gstride);
  95       3        assert(ccv_nnc_tensor_count(g->info) == batch_size);
  96       3        const int gstep = ccv_nnc_tensor_nd(g->info.dim) == 1 ? 1 : gstride[CCV_NNC_MAX_DIM];
  97      22        parallel_for(i, batch_size) {
  98      22          int j;
  99      22          const float gp = g->data.f32[i * gstep];
 100      22          const float* const ap = a->data.f32 + i * astep;
 101      22          const float* const bp = b->data.f32 + i * bstep;
 102      22          float* const hp = h->data.f32 + i * hstep;
 103   2.02k          for (j = 0; j < count; j++)
 104   2.00k            hp[j] = gp * (ap[j] - bp[j]) / ccv_max((1 - ap[j]) * ap[j], 1e-12);
 105      22        } parallel_endfor
 106       3      } else {
 107      20        parallel_for(i, batch_size) {
 108      20          int j;
 109      20          const float* const ap = a->data.f32 + i * astep;
 110      20          const float* const bp = b->data.f32 + i * bstep;
 111      20          float* const hp = h->data.f32 + i * hstep;
 112   2.02k          for (j = 0; j < count; j++)
 113   2.00k            hp[j] = (ap[j] - bp[j]) / ccv_max((1 - ap[j]) * ap[j], 1e-12);
 114      20        } parallel_endfor
 115       2      }
 116       5    } else {
 117       5      const float pos_weight_1 = pos_weight - 1;
 118       5      if (g)
 119       3      {
 120       3        int gstride[CCV_NNC_MAX_DIM_ALLOC];
 121       3        ccv_nnc_tensor_view_get_stride(g, gstride);
 122       3        assert(ccv_nnc_tensor_count(g->info) == batch_size);
 123       3        const int gstep = ccv_nnc_tensor_nd(g->info.dim) == 1 ? 1 : gstride[CCV_NNC_MAX_DIM];
 124      22        parallel_for(i, batch_size) {
 125      22          int j;
 126      22          const float gp = g->data.f32[i * gstep];
 127      22          const float* const ap = a->data.f32 + i * astep;
 128      22          const float* const bp = b->data.f32 + i * bstep;
 129      22          float* const hp = h->data.f32 + i * hstep;
 130   2.02k          for (j = 0; j < count; j++)
 131   2.00k            hp[j] = gp * (ap[j] * bp[j] * pos_weight_1 + ap[j] - pos_weight * bp[j]) / ccv_max((1 - ap[j]) * ap[j], 1e-12);
 132      22        } parallel_endfor
 133       3      } else {
 134      20        parallel_for(i, batch_size) {
 135      20          int j;
 136      20          const float* const ap = a->data.f32 + i * astep;
 137      20          const float* const bp = b->data.f32 + i * bstep;
 138      20          float* const hp = h->data.f32 + i * hstep;
 139   2.02k          for (j = 0; j < count; j++)
 140   2.00k            hp[j] = (ap[j] * bp[j] * pos_weight_1 + ap[j] - pos_weight * bp[j]) / ccv_max((1 - ap[j]) * ap[j], 1e-12);
 141      20        } parallel_endfor
 142       2      }
 143       5    }
 144      10    return CCV_NNC_EXEC_SUCCESS;
 145      10  }
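The backward expressions (lines 104/113 and 131/140 above) are the derivative of the forward row loss with respect to the prediction a_j, multiplied by the incoming gradient g_i when one is supplied; the ccv_max(..., 1e-12) clamp only guards the denominator against a_j hitting 0 or 1. In LaTeX, with w = pos_weight:

    \ell_j = (b_j - 1)\log(1 - a_j) - w\, b_j \log a_j
    \frac{\partial \ell_j}{\partial a_j}
      = \frac{1 - b_j}{1 - a_j} - \frac{w\, b_j}{a_j}
      = \frac{a_j b_j (w - 1) + a_j - w\, b_j}{a_j (1 - a_j)}

For w = 1 the numerator collapses to a_j - b_j, which is exactly the pos_weight == 1 branch of the code.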
 146
 147          REGISTER_COMMAND_BACKEND(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 148       1  {
 149       1    registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
 150       1    registry->tensor_datatypes = CCV_32F;
 151       1    registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 152       1    registry->algorithms = 1;
 153       1    registry->exec = _ccv_nnc_binary_crossentropy_forw;
 154       1  }
 155
 156          REGISTER_COMMAND_BACKEND(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 157       1  {
 158       1    registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
 159       1    registry->tensor_datatypes = CCV_32F;
 160       1    registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 161       1    registry->algorithms = 1;
 162       1    registry->exec = _ccv_nnc_binary_crossentropy_back;
 163       1  }
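Both registrations expose the kernels through the generic command API (NHWC/NCHW layouts, 32-bit float, CPU memory, a single algorithm). For orientation, a hedged invocation sketch; it assumes the CMD_BINARY_CROSSENTROPY_FORWARD(), TENSOR_LIST() and CPU_TENSOR_NHWC() conveniences from ccv_nnc_easy.h behave as in ccv's test suite, which this report does not itself confirm:

    #include <nnc/ccv_nnc.h>
    #include <nnc/ccv_nnc_easy.h>

    int main(void)
    {
        /* 10 rows of 100 predictions/targets: the 2-D (batch, count) layout
         * the forward kernel asserts on; the loss output has one value per row. */
        ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
        ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
        ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
        int i;
        for (i = 0; i < 10 * 100; i++)
            a->data.f32[i] = 0.5, b->data.f32[i] = i % 2;
        /* Dispatches to _ccv_nnc_binary_crossentropy_forw via the CPU_REF backend. */
        ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
        ccv_nnc_tensor_free(a);
        ccv_nnc_tensor_free(b);
        ccv_nnc_tensor_free(c);
        return 0;
    }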