Coverage Report

Created: 2024-06-09 23:55

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/loss/ccv_nnc_binary_crossentropy_cpu_ref.c
Line |  Count | Source
   1 |        | #include "ccv.h"
   2 |        | #include "ccv_internal.h"
   3 |        | #include "nnc/ccv_nnc.h"
   4 |        | #include "nnc/ccv_nnc_easy.h"
   5 |        | #include "nnc/ccv_nnc_internal.h"
   6 |        | #ifdef USE_OPENMP
   7 |        | #include <omp.h>
   8 |        | #endif
   9 |        | #ifdef USE_DISPATCH
  10 |        | #include <dispatch/dispatch.h>
  11 |        | #endif
  12 |        |
  13 |        | static int _ccv_nnc_binary_crossentropy_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  14 |     14 | {
  15 |     14 |   assert(input_size == 2);
  16 |     14 |   const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
  17 |     14 |   assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  18 |     14 |   const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1];
  19 |     14 |   assert(output_size == 1);
  20 |     14 |   ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0];
  21 |     14 |   int dim[CCV_NNC_MAX_DIM_ALLOC];
  22 |     14 |   int astride[CCV_NNC_MAX_DIM_ALLOC];
  23 |     14 |   int bstride[CCV_NNC_MAX_DIM_ALLOC];
  24 |     14 |   int cstride[CCV_NNC_MAX_DIM_ALLOC];
  25 |     14 |   ccv_nnc_tensor_view_get_dim(a, dim);
  26 |     14 |   assert(ccv_nnc_tensor_view_check_dim(b, dim));
  27 |     14 |   ccv_nnc_tensor_view_get_stride(a, astride);
  28 |     14 |   ccv_nnc_tensor_view_get_stride(b, bstride);
  29 |     14 |   ccv_nnc_tensor_view_get_stride(c, cstride);
  30 |     14 |   assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  31 |     14 |   const int batch_size = dim[CCV_NNC_MAX_DIM];
  32 |     14 |   assert(ccv_nnc_tensor_count(c->info) == batch_size);
  33 |     14 |   const int count = dim[CCV_NNC_MAX_DIM + 1];
  34 |     14 |   const int astep = astride[CCV_NNC_MAX_DIM];
  35 |     14 |   const int bstep = bstride[CCV_NNC_MAX_DIM];
  36 |     14 |   const int cstep = ccv_nnc_tensor_nd(c->info.dim) == 1 ? 1 : cstride[CCV_NNC_MAX_DIM];
  37 |     14 |   const float pos_weight = cmd.info.binary_crossentropy.pos_weight;
  38 |     14 |   if (pos_weight == 1)
  39 |      7 |   {
  40 |     62 |     parallel_for(i, batch_size) {
  41 |     62 |       int j;
  42 |     62 |       const float* const ap = a->data.f32 + i * astep;
  43 |     62 |       const float* const bp = b->data.f32 + i * bstep;
  44 |     62 |       float cp = 0;
  45 |  6.06k |       for (j = 0; j < count; j++)
  46 |  6.00k |         cp += (bp[j] - 1) * log(1 - ap[j]) - bp[j] * log(ap[j]);
  47 |     62 |       c->data.f32[i * cstep] = cp;
  48 |     62 |     } parallel_endfor
  49 |      7 |   } else {
  50 |     62 |     parallel_for(i, batch_size) {
  51 |     62 |       int j;
  52 |     62 |       const float* const ap = a->data.f32 + i * astep;
  53 |     62 |       const float* const bp = b->data.f32 + i * bstep;
  54 |     62 |       float cp1 = 0, cp2 = 0;
  55 |  6.06k |       for (j = 0; j < count; j++)
  56 |  6.00k |         cp1 += (bp[j] - 1) * log(1 - ap[j]), cp2 += bp[j] * log(ap[j]);
  57 |     62 |       c->data.f32[i * cstep] = cp1 - cp2 * pos_weight;
  58 |     62 |     } parallel_endfor
  59 |      7 |   }
  60 |     14 |   return CCV_NNC_EXEC_SUCCESS;
  61 |     14 | }
  62 |        |
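Note: both forward branches accumulate the same weighted binary cross-entropy per batch item. Writing a_{ij} for the prediction, b_{ij} for the target and w for pos_weight, the value stored into c at lines 47 and 57 is

  c_i = -\sum_j \left( w \, b_{ij} \log a_{ij} + (1 - b_{ij}) \log(1 - a_{ij}) \right)

The w == 1 branch folds this into a single accumulator cp; the general branch keeps cp1 = \sum_j (b_{ij} - 1) \log(1 - a_{ij}) and cp2 = \sum_j b_{ij} \log a_{ij} separate and emits cp1 - w * cp2, which is algebraically the same quantity.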
  63 |        | static int _ccv_nnc_binary_crossentropy_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  64 |     10 | {
  65 |     10 |   assert(input_size >= 3);
  66 |     10 |   assert(output_size >= 1);
  67 |     10 |   const ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
  68 |     10 |   assert(!g || !CCV_IS_TENSOR_VIEW(g));
  69 |     10 |   const ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
  70 |     10 |   const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
  71 |     10 |   ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0];
  72 |     10 |   int dim[CCV_NNC_MAX_DIM_ALLOC];
  73 |     10 |   int astride[CCV_NNC_MAX_DIM_ALLOC];
  74 |     10 |   int bstride[CCV_NNC_MAX_DIM_ALLOC];
  75 |     10 |   int hstride[CCV_NNC_MAX_DIM_ALLOC];
  76 |     10 |   ccv_nnc_tensor_view_get_dim(a, dim);
  77 |     10 |   assert(ccv_nnc_tensor_view_check_dim(b, dim));
  78 |     10 |   assert(ccv_nnc_tensor_view_check_dim(h, dim));
  79 |     10 |   ccv_nnc_tensor_view_get_stride(a, astride);
  80 |     10 |   ccv_nnc_tensor_view_get_stride(b, bstride);
  81 |     10 |   ccv_nnc_tensor_view_get_stride(h, hstride);
  82 |     10 |   assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
  83 |     10 |   const int batch_size = dim[CCV_NNC_MAX_DIM];
  84 |     10 |   const int count = dim[CCV_NNC_MAX_DIM + 1];
  85 |     10 |   const int astep = astride[CCV_NNC_MAX_DIM];
  86 |     10 |   const int bstep = bstride[CCV_NNC_MAX_DIM];
  87 |     10 |   const int hstep = hstride[CCV_NNC_MAX_DIM];
  88 |     10 |   const float pos_weight = cmd.info.binary_crossentropy.pos_weight;
  89 |     10 |   if (pos_weight == 1)
  90 |      5 |   {
  91 |      5 |     if (g)
  92 |      3 |     {
  93 |      3 |       int gstride[CCV_NNC_MAX_DIM_ALLOC];
  94 |      3 |       ccv_nnc_tensor_view_get_stride(g, gstride);
  95 |      3 |       assert(ccv_nnc_tensor_count(g->info) == batch_size);
  96 |      3 |       const int gstep = ccv_nnc_tensor_nd(g->info.dim) == 1 ? 1 : gstride[CCV_NNC_MAX_DIM];
  97 |     22 |       parallel_for(i, batch_size) {
  98 |     22 |         int j;
  99 |     22 |         const float gp = g->data.f32[i * gstep];
 100 |     22 |         const float* const ap = a->data.f32 + i * astep;
 101 |     22 |         const float* const bp = b->data.f32 + i * bstep;
 102 |     22 |         float* const hp = h->data.f32 + i * hstep;
 103 |  2.02k |         for (j = 0; j < count; j++)
 104 |  2.00k |           hp[j] = gp * (ap[j] - bp[j]) / ccv_max((1 - ap[j]) * ap[j], 1e-12);
 105 |     22 |       } parallel_endfor
 106 |      3 |     } else {
 107 |     20 |       parallel_for(i, batch_size) {
 108 |     20 |         int j;
 109 |     20 |         const float* const ap = a->data.f32 + i * astep;
 110 |     20 |         const float* const bp = b->data.f32 + i * bstep;
 111 |     20 |         float* const hp = h->data.f32 + i * hstep;
 112 |  2.02k |         for (j = 0; j < count; j++)
 113 |  2.00k |           hp[j] = (ap[j] - bp[j]) / ccv_max((1 - ap[j]) * ap[j], 1e-12);
 114 |     20 |       } parallel_endfor
 115 |      2 |     }
 116 |      5 |   } else {
 117 |      5 |     const float pos_weight_1 = pos_weight - 1;
 118 |      5 |     if (g)
 119 |      3 |     {
 120 |      3 |       int gstride[CCV_NNC_MAX_DIM_ALLOC];
 121 |      3 |       ccv_nnc_tensor_view_get_stride(g, gstride);
 122 |      3 |       assert(ccv_nnc_tensor_count(g->info) == batch_size);
 123 |      3 |       const int gstep = ccv_nnc_tensor_nd(g->info.dim) == 1 ? 1 : gstride[CCV_NNC_MAX_DIM];
 124 |     22 |       parallel_for(i, batch_size) {
 125 |     22 |         int j;
 126 |     22 |         const float gp = g->data.f32[i * gstep];
 127 |     22 |         const float* const ap = a->data.f32 + i * astep;
 128 |     22 |         const float* const bp = b->data.f32 + i * bstep;
 129 |     22 |         float* const hp = h->data.f32 + i * hstep;
 130 |  2.02k |         for (j = 0; j < count; j++)
 131 |  2.00k |           hp[j] = gp * (ap[j] * bp[j] * pos_weight_1 + ap[j] - pos_weight * bp[j]) / ccv_max((1 - ap[j]) * ap[j], 1e-12);
 132 |     22 |       } parallel_endfor
 133 |      3 |     } else {
 134 |     20 |       parallel_for(i, batch_size) {
 135 |     20 |         int j;
 136 |     20 |         const float* const ap = a->data.f32 + i * astep;
 137 |     20 |         const float* const bp = b->data.f32 + i * bstep;
 138 |     20 |         float* const hp = h->data.f32 + i * hstep;
 139 |  2.02k |         for (j = 0; j < count; j++)
 140 |  2.00k |           hp[j] = (ap[j] * bp[j] * pos_weight_1 + ap[j] - pos_weight * bp[j]) / ccv_max((1 - ap[j]) * ap[j], 1e-12);
 141 |     20 |       } parallel_endfor
 142 |      2 |     }
 143 |      5 |   }
 144 |     10 |   return CCV_NNC_EXEC_SUCCESS;
 145 |     10 | }
 146 |        |
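Note: each backward kernel writes the closed-form derivative of the loss above with respect to the prediction:

  \frac{\partial c_i}{\partial a_{ij}} = \frac{(w - 1) \, a_{ij} b_{ij} + a_{ij} - w \, b_{ij}}{a_{ij} (1 - a_{ij})}

which reduces to (a_{ij} - b_{ij}) / (a_{ij} (1 - a_{ij})) when w == 1. The denominator is clamped at 1e-12 through ccv_max to avoid division by zero at saturated predictions, and the result is scaled by the incoming gradient gp whenever g is supplied (lines 104 and 131).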
 147 |        | REGISTER_COMMAND_BACKEND(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 148 |      1 | {
 149 |      1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
 150 |      1 |   registry->tensor_datatypes = CCV_32F;
 151 |      1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 152 |      1 |   registry->algorithms = 1;
 153 |      1 |   registry->exec = _ccv_nnc_binary_crossentropy_forw;
 154 |      1 | }
 155 |        |
 156 |        | REGISTER_COMMAND_BACKEND(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 157 |      1 | {
 158 |      1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
 159 |      1 |   registry->tensor_datatypes = CCV_32F;
 160 |      1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 161 |      1 |   registry->algorithms = 1;
 162 |      1 |   registry->exec = _ccv_nnc_binary_crossentropy_back;
 163 |      1 | }
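The two formulas above can be sanity-checked without going through the ccv command API. The following standalone C sketch is illustrative only (bce, MAX, and the test values are not part of the library): it reproduces the forward accumulation and compares the backward expression against a central finite difference.

#include <stdio.h>
#include <math.h>

#define MAX(x, y) ((x) > (y) ? (x) : (y)) /* stand-in for ccv_max */

/* Per-item weighted BCE, mirroring the forward kernel's cp1/cp2 split. */
static float bce(const float* a, const float* b, int count, float w)
{
  float cp1 = 0, cp2 = 0;
  int j;
  for (j = 0; j < count; j++)
    cp1 += (b[j] - 1) * log(1 - a[j]), cp2 += b[j] * log(a[j]);
  return cp1 - cp2 * w;
}

int main(void)
{
  const float a[4] = { 0.2, 0.7, 0.9, 0.4 }; /* predictions, assumed in (0, 1) */
  const float b[4] = { 0, 1, 1, 0 }; /* binary targets */
  const float w = 1.5; /* pos_weight */
  const float eps = 1e-3;
  int j, k;
  for (j = 0; j < 4; j++)
  {
    /* Analytic gradient: the expression the backward kernel uses when g is absent. */
    const float ha = (a[j] * b[j] * (w - 1) + a[j] - w * b[j]) / MAX((1 - a[j]) * a[j], 1e-12);
    /* Central finite difference on the loss. */
    float ap[4], am[4];
    for (k = 0; k < 4; k++)
      ap[k] = am[k] = a[k];
    ap[j] += eps, am[j] -= eps;
    const float hn = (bce(ap, b, 4, w) - bce(am, b, 4, w)) / (2 * eps);
    printf("j=%d analytic=%.4f numeric=%.4f\n", j, ha, hn);
  }
  return 0;
}

Compiled with e.g. cc -std=c99 check_bce.c -lm (the file name is arbitrary), the analytic and numeric columns should agree to a few decimal places.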