/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/loss/ccv_nnc_binary_crossentropy_cpu_ref.c
Line | Count | Source |
1 | | #include "ccv.h" |
2 | | #include "ccv_internal.h" |
3 | | #include "nnc/ccv_nnc.h" |
4 | | #include "nnc/ccv_nnc_easy.h" |
5 | | #include "nnc/ccv_nnc_internal.h" |
6 | | #ifdef USE_OPENMP |
7 | | #include <omp.h> |
8 | | #endif |
9 | | #ifdef USE_DISPATCH |
10 | | #include <dispatch/dispatch.h> |
11 | | #endif |
12 | | |
13 | | static int _ccv_nnc_binary_crossentropy_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
14 | 14 | { |
15 | 14 | assert(input_size == 2); |
16 | 14 | const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0]; |
17 | 14 | assert(ccv_nnc_tensor_nd(a->info.dim) <= 2); |
18 | 14 | const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1]; |
19 | 14 | assert(output_size == 1); |
20 | 14 | ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0]; |
21 | 14 | int dim[CCV_NNC_MAX_DIM_ALLOC]; |
22 | 14 | int astride[CCV_NNC_MAX_DIM_ALLOC]; |
23 | 14 | int bstride[CCV_NNC_MAX_DIM_ALLOC]; |
24 | 14 | int cstride[CCV_NNC_MAX_DIM_ALLOC]; |
25 | 14 | ccv_nnc_tensor_view_get_dim(a, dim); |
26 | 14 | assert(ccv_nnc_tensor_view_check_dim(b, dim)); |
27 | 14 | ccv_nnc_tensor_view_get_stride(a, astride); |
28 | 14 | ccv_nnc_tensor_view_get_stride(b, bstride); |
29 | 14 | ccv_nnc_tensor_view_get_stride(c, cstride); |
30 | 14 | assert(ccv_nnc_tensor_nd(a->info.dim) <= 2); |
31 | 14 | const int batch_size = dim[CCV_NNC_MAX_DIM]; |
32 | 14 | assert(ccv_nnc_tensor_count(c->info) == batch_size); |
33 | 14 | const int count = dim[CCV_NNC_MAX_DIM + 1]; |
34 | 14 | const int astep = astride[CCV_NNC_MAX_DIM]; |
35 | 14 | const int bstep = bstride[CCV_NNC_MAX_DIM]; |
36 | 14 | const int cstep = ccv_nnc_tensor_nd(c->info.dim) == 1 ? 1 : cstride[CCV_NNC_MAX_DIM]; |
37 | 14 | const float pos_weight = cmd.info.binary_crossentropy.pos_weight; |
38 | 14 | if (pos_weight == 1) |
39 | 7 | { |
40 | 62 | parallel_for(i, batch_size) { |
41 | 62 | int j; |
42 | 62 | const float* const ap = a->data.f32 + i * astep; |
43 | 62 | const float* const bp = b->data.f32 + i * bstep; |
44 | 62 | float cp = 0; |
45 | 6.06k | for (j = 0; j < count; j++) |
46 | 6.00k | cp += (bp[j] - 1) * log(1 - ap[j]) - bp[j] * log(ap[j]); |
47 | 62 | c->data.f32[i * cstep] = cp; |
48 | 62 | } parallel_endfor |
49 | 7 | } else { |
50 | 62 | parallel_for(i, batch_size) { |
51 | 62 | int j; |
52 | 62 | const float* const ap = a->data.f32 + i * astep; |
53 | 62 | const float* const bp = b->data.f32 + i * bstep; |
54 | 62 | float cp1 = 0, cp2 = 0; |
55 | 6.06k | for (j = 0; j < count; j++) |
56 | 6.00k | cp1 += (bp[j] - 1) * log(1 - ap[j]), cp2 += bp[j] * log(ap[j]); |
57 | 62 | c->data.f32[i * cstep] = cp1 - cp2 * pos_weight; |
58 | 62 | } parallel_endfor |
59 | 7 | } |
60 | 14 | return CCV_NNC_EXEC_SUCCESS; |
61 | 14 | } |
62 | | |
63 | | static int _ccv_nnc_binary_crossentropy_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
64 | 10 | { |
65 | 10 | assert(input_size >= 3); |
66 | 10 | assert(output_size >= 1); |
67 | 10 | const ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0]; |
68 | 10 | assert(!g || !CCV_IS_TENSOR_VIEW(g)); |
69 | 10 | const ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1]; |
70 | 10 | const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2]; |
71 | 10 | ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0]; |
72 | 10 | int dim[CCV_NNC_MAX_DIM_ALLOC]; |
73 | 10 | int astride[CCV_NNC_MAX_DIM_ALLOC]; |
74 | 10 | int bstride[CCV_NNC_MAX_DIM_ALLOC]; |
75 | 10 | int hstride[CCV_NNC_MAX_DIM_ALLOC]; |
76 | 10 | ccv_nnc_tensor_view_get_dim(a, dim); |
77 | 10 | assert(ccv_nnc_tensor_view_check_dim(b, dim)); |
78 | 10 | assert(ccv_nnc_tensor_view_check_dim(h, dim)); |
79 | 10 | ccv_nnc_tensor_view_get_stride(a, astride); |
80 | 10 | ccv_nnc_tensor_view_get_stride(b, bstride); |
81 | 10 | ccv_nnc_tensor_view_get_stride(h, hstride); |
82 | 10 | assert(ccv_nnc_tensor_nd(a->info.dim) <= 2); |
83 | 10 | const int batch_size = dim[CCV_NNC_MAX_DIM]; |
84 | 10 | const int count = dim[CCV_NNC_MAX_DIM + 1]; |
85 | 10 | const int astep = astride[CCV_NNC_MAX_DIM]; |
86 | 10 | const int bstep = bstride[CCV_NNC_MAX_DIM]; |
87 | 10 | const int hstep = hstride[CCV_NNC_MAX_DIM]; |
88 | 10 | const float pos_weight = cmd.info.binary_crossentropy.pos_weight; |
89 | 10 | if (pos_weight == 1) |
90 | 5 | { |
91 | 5 | if (g) |
92 | 3 | { |
93 | 3 | int gstride[CCV_NNC_MAX_DIM_ALLOC]; |
94 | 3 | ccv_nnc_tensor_view_get_stride(g, gstride); |
95 | 3 | assert(ccv_nnc_tensor_count(g->info) == batch_size); |
96 | 3 | const int gstep = ccv_nnc_tensor_nd(g->info.dim) == 1 ? 1 : gstride[CCV_NNC_MAX_DIM]; |
97 | 22 | parallel_for(i, batch_size) { |
98 | 22 | int j; |
99 | 22 | const float gp = g->data.f32[i * gstep]; |
100 | 22 | const float* const ap = a->data.f32 + i * astep; |
101 | 22 | const float* const bp = b->data.f32 + i * bstep; |
102 | 22 | float* const hp = h->data.f32 + i * hstep; |
103 | 2.02k | for (j = 0; j < count; j++) |
104 | 2.00k | hp[j] = gp * (ap[j] - bp[j]) / ccv_max((1 - ap[j]) * ap[j], 1e-12); |
105 | 22 | } parallel_endfor |
106 | 3 | } else { |
107 | 20 | parallel_for(i, batch_size) { |
108 | 20 | int j; |
109 | 20 | const float* const ap = a->data.f32 + i * astep; |
110 | 20 | const float* const bp = b->data.f32 + i * bstep; |
111 | 20 | float* const hp = h->data.f32 + i * hstep; |
112 | 2.02k | for (j = 0; j < count; j++) |
113 | 2.00k | hp[j] = (ap[j] - bp[j]) / ccv_max((1 - ap[j]) * ap[j], 1e-12); |
114 | 20 | } parallel_endfor |
115 | 2 | } |
116 | 5 | } else { |
117 | 5 | const float pos_weight_1 = pos_weight - 1; |
118 | 5 | if (g) |
119 | 3 | { |
120 | 3 | int gstride[CCV_NNC_MAX_DIM_ALLOC]; |
121 | 3 | ccv_nnc_tensor_view_get_stride(g, gstride); |
122 | 3 | assert(ccv_nnc_tensor_count(g->info) == batch_size); |
123 | 3 | const int gstep = ccv_nnc_tensor_nd(g->info.dim) == 1 ? 1 : gstride[CCV_NNC_MAX_DIM]; |
124 | 22 | parallel_for(i, batch_size) { |
125 | 22 | int j; |
126 | 22 | const float gp = g->data.f32[i * gstep]; |
127 | 22 | const float* const ap = a->data.f32 + i * astep; |
128 | 22 | const float* const bp = b->data.f32 + i * bstep; |
129 | 22 | float* const hp = h->data.f32 + i * hstep; |
130 | 2.02k | for (j = 0; j < count; j++) |
131 | 2.00k | hp[j] = gp * (ap[j] * bp[j] * pos_weight_1 + ap[j] - pos_weight * bp[j]) / ccv_max((1 - ap[j]) * ap[j], 1e-12); |
132 | 22 | } parallel_endfor |
133 | 3 | } else { |
134 | 20 | parallel_for(i, batch_size) { |
135 | 20 | int j; |
136 | 20 | const float* const ap = a->data.f32 + i * astep; |
137 | 20 | const float* const bp = b->data.f32 + i * bstep; |
138 | 20 | float* const hp = h->data.f32 + i * hstep; |
139 | 2.02k | for (j = 0; j < count; j++) |
140 | 2.00k | hp[j] = (ap[j] * bp[j] * pos_weight_1 + ap[j] - pos_weight * bp[j]) / ccv_max((1 - ap[j]) * ap[j], 1e-12); |
141 | 20 | } parallel_endfor |
142 | 2 | } |
143 | 5 | } |
144 | 10 | return CCV_NNC_EXEC_SUCCESS; |
145 | 10 | } |
146 | | |
147 | | REGISTER_COMMAND_BACKEND(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
148 | 1 | { |
149 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW; |
150 | 1 | registry->tensor_datatypes = CCV_32F; |
151 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
152 | 1 | registry->algorithms = 1; |
153 | 1 | registry->exec = _ccv_nnc_binary_crossentropy_forw; |
154 | 1 | } |
155 | | |
156 | | REGISTER_COMMAND_BACKEND(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
157 | 1 | { |
158 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW; |
159 | 1 | registry->tensor_datatypes = CCV_32F; |
160 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
161 | 1 | registry->algorithms = 1; |
162 | 1 | registry->exec = _ccv_nnc_binary_crossentropy_back; |
163 | 1 | } |