// lib/nnc/cmd/sigmoid_loss/ccv_nnc_sigmoid_binary_crossentropy_cpu_ref.c
// CPU reference kernels for the fused sigmoid + binary cross-entropy loss.
#include "ccv.h"
#include "ccv_internal.h"
#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_easy.h"
#include "nnc/ccv_nnc_internal.h"
#include <math.h>
#ifdef USE_OPENMP
#include <omp.h>
#endif
#ifdef USE_DISPATCH
#include <dispatch/dispatch.h>
#endif
12 | | |
13 | | static int _ccv_nnc_sigmoid_binary_crossentropy_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
14 | 18 | { |
15 | 18 | assert(input_size == 2); |
16 | 18 | const ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0]; |
17 | 18 | assert(ccv_nnc_tensor_nd(a->info.dim) <= 2); |
18 | 18 | const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[1]; |
19 | 18 | assert(output_size == 2); |
20 | 18 | ccv_nnc_tensor_view_t* const c = (ccv_nnc_tensor_view_t*)outputs[0]; |
21 | 18 | ccv_nnc_tensor_view_t* const d = (ccv_nnc_tensor_view_t*)outputs[1]; |
22 | 18 | int dim[CCV_NNC_MAX_DIM_ALLOC]; |
23 | 18 | int astride[CCV_NNC_MAX_DIM_ALLOC]; |
24 | 18 | int bstride[CCV_NNC_MAX_DIM_ALLOC]; |
25 | 18 | int dstride[CCV_NNC_MAX_DIM_ALLOC]; |
26 | 18 | ccv_nnc_tensor_view_get_dim(a, dim); |
27 | 18 | assert(ccv_nnc_tensor_view_check_dim(b, dim)); |
28 | 18 | assert(ccv_nnc_tensor_view_check_dim(d, dim)); |
29 | 18 | ccv_nnc_tensor_view_get_stride(a, astride); |
30 | 18 | ccv_nnc_tensor_view_get_stride(b, bstride); |
31 | 18 | ccv_nnc_tensor_view_get_stride(d, dstride); |
32 | 18 | assert(ccv_nnc_tensor_nd(a->info.dim) <= 2); |
33 | 18 | const int batch_size = dim[CCV_NNC_MAX_DIM]; |
34 | 18 | const int count = dim[CCV_NNC_MAX_DIM + 1]; |
35 | 18 | const int astep = astride[CCV_NNC_MAX_DIM]; |
36 | 18 | const int bstep = bstride[CCV_NNC_MAX_DIM]; |
37 | 18 | const int dstep = dstride[CCV_NNC_MAX_DIM]; |
38 | 18 | if (c) |
39 | 6 | { |
40 | 6 | int cstride[CCV_NNC_MAX_DIM_ALLOC]; |
41 | 6 | assert(ccv_nnc_tensor_count(c->info) == batch_size); |
42 | 6 | ccv_nnc_tensor_view_get_stride(c, cstride); |
43 | 6 | const int cstep = ccv_nnc_tensor_nd(c->info.dim) == 1 ? 14 : cstride[2 CCV_NNC_MAX_DIM2 ]; |
44 | 6 | const float pos_weight = cmd.info.binary_crossentropy.pos_weight; |
45 | 6 | if (pos_weight == 1) |
46 | 3 | { |
47 | 22 | parallel_for3 (i, batch_size) { |
48 | 22 | int j; |
49 | 22 | const float* const ap = a->data.f32 + i * astep; |
50 | 22 | const float* const bp = b->data.f32 + i * bstep; |
51 | 22 | float* const dp = d->data.f32 + i * dstep; |
52 | 22 | float cp = 0; |
53 | 2.02k | for (j = 0; j < count; j++2.00k ) |
54 | 2.00k | { |
55 | 2.00k | cp += (1 - bp[j]) * ap[j] + log(1. + exp(-ap[j])); |
56 | 2.00k | dp[j] = 1. / (1. + exp(-ap[j])); |
57 | 2.00k | } |
58 | 22 | c->data.f32[i * cstep] = cp; |
59 | 22 | } parallel_endfor |
60 | 3 | } else { |
61 | 3 | const float pos_weight_1 = pos_weight - 1; |
62 | 22 | parallel_for3 (i, batch_size) { |
63 | 22 | int j; |
64 | 22 | const float* const ap = a->data.f32 + i * astep; |
65 | 22 | const float* const bp = b->data.f32 + i * bstep; |
66 | 22 | float* const dp = d->data.f32 + i * dstep; |
67 | 22 | float cp = 0; |
68 | 2.02k | for (j = 0; j < count; j++2.00k ) |
69 | 2.00k | { |
70 | 2.00k | cp += (1 - bp[j]) * ap[j] + (1 + bp[j] * pos_weight_1) * log(1. + exp(-ap[j])); |
71 | 2.00k | dp[j] = 1. / (1. + exp(-ap[j])); |
72 | 2.00k | } |
73 | 22 | c->data.f32[i * cstep] = cp; |
74 | 22 | } parallel_endfor |
75 | 3 | } |
76 | 12 | } else { |
77 | 120 | parallel_for12 (i, batch_size) { |
78 | 120 | int j; |
79 | 120 | const float* const ap = a->data.f32 + i * astep; |
80 | 120 | float* const dp = d->data.f32 + i * dstep; |
81 | 12.1k | for (j = 0; j < count; j++12.0k ) |
82 | 12.0k | dp[j] = 1. / (1. + exp(-ap[j])); |
83 | 120 | } parallel_endfor |
84 | 12 | } |
85 | 18 | return CCV_NNC_EXEC_SUCCESS; |
86 | 18 | } |
87 | | |
88 | | static int _ccv_nnc_sigmoid_binary_crossentropy_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
89 | 10 | { |
90 | 10 | assert(input_size >= 6); |
91 | 10 | assert(output_size >= 1); |
92 | 10 | const ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0]; |
93 | 10 | assert(!g || !CCV_IS_TENSOR_VIEW(g)); |
94 | 10 | const ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[5]; |
95 | 10 | const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[3]; |
96 | 10 | ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0]; |
97 | 10 | int dim[CCV_NNC_MAX_DIM_ALLOC]; |
98 | 10 | int astride[CCV_NNC_MAX_DIM_ALLOC]; |
99 | 10 | int bstride[CCV_NNC_MAX_DIM_ALLOC]; |
100 | 10 | int hstride[CCV_NNC_MAX_DIM_ALLOC]; |
101 | 10 | ccv_nnc_tensor_view_get_dim(a, dim); |
102 | 10 | assert(ccv_nnc_tensor_view_check_dim(b, dim)); |
103 | 10 | assert(ccv_nnc_tensor_view_check_dim(h, dim)); |
104 | 10 | ccv_nnc_tensor_view_get_stride(a, astride); |
105 | 10 | ccv_nnc_tensor_view_get_stride(b, bstride); |
106 | 10 | ccv_nnc_tensor_view_get_stride(h, hstride); |
107 | 10 | assert(ccv_nnc_tensor_nd(a->info.dim) <= 2); |
108 | 10 | const int batch_size = dim[CCV_NNC_MAX_DIM]; |
109 | 10 | const int count = dim[CCV_NNC_MAX_DIM + 1]; |
110 | 10 | const int astep = astride[CCV_NNC_MAX_DIM]; |
111 | 10 | const int bstep = bstride[CCV_NNC_MAX_DIM]; |
112 | 10 | const int hstep = hstride[CCV_NNC_MAX_DIM]; |
113 | 10 | const float pos_weight = cmd.info.binary_crossentropy.pos_weight; |
114 | 10 | if (pos_weight == 1) |
115 | 5 | { |
116 | 5 | if (g) |
117 | 3 | { |
118 | 3 | int gstride[CCV_NNC_MAX_DIM_ALLOC]; |
119 | 3 | ccv_nnc_tensor_view_get_stride(g, gstride); |
120 | 3 | assert(ccv_nnc_tensor_count(g->info) == batch_size); |
121 | 3 | const int gstep = ccv_nnc_tensor_nd(g->info.dim) == 1 ? 12 : gstride[1 CCV_NNC_MAX_DIM1 ]; |
122 | 22 | parallel_for3 (i, batch_size) { |
123 | 22 | int j; |
124 | 22 | const float gp = g->data.f32[i * gstep]; |
125 | 22 | const float* const ap = a->data.f32 + i * astep; |
126 | 22 | const float* const bp = b->data.f32 + i * bstep; |
127 | 22 | float* const hp = h->data.f32 + i * hstep; |
128 | 2.02k | for (j = 0; j < count; j++2.00k ) |
129 | 2.00k | hp[j] = gp * (ap[j] - bp[j]); |
130 | 22 | } parallel_endfor |
131 | 3 | } else { |
132 | 20 | parallel_for2 (i, batch_size) { |
133 | 20 | int j; |
134 | 20 | const float* const ap = a->data.f32 + i * astep; |
135 | 20 | const float* const bp = b->data.f32 + i * bstep; |
136 | 20 | float* const hp = h->data.f32 + i * hstep; |
137 | 2.02k | for (j = 0; j < count; j++2.00k ) |
138 | 2.00k | hp[j] = ap[j] - bp[j]; |
139 | 20 | } parallel_endfor |
140 | 2 | } |
141 | 5 | } else { |
142 | 5 | if (g) |
143 | 3 | { |
144 | 3 | int gstride[CCV_NNC_MAX_DIM_ALLOC]; |
145 | 3 | ccv_nnc_tensor_view_get_stride(g, gstride); |
146 | 3 | assert(ccv_nnc_tensor_count(g->info) == batch_size); |
147 | 3 | const int gstep = ccv_nnc_tensor_nd(g->info.dim) == 1 ? 12 : gstride[1 CCV_NNC_MAX_DIM1 ]; |
148 | 22 | parallel_for3 (i, batch_size) { |
149 | 22 | int j; |
150 | 22 | const float gp = g->data.f32[i * gstep]; |
151 | 22 | const float* const ap = a->data.f32 + i * astep; |
152 | 22 | const float* const bp = b->data.f32 + i * bstep; |
153 | 22 | float* const hp = h->data.f32 + i * hstep; |
154 | 2.02k | for (j = 0; j < count; j++2.00k ) |
155 | 2.00k | hp[j] = gp * ((ap[j] - 1) * bp[j] * pos_weight + ap[j] * (1 - bp[j])); |
156 | 22 | } parallel_endfor |
157 | 3 | } else { |
158 | 20 | parallel_for2 (i, batch_size) { |
159 | 20 | int j; |
160 | 20 | const float* const ap = a->data.f32 + i * astep; |
161 | 20 | const float* const bp = b->data.f32 + i * bstep; |
162 | 20 | float* const hp = h->data.f32 + i * hstep; |
163 | 2.02k | for (j = 0; j < count; j++2.00k ) |
164 | 2.00k | hp[j] = (ap[j] - 1) * bp[j] * pos_weight + ap[j] * (1 - bp[j]); |
165 | 20 | } parallel_endfor |
166 | 2 | } |
167 | 5 | } |
168 | 10 | return CCV_NNC_EXEC_SUCCESS; |
169 | 10 | } |
170 | | |
171 | | REGISTER_COMMAND_BACKEND(CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
172 | 1 | { |
173 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW; |
174 | 1 | registry->tensor_datatypes = CCV_32F; |
175 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
176 | 1 | registry->algorithms = 1; |
177 | 1 | registry->exec = _ccv_nnc_sigmoid_binary_crossentropy_forw; |
178 | 1 | } |
179 | | |
180 | | REGISTER_COMMAND_BACKEND(CCV_NNC_SIGMOID_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
181 | 1 | { |
182 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW; |
183 | 1 | registry->tensor_datatypes = CCV_32F; |
184 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
185 | 1 | registry->algorithms = 1; |
186 | 1 | registry->exec = _ccv_nnc_sigmoid_binary_crossentropy_back; |
187 | 1 | } |