Bug Summary

File: nnc/cmd/ew/ccv_nnc_ew_cpu_ref.c
Warning: line 1963, column 27
The right operand of '*' is a garbage value
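
What this warning means: the analyzer believes that, on some path reaching line 1963, the right-hand operand of a multiplication is read from memory that was never initialized. Line 1963 is beyond the excerpt shown below, but the strided inner loops in this file index with expressions like cp1[x * cstride[3]], so the suspect operand is presumably a stride (or dim) entry that was never written for the tensor's actual dimensionality. A minimal, self-contained sketch of the pattern this checker fires on (hypothetical names; deliberately buggy; this is not the code at line 1963):

	#include <stdio.h>

	int main(void)
	{
		int stride[12]; /* fixed capacity, never zero-initialized */
		const int nd = 2; /* only the first nd entries get written */
		int i;
		for (i = 0; i < nd; i++)
			stride[i] = 1;
		const int x = 5;
		/* stride[3] was never written: the right operand of '*' is a garbage value */
		const int offset = x * stride[3];
		printf("%d\n", offset);
		return 0;
	}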

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_ew_cpu_ref.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd -resource-dir /usr/local/lib/clang/19 -I ../../ -I .. -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2026-05-02-115646-1519401-1 -x c ew/ccv_nnc_ew_cpu_ref.c
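
The line above is the exact cc1 invocation recorded in the report. To rerun the analyzer on just this file, a rough driver-level equivalent (a sketch, assuming a ccv checkout with the same include layout; the -D defines from the full command are elided) is:

	clang --analyze -Xclang -analyzer-output=text \
		-I ../../ -I .. ew/ccv_nnc_ew_cpu_ref.c
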
1#include "ccv.h"
2#include "ccv_internal.h"
3#include "nnc/ccv_nnc.h"
4#include "nnc/ccv_nnc_easy.h"
5#include "nnc/ccv_nnc_internal.h"
6#ifdef USE_OPENMP
7#include <omp.h>
8#endif
9#ifdef USE_DISPATCH
10#include <dispatch/dispatch.h>
11#endif
12
13#include "../_ccv_nnc_cpu_ref.h"
14
15void _ccv_nnc_ewsum_forw_cpu_ref_f32(ccv_nnc_tensor_view_t* const* const inputs, const int input_size, ccv_nnc_tensor_view_t* const* const outputs, const int output_size)
16{
17 if (input_size == 1 && output_size == 1)
18 {
19 _ccv_nnc_tensor_transfer_cpu_ref_f32(inputs[0], outputs[0]);
20 return;
21 }
22 // Assuming this is float 32.
23 int dim[CCV_NNC_MAX_DIM_ALLOC];
24 int astride[CCV_NNC_MAX_DIM_ALLOC];
25 int bstride[CCV_NNC_MAX_DIM_ALLOC];
26 int cstride[CCV_NNC_MAX_DIM_ALLOC];
27 int x, z;
28 int k = 0;
29 // Bad, I promised this can be an in-place operation, so first find out whether any input shares the same pointer as the output.
30 for (z = 1; z < input_size; z++)
31 {
32 ccv_nnc_tensor_view_t* c = outputs[0];
33 ccv_nnc_tensor_view_t* a = inputs[z];
34 if (c->data.f32 == a->data.f32)
35 {
36 k = z;
37 break;
38 }
39 }
40 for (z = 0; z < input_size - 1; z++)
41 {
42 ccv_nnc_tensor_view_t* c = outputs[0];
43 ccv_nnc_tensor_view_t* a = z > 0 ? c : inputs[k];
44 ccv_nnc_tensor_view_t* b = z >= k ? inputs[z + 1] : inputs[z];
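// Note on the pairing above: the first pass (z == 0) starts from inputs[k], the input that may alias c,
// so c can be overwritten safely; every later pass accumulates into c, and b walks the remaining inputs, skipping index k.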
45 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
46 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
47 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
48 ccv_nnc_tensor_view_get_dim(a, dim);
49 assert(ccv_nnc_tensor_view_check_dim(b, dim));
50 assert(ccv_nnc_tensor_view_check_dim(c, dim));
51 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c))
52 {
53 // Super optimal case, just do one for-loop for sum.
54 const int tensor_count = ccv_nnc_tensor_count(a->info);
55 for (x = 0; x < tensor_count; x++)
56 c->data.f32[x] = a->data.f32[x] + b->data.f32[x];
57 continue;
58 }
59 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
60 ccv_nnc_tensor_view_get_stride(a, astride);
61 ccv_nnc_tensor_view_get_stride(b, bstride);
62 ccv_nnc_tensor_view_get_stride(c, cstride);
63 int i[CCV_NNC_MAX_DIM + 2];
64 float* const ap = a->data.f32;
65 float* const bp = b->data.f32;
66 float* const cp = c->data.f32;
67 const int count = dim[2] * dim[3];
68 if (astride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3] && astride[3] == 1 && bstride[3] == 1 && cstride[3] == 1)
69 {
70 // Special casing if astride[2] is the same as dim[3] (one flat loop over the last two dims)
71 for (i[0] = 0; i[0] < dim[0]; i[0]++)
72 {
73 float* ap0 = ap + i[0] * astride[0];
74 float* bp0 = bp + i[0] * bstride[0];
75 float* cp0 = cp + i[0] * cstride[0];
76 for (i[1] = 0; i[1] < dim[1]; i[1]++)
77 {
78 for (x = 0; x < count; x++)
79 cp0[x] = ap0[x] + bp0[x];
80 ap0 += astride[1];
81 bp0 += bstride[1];
82 cp0 += cstride[1];
83 }
84 }
85 continue;
86 }
87 // Non-optimal case, need to do skip copy.
88 for (i[0] = 0; i[0] < dim[0]; i[0]++)
89 {
90 float* const ap0 = ap + i[0] * astride[0];
91 float* const bp0 = bp + i[0] * bstride[0];
92 float* const cp0 = cp + i[0] * cstride[0];
93 for (i[1] = 0; i[1] < dim[1]; i[1]++)
94 {
95 float* ap1 = ap0 + i[1] * astride[1];
96 float* bp1 = bp0 + i[1] * bstride[1];
97 float* cp1 = cp0 + i[1] * cstride[1];
98 for (i[2] = 0; i[2] < dim[2]; i[2]++)
99 {
100 for (x = 0; x < dim[3]; x++)
101 cp1[x * cstride[3]] = ap1[x * astride[3]] + bp1[x * bstride[3]];
102 ap1 += astride[2];
103 bp1 += bstride[2];
104 cp1 += cstride[2];
105 }
106 }
107 }
108 }
109}
110
111void _ccv_nnc_ewsum_forw_cpu_ref_i32(ccv_nnc_tensor_view_t* const* const inputs, const int input_size, ccv_nnc_tensor_view_t* const* const outputs, const int output_size)
112{
113 if (input_size == 1 && output_size == 1)
114 {
115 _ccv_nnc_tensor_transfer_cpu_ref_f32(inputs[0], outputs[0]);
116 return;
117 }
118 // Assuming this is int 32.
119 int dim[CCV_NNC_MAX_DIM_ALLOC];
120 int astride[CCV_NNC_MAX_DIM_ALLOC];
121 int bstride[CCV_NNC_MAX_DIM_ALLOC];
122 int cstride[CCV_NNC_MAX_DIM_ALLOC];
123 int x, z;
124 int k = 0;
125 // Bad, I promised this can be an in-place operation, so first find out whether any input shares the same pointer as the output.
126 for (z = 1; z < input_size; z++)
127 {
128 ccv_nnc_tensor_view_t* c = outputs[0];
129 ccv_nnc_tensor_view_t* a = inputs[z];
130 if (c->data.f32 == a->data.f32)
131 {
132 k = z;
133 break;
134 }
135 }
136 for (z = 0; z < input_size - 1; z++)
137 {
138 ccv_nnc_tensor_view_t* c = outputs[0];
139 ccv_nnc_tensor_view_t* a = z > 0 ? c : inputs[k];
140 ccv_nnc_tensor_view_t* b = z >= k ? inputs[z + 1] : inputs[z];
141 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
142 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
143 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
144 ccv_nnc_tensor_view_get_dim(a, dim);
145 assert(ccv_nnc_tensor_view_check_dim(b, dim));
146 assert(ccv_nnc_tensor_view_check_dim(c, dim));
147 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c))
148 {
149 // Super optimal case, just do one for-loop for sum.
150 const int tensor_count = ccv_nnc_tensor_count(a->info);
151 for (x = 0; x < tensor_count; x++)
152 c->data.i32[x] = a->data.i32[x] + b->data.i32[x];
153 continue;
154 }
155 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
156 ccv_nnc_tensor_view_get_stride(a, astride);
157 ccv_nnc_tensor_view_get_stride(b, bstride);
158 ccv_nnc_tensor_view_get_stride(c, cstride);
159 int i[CCV_NNC_MAX_DIM + 2];
160 int* const ap = a->data.i32;
161 int* const bp = b->data.i32;
162 int* const cp = c->data.i32;
163 const int count = dim[2] * dim[3];
164 if (astride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3] && astride[3] == 1 && bstride[3] == 1 && cstride[3] == 1)
165 {
166 // Special casing if astride[2] is the same as dim[3] (one flat loop over the last two dims)
167 for (i[0] = 0; i[0] < dim[0]; i[0]++)
168 {
169 int* ap0 = ap + i[0] * astride[0];
170 int* bp0 = bp + i[0] * bstride[0];
171 int* cp0 = cp + i[0] * cstride[0];
172 for (i[1] = 0; i[1] < dim[1]; i[1]++)
173 {
174 for (x = 0; x < count; x++)
175 cp0[x] = ap0[x] + bp0[x];
176 ap0 += astride[1];
177 bp0 += bstride[1];
178 cp0 += cstride[1];
179 }
180 }
181 continue;
182 }
183 // Non-optimal case, need to do skip copy.
184 for (i[0] = 0; i[0] < dim[0]; i[0]++)
185 {
186 int* const ap0 = ap + i[0] * astride[0];
187 int* const bp0 = bp + i[0] * bstride[0];
188 int* const cp0 = cp + i[0] * cstride[0];
189 for (i[1] = 0; i[1] < dim[1]; i[1]++)
190 {
191 int* ap1 = ap0 + i[1] * astride[1];
192 int* bp1 = bp0 + i[1] * bstride[1];
193 int* cp1 = cp0 + i[1] * cstride[1];
194 for (i[2] = 0; i[2] < dim[2]; i[2]++)
195 {
196 for (x = 0; x < dim[3]; x++)
197 cp1[x * cstride[3]] = ap1[x * astride[3]] + bp1[x * bstride[3]];
198 ap1 += astride[2];
199 bp1 += bstride[2];
200 cp1 += cstride[2];
201 }
202 }
203 }
204 }
205}
206
207static int _ccv_nnc_ewsum_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
208{
209 if (outputs[0]->info.datatype == CCV_32S)
210 _ccv_nnc_ewsum_forw_cpu_ref_i32((ccv_nnc_tensor_view_t**)inputs, input_size, (ccv_nnc_tensor_view_t**)outputs, output_size);
211 else
212 _ccv_nnc_ewsum_forw_cpu_ref_f32((ccv_nnc_tensor_view_t**)inputs, input_size, (ccv_nnc_tensor_view_t**)outputs, output_size);
213 return CCV_NNC_EXEC_SUCCESS;
214}
215
216static int _ccv_nnc_ewsum_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
217{
218 // D[x + y + z, x] = 1
219 int i;
220 if (inputs[0] == 0)
221 {
222 // Set them to 1.
223 for (i = 0; i < output_size; i++)
224 if (outputs[i])
225 _ccv_nnc_tensor_set_cpu_ref_f32((ccv_nnc_tensor_view_t*)outputs[i], 1);
226 } else {
227 // Copy over the gradient (if they are not pointing to the same tensor already).
228 for (i = 0; i < output_size; i++)
229 if (outputs[i] && inputs[0]->data.f32 != outputs[i]->data.f32)
230 _ccv_nnc_tensor_transfer_cpu_ref_f32((ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)outputs[i]);
231 }
232 return CCV_NNC_EXEC_SUCCESS;
233}
234
235void _ccv_nnc_ewprod_forw_cpu_ref(ccv_nnc_tensor_view_t* const* const inputs, const int input_size, ccv_nnc_tensor_view_t* const* const outputs, const int output_size)
236{
237 if (input_size == 1 && output_size == 1)
238 {
239 _ccv_nnc_tensor_transfer_cpu_ref_f32(inputs[0], outputs[0]);
240 return;
241 }
242 // Assuming this is float 32.
243 int dim[CCV_NNC_MAX_DIM_ALLOC];
244 int astride[CCV_NNC_MAX_DIM_ALLOC];
245 int bstride[CCV_NNC_MAX_DIM_ALLOC];
246 int cstride[CCV_NNC_MAX_DIM_ALLOC];
247 int x, z;
248 int k = 0;
249 // Bad, I promised this can be an in-place operation, so first find out whether any input shares the same pointer as the output.
250 for (z = 1; z < input_size; z++)
251 {
252 ccv_nnc_tensor_view_t* c = outputs[0];
253 ccv_nnc_tensor_view_t* a = inputs[z];
254 if (c->data.f32 == a->data.f32)
255 {
256 k = z;
257 break;
258 }
259 }
260 for (z = 0; z < input_size - 1; z++)
261 {
262 ccv_nnc_tensor_view_t* c = outputs[0];
263 ccv_nnc_tensor_view_t* a = z > 0 ? c : inputs[k];
264 ccv_nnc_tensor_view_t* b = z >= k ? inputs[z + 1] : inputs[z];
265 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
266 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
267 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
268 ccv_nnc_tensor_view_get_dim(a, dim);
269 assert(ccv_nnc_tensor_view_check_dim(b, dim));
270 assert(ccv_nnc_tensor_view_check_dim(c, dim));
271 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c))
272 {
273 // Super optimal case, just do one for-loop for sum.
274 const int tensor_count = ccv_nnc_tensor_count(a->info);
275 for (x = 0; x < tensor_count; x++)
276 c->data.f32[x] = a->data.f32[x] * b->data.f32[x];
277 continue;
278 }
279 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
280 ccv_nnc_tensor_view_get_stride(a, astride);
281 ccv_nnc_tensor_view_get_stride(b, bstride);
282 ccv_nnc_tensor_view_get_stride(c, cstride);
283 int i[CCV_NNC_MAX_DIM + 2];
284 float* const ap = a->data.f32;
285 float* const bp = b->data.f32;
286 float* const cp = c->data.f32;
287 const int count = dim[2] * dim[3];
288 if (astride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3])
289 {
290 // Special casing if astride[2] is the same as dim[3]
291 for (i[0] = 0; i[0] < dim[0]; i[0]++)
292 {
293 float* ap0 = ap + i[0] * astride[0];
294 float* bp0 = bp + i[0] * bstride[0];
295 float* cp0 = cp + i[0] * cstride[0];
296 for (i[1] = 0; i[1] < dim[1]; i[1]++)
297 {
298 for (x = 0; x < count; x++)
299 cp0[x] = ap0[x] * bp0[x];
300 ap0 += astride[1];
301 bp0 += bstride[1];
302 cp0 += cstride[1];
303 }
304 }
305 continue;
306 }
307 // Non-optimal case, need to do skip copy.
308 for (i[0] = 0; i[0] < dim[0]; i[0]++)
309 {
310 float* const ap0 = ap + i[0] * astride[0];
311 float* const bp0 = bp + i[0] * bstride[0];
312 float* const cp0 = cp + i[0] * cstride[0];
313 for (i[1] = 0; i[1] < dim[1]; i[1]++)
314 {
315 float* ap1 = ap0 + i[1] * astride[1];
316 float* bp1 = bp0 + i[1] * bstride[1];
317 float* cp1 = cp0 + i[1] * cstride[1];
318 for (i[2] = 0; i[2] < dim[2]; i[2]++)
319 {
320 for (x = 0; x < dim[3]; x++)
321 cp1[x] = ap1[x] * bp1[x];
322 ap1 += astride[2];
323 bp1 += bstride[2];
324 cp1 += cstride[2];
325 }
326 }
327 }
328 }
329}
330
331static int _ccv_nnc_ewprod_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
332{
333 _ccv_nnc_ewprod_forw_cpu_ref((ccv_nnc_tensor_view_t**)inputs, input_size, (ccv_nnc_tensor_view_t**)outputs, output_size);
334 return CCV_NNC_EXEC_SUCCESS;
335}
336
337static int _ccv_nnc_ewprod_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
338{
339 // D[x * y * z, x] = y * z
340 // Assuming this is float 32.
341 int dim[CCV_NNC_MAX_DIM_ALLOC];
342 int gstride[CCV_NNC_MAX_DIM_ALLOC];
343 int astride[CCV_NNC_MAX_DIM_ALLOC];
344 int bstride[CCV_NNC_MAX_DIM_ALLOC];
345 int hstride[CCV_NNC_MAX_DIM_ALLOC];
346 int x, z;
347 ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
348 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[output_size + 1];
349 if (g == 0)
350 {
351 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
352 ccv_nnc_tensor_view_get_dim(b, dim);
353 ccv_nnc_tensor_view_get_stride(b, bstride);
354 for (z = 0; z < output_size; z++)
355 {
356 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[z + 1];
357 ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[z];
358 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
359 assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2);
360 assert(ccv_nnc_tensor_view_check_dim(a, dim));
361 assert(ccv_nnc_tensor_view_check_dim(h, dim));
362 ccv_nnc_tensor_view_get_stride(a, astride);
363 ccv_nnc_tensor_view_get_stride(h, hstride);
364 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(h))
365 {
366 // Super optimal case, just do one for-loop for sum.
367 const int tensor_count = ccv_nnc_tensor_count(b->info);
368 for (x = 0; x < tensor_count; x++)
369 h->data.f32[x] = b->data.f32[x] / a->data.f32[x];
370 continue;
371 }
372 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
373 int i[CCV_NNC_MAX_DIM + 2];
374 float* const ap = a->data.f32;
375 float* const bp = b->data.f32;
376 float* const hp = h->data.f32;
377 const int count = dim[2] * dim[3];
378 if (astride[2] == dim[3] && bstride[2] == dim[3] && hstride[2] == dim[3])
379 {
380 // Special casing if astride[2] is the same as dim[3]
381 for (i[0] = 0; i[0] < dim[0]; i[0]++)
382 {
383 float* ap0 = ap + i[0] * astride[0];
384 float* bp0 = bp + i[0] * bstride[0];
385 float* hp0 = hp + i[0] * hstride[0];
386 for (i[1] = 0; i[1] < dim[1]; i[1]++)
387 {
388 for (x = 0; x < count; x++)
389 hp0[x] = bp0[x] / ap0[x];
390 ap0 += astride[1];
391 bp0 += bstride[1];
392 hp0 += hstride[1];
393 }
394 }
395 continue;
396 }
397 // Non-optimal case, need to do skip copy.
398 for (i[0] = 0; i[0] < dim[0]; i[0]++)
399 {
400 float* const ap0 = ap + i[0] * astride[0];
401 float* const bp0 = bp + i[0] * bstride[0];
402 float* const hp0 = hp + i[0] * hstride[0];
403 for (i[1] = 0; i[1] < dim[1]; i[1]++)
404 {
405 float* ap1 = ap0 + i[1] * astride[1];
406 float* bp1 = bp0 + i[1] * bstride[1];
407 float* hp1 = hp0 + i[1] * hstride[1];
408 for (i[2] = 0; i[2] < dim[2]; i[2]++)
409 {
410 for (x = 0; x < dim[3]; x++)
411 hp1[x] = bp1[x] / ap1[x];
412 ap1 += astride[2];
413 bp1 += bstride[2];
414 hp1 += hstride[2];
415 }
416 }
417 }
418 }
419 } else {
420 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
421 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
422 ccv_nnc_tensor_view_get_dim(b, dim);
423 assert(ccv_nnc_tensor_view_check_dim(g, dim));
424 ccv_nnc_tensor_view_get_stride(b, bstride);
425 ccv_nnc_tensor_view_get_stride(g, gstride);
426 for (z = 0; z < output_size; z++)
427 {
428 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[z + 1];
429 ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[z];
430 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
431 assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2);
432 assert(ccv_nnc_tensor_view_check_dim(a, dim));
433 assert(ccv_nnc_tensor_view_check_dim(h, dim));
434 ccv_nnc_tensor_view_get_stride(a, astride);
435 ccv_nnc_tensor_view_get_stride(h, hstride);
436 if (!CCV_IS_TENSOR_VIEW(g) && !CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(h))
437 {
438 // Super optimal case, just do one for-loop for sum.
439 const int tensor_count = ccv_nnc_tensor_count(g->info);
440 for (x = 0; x < tensor_count; x++)
441 h->data.f32[x] = g->data.f32[x] * b->data.f32[x] / a->data.f32[x];
442 continue;
443 }
444 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
445 int i[CCV_NNC_MAX_DIM + 2];
446 float* const gp = g->data.f32;
447 float* const ap = a->data.f32;
448 float* const bp = b->data.f32;
449 float* const hp = h->data.f32;
450 const int count = dim[2] * dim[3];
451 if (gstride[2] == dim[3] && astride[2] == dim[3] && bstride[2] == dim[3] && hstride[2] == dim[3])
452 {
453 // Special casing if gstride[2] is the same as dim[3]
454 for (i[0] = 0; i[0] < dim[0]; i[0]++)
455 {
456 float* gp0 = gp + i[0] * gstride[0];
457 float* ap0 = ap + i[0] * astride[0];
458 float* bp0 = bp + i[0] * bstride[0];
459 float* hp0 = hp + i[0] * hstride[0];
460 for (i[1] = 0; i[1] < dim[1]; i[1]++)
461 {
462 for (x = 0; x < count; x++)
463 hp0[x] = gp0[x] * bp0[x] / ap0[x];
464 gp0 += gstride[1];
465 ap0 += astride[1];
466 bp0 += bstride[1];
467 hp0 += hstride[1];
468 }
469 }
470 continue;
471 }
472 // Non-optimal case, need to do skip copy.
473 for (i[0] = 0; i[0] < dim[0]; i[0]++)
474 {
475 float* const gp0 = gp + i[0] * gstride[0];
476 float* const ap0 = ap + i[0] * astride[0];
477 float* const bp0 = bp + i[0] * bstride[0];
478 float* const hp0 = hp + i[0] * hstride[0];
479 for (i[1] = 0; i[1] < dim[1]; i[1]++)
480 {
481 float* gp1 = gp0 + i[1] * gstride[1];
482 float* ap1 = ap0 + i[1] * astride[1];
483 float* bp1 = bp0 + i[1] * bstride[1];
484 float* hp1 = hp0 + i[1] * hstride[1];
485 for (i[2] = 0; i[2] < dim[2]; i[2]++)
486 {
487 for (x = 0; x < dim[3]; x++)
488 hp1[x] = gp1[x] * bp1[x] / ap1[x];
489 gp1 += gstride[2];
490 ap1 += astride[2];
491 bp1 += bstride[2];
492 hp1 += hstride[2];
493 }
494 }
495 }
496 }
497 }
498 return CCV_NNC_EXEC_SUCCESS;
499}
500
501static void _ccv_nnc_ewdiv_forw_cpu_ref(const float p, ccv_nnc_tensor_view_t* const a, ccv_nnc_tensor_view_t* const b, ccv_nnc_tensor_view_t* const c)
502{
503 // Assuming this is float 32.
504 int dim[CCV_NNC_MAX_DIM_ALLOC];
505 int astride[CCV_NNC_MAX_DIM_ALLOC];
506 int bstride[CCV_NNC_MAX_DIM_ALLOC];
507 int cstride[CCV_NNC_MAX_DIM_ALLOC];
508 if (a == 0) // Take 0 as all ones tensor.
509 {
510 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
511 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
512 ccv_nnc_tensor_view_get_dim(b, dim);
513 assert(ccv_nnc_tensor_view_check_dim(c, dim));
514 int x;
515 if (!CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c))
516 {
517 // Super optimal case, just do one for-loop for sum.
518 const int tensor_count = ccv_nnc_tensor_count(b->info);
519 for (x = 0; x < tensor_count; x++)
520 c->data.f32[x] = p / b->data.f32[x];
521 return;
522 }
523 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
524 ccv_nnc_tensor_view_get_stride(b, bstride);
525 ccv_nnc_tensor_view_get_stride(c, cstride);
526 int i[CCV_NNC_MAX_DIM + 2];
527 float* const bp = b->data.f32;
528 float* const cp = c->data.f32;
529 const int count = dim[2] * dim[3];
530 if (bstride[2] == dim[3] && cstride[2] == dim[3])
531 {
532 // Special casing if bstride[2] is the same as dim[3]
533 for (i[0] = 0; i[0] < dim[0]; i[0]++)
534 {
535 float* bp0 = bp + i[0] * bstride[0];
536 float* cp0 = cp + i[0] * cstride[0];
537 for (i[1] = 0; i[1] < dim[1]; i[1]++)
538 {
539 for (x = 0; x < count; x++)
540 cp0[x] = p / bp0[x];
541 bp0 += bstride[1];
542 cp0 += cstride[1];
543 }
544 }
545 return;
546 }
547 // Non-optimal case, need to do skip copy.
548 for (i[0] = 0; i[0] < dim[0]; i[0]++)
549 {
550 float* const bp0 = bp + i[0] * bstride[0];
551 float* const cp0 = cp + i[0] * cstride[0];
552 for (i[1] = 0; i[1] < dim[1]; i[1]++)
553 {
554 float* bp1 = bp0 + i[1] * bstride[1];
555 float* cp1 = cp0 + i[1] * cstride[1];
556 for (i[2] = 0; i[2] < dim[2]; i[2]++)
557 {
558 for (x = 0; x < dim[3]; x++)
559 cp1[x] = p / bp1[x];
560 bp1 += bstride[2];
561 cp1 += cstride[2];
562 }
563 }
564 }
565 } else {
566 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
567 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
568 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
569 ccv_nnc_tensor_view_get_dim(a, dim);
570 assert(ccv_nnc_tensor_view_check_dim(b, dim));
571 assert(ccv_nnc_tensor_view_check_dim(c, dim));
572 int x;
573 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c))
574 {
575 // Super optimal case, just do one for-loop for sum.
576 const int tensor_count = ccv_nnc_tensor_count(a->info);
577 for (x = 0; x < tensor_count; x++)
578 c->data.f32[x] = p * a->data.f32[x] / b->data.f32[x];
579 return;
580 }
581 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
582 ccv_nnc_tensor_view_get_stride(a, astride);
583 ccv_nnc_tensor_view_get_stride(b, bstride);
584 ccv_nnc_tensor_view_get_stride(c, cstride);
585 int i[CCV_NNC_MAX_DIM + 2];
586 float* const ap = a->data.f32;
587 float* const bp = b->data.f32;
588 float* const cp = c->data.f32;
589 const int count = dim[2] * dim[3];
590 if (astride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3])
591 {
592 // Special casing if astride[2] is the same as dim[3]
593 for (i[0] = 0; i[0] < dim[0]; i[0]++)
594 {
595 float* ap0 = ap + i[0] * astride[0];
596 float* bp0 = bp + i[0] * bstride[0];
597 float* cp0 = cp + i[0] * cstride[0];
598 for (i[1] = 0; i[1] < dim[1]; i[1]++)
599 {
600 for (x = 0; x < count; x++)
601 cp0[x] = p * ap0[x] / bp0[x];
602 ap0 += astride[1];
603 bp0 += bstride[1];
604 cp0 += cstride[1];
605 }
606 }
607 return;
608 }
609 // Non-optimal case, need to do skip copy.
610 for (i[0] = 0; i[0] < dim[0]; i[0]++)
611 {
612 float* const ap0 = ap + i[0] * astride[0];
613 float* const bp0 = bp + i[0] * bstride[0];
614 float* const cp0 = cp + i[0] * cstride[0];
615 for (i[1] = 0; i[1] < dim[1]; i[1]++)
616 {
617 float* ap1 = ap0 + i[1] * astride[1];
618 float* bp1 = bp0 + i[1] * bstride[1];
619 float* cp1 = cp0 + i[1] * cstride[1];
620 for (i[2] = 0; i[2] < dim[2]; i[2]++)
621 {
622 for (x = 0; x < dim[3]; x++)
623 cp1[x] = p * ap1[x] / bp1[x];
624 ap1 += astride[2];
625 bp1 += bstride[2];
626 cp1 += cstride[2];
627 }
628 }
629 }
630 }
631}
632
633static int _ccv_nnc_ewdiv_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
634{
635 _ccv_nnc_ewdiv_forw_cpu_ref(1, (ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)inputs[1], (ccv_nnc_tensor_view_t*)outputs[0]);
636 return CCV_NNC_EXEC_SUCCESS;
637}
638
639static int _ccv_nnc_ewdiv_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
640{
641 // D[x / y, x] = 1 / y, D[x / y, y] = -x / y^2
642 if (output_size == 1 || outputs[1] == 0)
643 {
644 // When we only need D[x / y, x]
645 _ccv_nnc_ewdiv_forw_cpu_ref(1, (ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)inputs[2], (ccv_nnc_tensor_view_t*)outputs[0]);
646 return CCV_NNC_EXEC_SUCCESS;
647 }
648 int dim[CCV_NNC_MAX_DIM_ALLOC];
649 int gstride[CCV_NNC_MAX_DIM_ALLOC];
650 int bstride[CCV_NNC_MAX_DIM_ALLOC];
651 int cstride[CCV_NNC_MAX_DIM_ALLOC];
652 int hastride[CCV_NNC_MAX_DIM_ALLOC];
653 int hbstride[CCV_NNC_MAX_DIM_ALLOC];
654 ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
655 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[2];
656 ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)inputs[3];
657 ccv_nnc_tensor_view_t* ha = (ccv_nnc_tensor_view_t*)outputs[0];
658 ccv_nnc_tensor_view_t* hb = (ccv_nnc_tensor_view_t*)outputs[1];
659 if (g == 0)
660 {
661 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
662 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
663 assert(ccv_nnc_tensor_nd(hb->info.dim) <= CCV_NNC_MAX_DIM + 2);
664 ccv_nnc_tensor_view_get_dim(b, dim);
665 assert(ccv_nnc_tensor_view_check_dim(c, dim));
666 assert(ccv_nnc_tensor_view_check_dim(hb, dim));
667 if (ha)
668 {
669 assert(ccv_nnc_tensor_nd(ha->info.dim) <= CCV_NNC_MAX_DIM + 2);
670 assert(ccv_nnc_tensor_view_check_dim(ha, dim));
671 }
672 int x;
673 if (!CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c) && (ha == 0 || !CCV_IS_TENSOR_VIEW(ha)) && !CCV_IS_TENSOR_VIEW(hb))
674 {
675 // Super optimal case, just do one for-loop for sum.
676 const int tensor_count = ccv_nnc_tensor_count(b->info);
677 if (ha == 0)
678 {
679 for (x = 0; x < tensor_count; x++)
680 {
681 const float v = 1 / b->data.f32[x];
682 hb->data.f32[x] = -c->data.f32[x] * v;
683 }
684 } else {
685 for (x = 0; x < tensor_count; x++)
686 {
687 const float v = 1 / b->data.f32[x];
688 ha->data.f32[x] = v;
689 hb->data.f32[x] = -c->data.f32[x] * v;
690 }
691 }
692 return CCV_NNC_EXEC_SUCCESS;
693 }
694 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
695 ccv_nnc_tensor_view_get_stride(b, bstride);
696 ccv_nnc_tensor_view_get_stride(c, cstride);
697 ccv_nnc_tensor_view_get_stride(hb, hbstride);
698 int i[CCV_NNC_MAX_DIM + 2];
699 float* const bp = b->data.f32;
700 float* const cp = c->data.f32;
701 float* const hbp = hb->data.f32;
702 const int count = dim[2] * dim[3];
703 if (ha == 0)
704 {
705 if (bstride[2] == dim[3] && cstride[2] == dim[3] && hbstride[2] == dim[3])
706 {
707 // Special casing if bstride[2] is the same as dim[3]
708 for (i[0] = 0; i[0] < dim[0]; i[0]++)
709 {
710 float* bp0 = bp + i[0] * bstride[0];
711 float* cp0 = cp + i[0] * cstride[0];
712 float* hbp0 = hbp + i[0] * hbstride[0];
713 for (i[1] = 0; i[1] < dim[1]; i[1]++)
714 {
715 for (x = 0; x < count; x++)
716 {
717 const float v = 1 / bp0[x];
718 hbp0[x] = -cp0[x] * v;
719 }
720 bp0 += bstride[1];
721 cp0 += cstride[1];
722 hbp0 += hbstride[1];
723 }
724 }
725 return CCV_NNC_EXEC_SUCCESS;
726 }
727 // Non-optimal case, need to do skip copy.
728 for (i[0] = 0; i[0] < dim[0]; i[0]++)
729 {
730 float* const bp0 = bp + i[0] * bstride[0];
731 float* const cp0 = cp + i[0] * cstride[0];
732 float* const hbp0 = hbp + i[0] * hbstride[0];
733 for (i[1] = 0; i[1] < dim[1]; i[1]++)
734 {
735 float* bp1 = bp0 + i[1] * bstride[1];
736 float* cp1 = cp0 + i[1] * cstride[1];
737 float* hbp1 = hbp0 + i[1] * hbstride[1];
738 for (i[2] = 0; i[2] < dim[2]; i[2]++)
739 {
740 for (x = 0; x < dim[3]; x++)
741 {
742 const float v = 1 / bp1[x];
743 hbp1[x] = -cp1[x] * v;
744 }
745 bp1 += bstride[2];
746 cp1 += cstride[2];
747 hbp1 += hbstride[2];
748 }
749 }
750 }
751 } else {
752 float* const hap = ha->data.f32;
753 ccv_nnc_tensor_view_get_stride(ha, hastride);
754 if (bstride[2] == dim[3] && cstride[2] == dim[3] && hastride[2] == dim[3] && hbstride[2] == dim[3])
755 {
756 // Special casing if bstride[2] is the same as dim[3]
757 for (i[0] = 0; i[0] < dim[0]; i[0]++)
758 {
759 float* bp0 = bp + i[0] * bstride[0];
760 float* cp0 = cp + i[0] * cstride[0];
761 float* hap0 = hap + i[0] * hastride[0];
762 float* hbp0 = hbp + i[0] * hbstride[0];
763 for (i[1] = 0; i[1] < dim[1]; i[1]++)
764 {
765 for (x = 0; x < count; x++)
766 {
767 const float v = 1 / bp0[x];
768 hap0[x] = v;
769 hbp0[x] = -cp0[x] * v;
770 }
771 bp0 += bstride[1];
772 cp0 += cstride[1];
773 hap0 += hastride[1];
774 hbp0 += hbstride[1];
775 }
776 }
777 return CCV_NNC_EXEC_SUCCESS;
778 }
779 // Non-optimal case, need to do skip copy.
780 for (i[0] = 0; i[0] < dim[0]; i[0]++)
781 {
782 float* const bp0 = bp + i[0] * bstride[0];
783 float* const cp0 = cp + i[0] * cstride[0];
784 float* const hap0 = hap + i[0] * hastride[0];
785 float* const hbp0 = hbp + i[0] * hbstride[0];
786 for (i[1] = 0; i[1] < dim[1]; i[1]++)
787 {
788 float* bp1 = bp0 + i[1] * bstride[1];
789 float* cp1 = cp0 + i[1] * cstride[1];
790 float* hap1 = hap0 + i[1] * hastride[1];
791 float* hbp1 = hbp0 + i[1] * hbstride[1];
792 for (i[2] = 0; i[2] < dim[2]; i[2]++)
793 {
794 for (x = 0; x < dim[3]; x++)
795 {
796 const float v = 1 / bp1[x];
797 hap1[x] = v;
798 hbp1[x] = -cp1[x] * v;
799 }
800 bp1 += bstride[2];
801 cp1 += cstride[2];
802 hap1 += hastride[2];
803 hbp1 += hbstride[2];
804 }
805 }
806 }
807 }
808 } else {
809 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
810 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
811 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
812 assert(ccv_nnc_tensor_nd(hb->info.dim) <= CCV_NNC_MAX_DIM + 2);
813 ccv_nnc_tensor_view_get_dim(b, dim);
814 assert(ccv_nnc_tensor_view_check_dim(g, dim));
815 assert(ccv_nnc_tensor_view_check_dim(c, dim));
816 assert(ccv_nnc_tensor_view_check_dim(hb, dim));
817 if (ha)
818 {
819 assert(ccv_nnc_tensor_nd(ha->info.dim) <= CCV_NNC_MAX_DIM + 2);
820 assert(ccv_nnc_tensor_view_check_dim(ha, dim));
821 }
822 int x;
823 if (!CCV_IS_TENSOR_VIEW(g) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c) && (ha == 0 || !CCV_IS_TENSOR_VIEW(ha)) && !CCV_IS_TENSOR_VIEW(hb))
824 {
825 // Super optimal case, just do one for-loop for sum.
826 const int tensor_count = ccv_nnc_tensor_count(g->info);
827 if (ha == 0)
828 {
829 for (x = 0; x < tensor_count; x++)
830 {
831 const float v = g->data.f32[x] / b->data.f32[x];
832 hb->data.f32[x] = -c->data.f32[x] * v;
833 }
834 } else {
835 for (x = 0; x < tensor_count; x++)
836 {
837 const float v = g->data.f32[x] / b->data.f32[x];
838 ha->data.f32[x] = v;
839 hb->data.f32[x] = -c->data.f32[x] * v;
840 }
841 }
842 return CCV_NNC_EXEC_SUCCESS;
843 }
844 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
845 ccv_nnc_tensor_view_get_stride(g, gstride);
846 ccv_nnc_tensor_view_get_stride(b, bstride);
847 ccv_nnc_tensor_view_get_stride(c, cstride);
848 ccv_nnc_tensor_view_get_stride(hb, hbstride);
849 int i[CCV_NNC_MAX_DIM + 2];
850 float* const gp = g->data.f32;
851 float* const bp = b->data.f32;
852 float* const cp = c->data.f32;
853 float* const hbp = hb->data.f32;
854 const int count = dim[2] * dim[3];
855 if (ha == 0)
856 {
857 if (gstride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3] && hbstride[2] == dim[3])
858 {
859 // Special casing if gstride[2] is the same as dim[3]
860 for (i[0] = 0; i[0] < dim[0]; i[0]++)
861 {
862 float* gp0 = gp + i[0] * gstride[0];
863 float* bp0 = bp + i[0] * bstride[0];
864 float* cp0 = cp + i[0] * cstride[0];
865 float* hbp0 = hbp + i[0] * hbstride[0];
866 for (i[1] = 0; i[1] < dim[1]; i[1]++)
867 {
868 for (x = 0; x < count; x++)
869 {
870 const float v = gp0[x] / bp0[x];
871 hbp0[x] = -cp0[x] * v;
872 }
873 gp0 += gstride[1];
874 bp0 += bstride[1];
875 cp0 += cstride[1];
876 hbp0 += hbstride[1];
877 }
878 }
879 return CCV_NNC_EXEC_SUCCESS;
880 }
881 // Non-optimal case, need to do skip copy.
882 for (i[0] = 0; i[0] < dim[0]; i[0]++)
883 {
884 float* const gp0 = gp + i[0] * gstride[0];
885 float* const bp0 = bp + i[0] * bstride[0];
886 float* const cp0 = cp + i[0] * cstride[0];
887 float* const hbp0 = hbp + i[0] * hbstride[0];
888 for (i[1] = 0; i[1] < dim[1]; i[1]++)
889 {
890 float* gp1 = gp0 + i[1] * gstride[1];
891 float* bp1 = bp0 + i[1] * bstride[1];
892 float* cp1 = cp0 + i[1] * cstride[1];
893 float* hbp1 = hbp0 + i[1] * hbstride[1];
894 for (i[2] = 0; i[2] < dim[2]; i[2]++)
895 {
896 for (x = 0; x < dim[3]; x++)
897 {
898 const float v = gp1[x] / bp1[x];
899 hbp1[x] = -cp1[x] * v;
900 }
901 gp1 += gstride[2];
902 bp1 += bstride[2];
903 cp1 += cstride[2];
904 hbp1 += hbstride[2];
905 }
906 }
907 }
908 } else {
909 ccv_nnc_tensor_view_get_stride(ha, hastride);
910 float* const hap = ha->data.f32;
911 if (gstride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3] && hastride[2] == dim[3] && hbstride[2] == dim[3])
912 {
913 // Special casing if gstride[2] is the same as dim[3]
914 for (i[0] = 0; i[0] < dim[0]; i[0]++)
915 {
916 float* gp0 = gp + i[0] * gstride[0];
917 float* bp0 = bp + i[0] * bstride[0];
918 float* cp0 = cp + i[0] * cstride[0];
919 float* hap0 = hap + i[0] * hastride[0];
920 float* hbp0 = hbp + i[0] * hbstride[0];
921 for (i[1] = 0; i[1] < dim[1]; i[1]++)
922 {
923 for (x = 0; x < count; x++)
924 {
925 const float v = gp0[x] / bp0[x];
926 hap0[x] = v;
927 hbp0[x] = -cp0[x] * v;
928 }
929 gp0 += gstride[1];
930 bp0 += bstride[1];
931 cp0 += cstride[1];
932 hap0 += hastride[1];
933 hbp0 += hbstride[1];
934 }
935 }
936 return CCV_NNC_EXEC_SUCCESS;
937 }
938 // Non-optimal case, need to do skip copy.
939 for (i[0] = 0; i[0] < dim[0]; i[0]++)
940 {
941 float* const gp0 = gp + i[0] * gstride[0];
942 float* const bp0 = bp + i[0] * bstride[0];
943 float* const cp0 = cp + i[0] * cstride[0];
944 float* const hap0 = hap + i[0] * hastride[0];
945 float* const hbp0 = hbp + i[0] * hbstride[0];
946 for (i[1] = 0; i[1] < dim[1]; i[1]++)
947 {
948 float* gp1 = gp0 + i[1] * gstride[1];
949 float* bp1 = bp0 + i[1] * bstride[1];
950 float* cp1 = cp0 + i[1] * cstride[1];
951 float* hap1 = hap0 + i[1] * hastride[1];
952 float* hbp1 = hbp0 + i[1] * hbstride[1];
953 for (i[2] = 0; i[2] < dim[2]; i[2]++)
954 {
955 for (x = 0; x < dim[3]; x++)
956 {
957 const float v = gp1[x] / bp1[x];
958 hap1[x] = v;
959 hbp1[x] = -cp1[x] * v;
960 }
961 gp1 += gstride[2];
962 bp1 += bstride[2];
963 cp1 += cstride[2];
964 hap1 += hastride[2];
965 hbp1 += hbstride[2];
966 }
967 }
968 }
969 }
970 }
971 return CCV_NNC_EXEC_SUCCESS;
972}
973
974static int _ccv_nnc_ewexp_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
975{
976 // Assuming this is float 32.
977 int dim[CCV_NNC_MAX_DIM_ALLOC];
978 int astride[CCV_NNC_MAX_DIM_ALLOC];
979 int bstride[CCV_NNC_MAX_DIM_ALLOC];
980 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
981 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
982 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
983 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
984 ccv_nnc_tensor_view_get_dim(a, dim);
985 assert(ccv_nnc_tensor_view_check_dim(b, dim));
986 int x;
987 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b))
988 {
989 // Super optimal case, just do one for-loop for sum.
990 const int tensor_count = ccv_nnc_tensor_count(a->info);
991 for (x = 0; x < tensor_count; x++)
992 b->data.f32[x] = exp(a->data.f32[x]);
993 return CCV_NNC_EXEC_SUCCESS;
994 }
995 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
996 ccv_nnc_tensor_view_get_stride(a, astride);
997 ccv_nnc_tensor_view_get_stride(b, bstride);
998 int i[CCV_NNC_MAX_DIM + 2];
999 float* const ap = a->data.f32;
1000 float* const bp = b->data.f32;
1001 const int count = dim[2] * dim[3];
1002 if (astride[2] == dim[3] && bstride[2] == dim[3])
1003 {
1004 // Special casing if astride[2] is the same as dim[3]
1005 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1006 {
1007 float* ap0 = ap + i[0] * astride[0];
1008 float* bp0 = bp + i[0] * bstride[0];
1009 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1010 {
1011 for (x = 0; x < count; x++)
1012 bp0[x] = exp(ap0[x]);
1013 ap0 += astride[1];
1014 bp0 += bstride[1];
1015 }
1016 }
1017 return CCV_NNC_EXEC_SUCCESS;
1018 }
1019 // Non-optimal case, need to do skip copy.
1020 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1021 {
1022 float* const ap0 = ap + i[0] * astride[0];
1023 float* const bp0 = bp + i[0] * bstride[0];
1024 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1025 {
1026 float* ap1 = ap0 + i[1] * astride[1];
1027 float* bp1 = bp0 + i[1] * bstride[1];
1028 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1029 {
1030 for (x = 0; x < dim[3]; x++)
1031 bp1[x] = exp(ap1[x]);
1032 ap1 += astride[2];
1033 bp1 += bstride[2];
1034 }
1035 }
1036 }
1037 return CCV_NNC_EXEC_SUCCESS;
1038}
1039
1040static int _ccv_nnc_ewexp_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1041{
1042 // D[Exp[x], x] = Exp[x]
1043 if (inputs[0] == 0)
1044 _ccv_nnc_tensor_transfer_cpu_ref_f32((ccv_nnc_tensor_view_t*)inputs[2], (ccv_nnc_tensor_view_t*)outputs[0]);
1045 else
1046 _ccv_nnc_ewprod_forw_cpu_ref((ccv_nnc_tensor_view_t*[]){
1047 (ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)inputs[2]
1048 }, 2, (ccv_nnc_tensor_view_t**)outputs, output_size);
1049 return CCV_NNC_EXEC_SUCCESS;
1050}
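// Note: since D[Exp[x], x] = Exp[x] and inputs[2] holds the forward output
// b = exp(x), the gradient is h = g * exp(x), delegated to the element-wise
// product kernel; a null inputs[0] stands for a unit incoming gradient, in
// which case exp(x) is copied through unchanged, matching the !g paths of the
// other backward kernels below.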
1051
1052static inline float _ccv_nnc_softplusf(const float x)
1053{
1054 return (x > 0) ? x + log1pf(expf(-x)) : log1pf(expf(x));
1055}
1056
1057static inline float _ccv_nnc_softplus_sigmoidf(const float x)
1058{
1059 if (x >= 0)
1060 {
1061 const float z = expf(-x);
1062 return 1. / (1. + z);
1063 }
1064 const float z = expf(x);
1065 return z / (1. + z);
1066}
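// Note: both helpers are the numerically stable formulations. For softplus,
// log1pf(expf(x)) overflows to +inf once expf(x) does, while the algebraically
// equal x + log1pf(expf(-x)) stays finite for large x (softplus(100.f)
// evaluates to about 100 rather than inf). The sigmoid likewise branches so
// that expf() is only ever called with a non-positive argument.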
1067
1068static int _ccv_nnc_ewsoftplus_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1069{
1070 // Assuming this is float 32.
1071 int dim[CCV_NNC_MAX_DIM_ALLOC];
1072 int astride[CCV_NNC_MAX_DIM_ALLOC];
1073 int bstride[CCV_NNC_MAX_DIM_ALLOC];
1074 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
1075 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1076 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
1077 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
1078 ccv_nnc_tensor_view_get_dim(a, dim);
1079 assert(ccv_nnc_tensor_view_check_dim(b, dim));
1080 int x;
1081 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b))
1082 {
1083 const int tensor_count = ccv_nnc_tensor_count(a->info);
1084 for (x = 0; x < tensor_count; x++)
1085 b->data.f32[x] = _ccv_nnc_softplusf(a->data.f32[x]);
1086 return CCV_NNC_EXEC_SUCCESS;
1087 }
1088 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1089 ccv_nnc_tensor_view_get_stride(a, astride);
1090 ccv_nnc_tensor_view_get_stride(b, bstride);
1091 int i[CCV_NNC_MAX_DIM + 2];
1092 float* const ap = a->data.f32;
1093 float* const bp = b->data.f32;
1094 const int count = dim[2] * dim[3];
1095 if (astride[2] == dim[3] && bstride[2] == dim[3])
1096 {
1097 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1098 {
1099 float* ap0 = ap + i[0] * astride[0];
1100 float* bp0 = bp + i[0] * bstride[0];
1101 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1102 {
1103 for (x = 0; x < count; x++)
1104 bp0[x] = _ccv_nnc_softplusf(ap0[x]);
1105 ap0 += astride[1];
1106 bp0 += bstride[1];
1107 }
1108 }
1109 return CCV_NNC_EXEC_SUCCESS;
1110 }
1111 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1112 {
1113 float* const ap0 = ap + i[0] * astride[0];
1114 float* const bp0 = bp + i[0] * bstride[0];
1115 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1116 {
1117 float* ap1 = ap0 + i[1] * astride[1];
1118 float* bp1 = bp0 + i[1] * bstride[1];
1119 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1120 {
1121 for (x = 0; x < dim[3]; x++)
1122 bp1[x] = _ccv_nnc_softplusf(ap1[x]);
1123 ap1 += astride[2];
1124 bp1 += bstride[2];
1125 }
1126 }
1127 }
1128 return CCV_NNC_EXEC_SUCCESS;
1129}
1130
1131static int _ccv_nnc_ewsoftplus_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1132{
1133 // D[Softplus[x], x] = Sigmoid[x]
1134 int dim[CCV_NNC_MAX_DIM_ALLOC];
1135 int gstride[CCV_NNC_MAX_DIM_ALLOC];
1136 int astride[CCV_NNC_MAX_DIM_ALLOC];
1137 int hstride[CCV_NNC_MAX_DIM_ALLOC];
1138 ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
1139 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[1];
1140 ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[0];
1141 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
1142 assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2);
1143 ccv_nnc_tensor_view_get_dim(a, dim);
1144 assert(ccv_nnc_tensor_view_check_dim(h, dim));
1145 if (g)
1146 {
1147 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
1148 assert(ccv_nnc_tensor_view_check_dim(g, dim));
1149 }
1150 int x;
1151 if ((!g || !CCV_IS_TENSOR_VIEW(g)) && !CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(h))
1152 {
1153 const int tensor_count = ccv_nnc_tensor_count(a->info);
1154 if (g)
1155 {
1156 for (x = 0; x < tensor_count; x++)
1157 h->data.f32[x] = g->data.f32[x] * _ccv_nnc_softplus_sigmoidf(a->data.f32[x]);
1158 } else {
1159 for (x = 0; x < tensor_count; x++)
1160 h->data.f32[x] = _ccv_nnc_softplus_sigmoidf(a->data.f32[x]);
1161 }
1162 return CCV_NNC_EXEC_SUCCESS;
1163 }
1164 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1165 if (g)
1166 ccv_nnc_tensor_view_get_stride(g, gstride);
1167 ccv_nnc_tensor_view_get_stride(a, astride);
1168 ccv_nnc_tensor_view_get_stride(h, hstride);
1169 int i[CCV_NNC_MAX_DIM + 2];
1170 float* const gp = g ? g->data.f32 : 0;
1171 float* const ap = a->data.f32;
1172 float* const hp = h->data.f32;
1173 const int count = dim[2] * dim[3];
1174 if ((!g || gstride[2] == dim[3]) && astride[2] == dim[3] && hstride[2] == dim[3])
1175 {
1176 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1177 {
1178 float* gp0 = g ? gp + i[0] * gstride[0] : 0;
1179 float* ap0 = ap + i[0] * astride[0];
1180 float* hp0 = hp + i[0] * hstride[0];
1181 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1182 {
1183 if (g)
1184 {
1185 for (x = 0; x < count; x++)
1186 hp0[x] = gp0[x] * _ccv_nnc_softplus_sigmoidf(ap0[x]);
1187 gp0 += gstride[1];
1188 } else {
1189 for (x = 0; x < count; x++)
1190 hp0[x] = _ccv_nnc_softplus_sigmoidf(ap0[x]);
1191 }
1192 ap0 += astride[1];
1193 hp0 += hstride[1];
1194 }
1195 }
1196 return CCV_NNC_EXEC_SUCCESS;
1197 }
1198 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1199 {
1200 float* const gp0 = g ? gp + i[0] * gstride[0] : 0;
1201 float* const ap0 = ap + i[0] * astride[0];
1202 float* const hp0 = hp + i[0] * hstride[0];
1203 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1204 {
1205 float* gp1 = g ? gp0 + i[1] * gstride[1] : 0;
1206 float* ap1 = ap0 + i[1] * astride[1];
1207 float* hp1 = hp0 + i[1] * hstride[1];
1208 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1209 {
1210 if (g)
1211 {
1212 for (x = 0; x < dim[3]; x++)
1213 hp1[x] = gp1[x] * _ccv_nnc_softplus_sigmoidf(ap1[x]);
1214 gp1 += gstride[2];
1215 } else {
1216 for (x = 0; x < dim[3]; x++)
1217 hp1[x] = _ccv_nnc_softplus_sigmoidf(ap1[x]);
1218 }
1219 ap1 += astride[2];
1220 hp1 += hstride[2];
1221 }
1222 }
1223 }
1224 return CCV_NNC_EXEC_SUCCESS;
1225}
1226
1227static void _ccv_nnc_ewpow_forw_cpu_ref(ccv_nnc_tensor_view_t* const a, const float exp, ccv_nnc_tensor_view_t* const c)
1228{
1229 // Assuming this is float 32.
1230 int dim[CCV_NNC_MAX_DIM_ALLOC];
1231 int astride[CCV_NNC_MAX_DIM_ALLOC];
1232 int cstride[CCV_NNC_MAX_DIM_ALLOC];
1233 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
1234 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
1235 ccv_nnc_tensor_view_get_dim(a, dim);
1236 assert(ccv_nnc_tensor_view_check_dim(c, dim));
1237 int x;
1238 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(c))
1239 {
1240 const int tensor_count = ccv_nnc_tensor_count(a->info);
1241 for (x = 0; x < tensor_count; x++)
1242 c->data.f32[x] = powf(a->data.f32[x], exp);
1243 return;
1244 }
1245 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1246 ccv_nnc_tensor_view_get_stride(a, astride);
1247 ccv_nnc_tensor_view_get_stride(c, cstride);
1248 int i[CCV_NNC_MAX_DIM + 2];
1249 float* const ap = a->data.f32;
1250 float* const cp = c->data.f32;
1251 const int count = dim[2] * dim[3];
1252 if (astride[2] == dim[3] && cstride[2] == dim[3])
1253 {
1254 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1255 {
1256 float* ap0 = ap + i[0] * astride[0];
1257 float* cp0 = cp + i[0] * cstride[0];
1258 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1259 {
1260 for (x = 0; x < count; x++)
1261 cp0[x] = powf(ap0[x], exp);
1262 ap0 += astride[1];
1263 cp0 += cstride[1];
1264 }
1265 }
1266 return;
1267 }
1268 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1269 {
1270 float* const ap0 = ap + i[0] * astride[0];
1271 float* const cp0 = cp + i[0] * cstride[0];
1272 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1273 {
1274 float* ap1 = ap0 + i[1] * astride[1];
1275 float* cp1 = cp0 + i[1] * cstride[1];
1276 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1277 {
1278 for (x = 0; x < dim[3]; x++)
1279 cp1[x] = powf(ap1[x], exp);
1280 ap1 += astride[2];
1281 cp1 += cstride[2];
1282 }
1283 }
1284 }
1285}
1286
1287static int _ccv_nnc_ewpow_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1288{
1289 _ccv_nnc_ewpow_forw_cpu_ref((ccv_nnc_tensor_view_t*)inputs[0], cmd.info.pow.exponent, (ccv_nnc_tensor_view_t*)outputs[0]);
1290 return CCV_NNC_EXEC_SUCCESS;
1291}
1292
1293static void _ccv_nnc_ewpow_back_da_cpu_ref(ccv_nnc_tensor_view_t* const g, ccv_nnc_tensor_view_t* const a, const float exp, ccv_nnc_tensor_view_t* const h)
1294{
1295 // D[pow(a, exp), a] = exp * pow(a, exp - 1)
1296 int dim[CCV_NNC_MAX_DIM_ALLOC];
1297 int gstride[CCV_NNC_MAX_DIM_ALLOC];
1298 int astride[CCV_NNC_MAX_DIM_ALLOC];
1299 int hstride[CCV_NNC_MAX_DIM_ALLOC];
1300 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
1301 assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2);
1302 ccv_nnc_tensor_view_get_dim(a, dim);
1303 assert(ccv_nnc_tensor_view_check_dim(h, dim));
1304 if (g)
1305 {
1306 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
1307 assert(ccv_nnc_tensor_view_check_dim(g, dim));
1308 }
1309 int x;
1310 if ((!g || !CCV_IS_TENSOR_VIEW(g)) && !CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(h))
1311 {
1312 const int tensor_count = ccv_nnc_tensor_count(a->info);
1313 if (g)
1314 {
1315 for (x = 0; x < tensor_count; x++)
1316 h->data.f32[x] = g->data.f32[x] * exp * powf(a->data.f32[x], exp - 1);
1317 } else {
1318 for (x = 0; x < tensor_count; x++)
1319 h->data.f32[x] = exp * powf(a->data.f32[x], exp - 1);
1320 }
1321 return;
1322 }
1323 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1324 if (g)
1325 ccv_nnc_tensor_view_get_stride(g, gstride);
1326 ccv_nnc_tensor_view_get_stride(a, astride);
1327 ccv_nnc_tensor_view_get_stride(h, hstride);
1328 int i[CCV_NNC_MAX_DIM + 2];
1329 float* const gp = g ? g->data.f32 : 0;
1330 float* const ap = a->data.f32;
1331 float* const hp = h->data.f32;
1332 const int count = dim[2] * dim[3];
1333 if ((!g || gstride[2] == dim[3]) && astride[2] == dim[3] && hstride[2] == dim[3])
1334 {
1335 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1336 {
1337 float* gp0 = g ? gp + i[0] * gstride[0] : 0;
1338 float* ap0 = ap + i[0] * astride[0];
1339 float* hp0 = hp + i[0] * hstride[0];
1340 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1341 {
1342 if (g)
1343 {
1344 for (x = 0; x < count; x++)
1345 hp0[x] = gp0[x] * exp * powf(ap0[x], exp - 1);
1346 gp0 += gstride[1];
1347 } else {
1348 for (x = 0; x < count; x++)
1349 hp0[x] = exp * powf(ap0[x], exp - 1);
1350 }
1351 ap0 += astride[1];
1352 hp0 += hstride[1];
1353 }
1354 }
1355 return;
1356 }
1357 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1358 {
1359 float* const gp0 = g ? gp + i[0] * gstride[0] : 0;
1360 float* const ap0 = ap + i[0] * astride[0];
1361 float* const hp0 = hp + i[0] * hstride[0];
1362 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1363 {
1364 float* gp1 = g ? gp0 + i[1] * gstride[1] : 0;
1365 float* ap1 = ap0 + i[1] * astride[1];
1366 float* hp1 = hp0 + i[1] * hstride[1];
1367 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1368 {
1369 if (g)
1370 {
1371 for (x = 0; x < dim[3]; x++)
1372 hp1[x] = gp1[x] * exp * powf(ap1[x], exp - 1);
1373 gp1 += gstride[2];
1374 } else {
1375 for (x = 0; x < dim[3]; x++)
1376 hp1[x] = exp * powf(ap1[x], exp - 1);
1377 }
1378 ap1 += astride[2];
1379 hp1 += hstride[2];
1380 }
1381 }
1382 }
1383}
1384
1385static int _ccv_nnc_ewpow_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1386{
1387 ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
1388 ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
1389 if (output_size > 0 && outputs[0])
1390 _ccv_nnc_ewpow_back_da_cpu_ref(g, a, cmd.info.pow.exponent, (ccv_nnc_tensor_view_t*)outputs[0]);
1391 return CCV_NNC_EXEC_SUCCESS;
1392}
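// Note: only the gradient with respect to the base is produced, using
// D[pow(a, p), a] = p * pow(a, p - 1) with p = cmd.info.pow.exponent; the
// output_size > 0 && outputs[0] guard presumably lets callers skip the
// gradient entirely.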
1393
1394static int _ccv_nnc_ewlog_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1395{
1396 // Assuming this is float 32.
1397 int dim[CCV_NNC_MAX_DIM_ALLOC];
1398 int astride[CCV_NNC_MAX_DIM_ALLOC];
1399 int bstride[CCV_NNC_MAX_DIM_ALLOC];
1400 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
1401 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1402 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
1403 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
1404 ccv_nnc_tensor_view_get_dim(a, dim);
1405 assert(ccv_nnc_tensor_view_check_dim(b, dim));
1406 int x;
1407 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b))
1408 {
1409 // Super optimal case, just do one for-loop for sum.
1410 const int tensor_count = ccv_nnc_tensor_count(a->info);
1411 for (x = 0; x < tensor_count; x++)
1412 b->data.f32[x] = log(a->data.f32[x]);
1413 return CCV_NNC_EXEC_SUCCESS;
1414 }
1415 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1416 ccv_nnc_tensor_view_get_stride(a, astride);
1417 ccv_nnc_tensor_view_get_stride(b, bstride);
1418 int i[CCV_NNC_MAX_DIM + 2];
1419 float* const ap = a->data.f32;
1420 float* const bp = b->data.f32;
1421 const int count = dim[2] * dim[3];
1422 if (astride[2] == dim[3] && bstride[2] == dim[3])
1423 {
1424 // Special casing if the ainc[3] is the same as dim[3]
1425 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1426 {
1427 float* ap0 = ap + i[0] * astride[0];
1428 float* bp0 = bp + i[0] * bstride[0];
1429 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1430 {
1431 for (x = 0; x < count; x++)
1432 bp0[x] = log(ap0[x]);
1433 ap0 += astride[1];
1434 bp0 += bstride[1];
1435 }
1436 }
1437 return CCV_NNC_EXEC_SUCCESS;
1438 }
1439 // Non-optimal case, need to do skip copy.
1440 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1441 {
1442 float* const ap0 = ap + i[0] * astride[0];
1443 float* const bp0 = bp + i[0] * bstride[0];
1444 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1445 {
1446 float* ap1 = ap0 + i[1] * astride[1];
1447 float* bp1 = bp0 + i[1] * bstride[1];
1448 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1449 {
1450 for (x = 0; x < dim[3]; x++)
1451 bp1[x] = log(ap1[x]);
1452 ap1 += astride[2];
1453 bp1 += bstride[2];
1454 }
1455 }
1456 }
1457 return CCV_NNC_EXEC_SUCCESS;
1458}
1459
1460static int _ccv_nnc_ewlog_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1461{
1462 // D[Log[x], x] = 1 / x
1463 _ccv_nnc_ewdiv_forw_cpu_ref(1, (ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)inputs[1], (ccv_nnc_tensor_view_t*)outputs[0]);
1464 return CCV_NNC_EXEC_SUCCESS;
1465}
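// Note: by the chain rule h = g / x, delegated to the element-wise division
// kernel (inputs[0] is the incoming gradient g, inputs[1] the original input
// x); judging from this call and the sqrt backward below, the leading
// argument is a scalar factor applied to the quotient, here 1.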
1466
1467static int _ccv_nnc_ewsqrt_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1468{
1469 // Assuming this is float 32.
1470 int dim[CCV_NNC_MAX_DIM_ALLOC];
1471 int astride[CCV_NNC_MAX_DIM_ALLOC];
1472 int bstride[CCV_NNC_MAX_DIM_ALLOC];
1473 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
1474 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1475 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
1476 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
1477 ccv_nnc_tensor_view_get_dim(a, dim);
1478 assert(ccv_nnc_tensor_view_check_dim(b, dim));
1479 int x;
1480 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b))
1481 {
1482 // Super optimal case, just do one for-loop for sum.
1483 const int tensor_count = ccv_nnc_tensor_count(a->info);
1484 for (x = 0; x < tensor_count; x++)
1485 b->data.f32[x] = sqrt(a->data.f32[x]);
1486 return CCV_NNC_EXEC_SUCCESS;
1487 }
1488 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1489 ccv_nnc_tensor_view_get_stride(a, astride);
1490 ccv_nnc_tensor_view_get_stride(b, bstride);
1491 int i[CCV_NNC_MAX_DIM + 2];
1492 float* const ap = a->data.f32;
1493 float* const bp = b->data.f32;
1494 const int count = dim[2] * dim[3];
1495 if (astride[2] == dim[3] && bstride[2] == dim[3])
1496 {
1497 // Special casing if the ainc[3] is the same as dim[3]
1498 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1499 {
1500 float* ap0 = ap + i[0] * astride[0];
1501 float* bp0 = bp + i[0] * bstride[0];
1502 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1503 {
1504 for (x = 0; x < count; x++)
1505 bp0[x] = sqrt(ap0[x]);
1506 ap0 += astride[1];
1507 bp0 += bstride[1];
1508 }
1509 }
1510 return CCV_NNC_EXEC_SUCCESS;
1511 }
1512 // Non-optimal case, need to do skip copy.
1513 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1514 {
1515 float* const ap0 = ap + i[0] * astride[0];
1516 float* const bp0 = bp + i[0] * bstride[0];
1517 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1518 {
1519 float* ap1 = ap0 + i[1] * astride[1];
1520 float* bp1 = bp0 + i[1] * bstride[1];
1521 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1522 {
1523 for (x = 0; x < dim[3]; x++)
1524 bp1[x] = sqrt(ap1[x]);
1525 ap1 += astride[2];
1526 bp1 += bstride[2];
1527 }
1528 }
1529 }
1530 return CCV_NNC_EXEC_SUCCESS;
1531}
1532
1533static int _ccv_nnc_ewsqrt_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1534{
1535 // D[Sqrt[x], x] = 0.5 / Sqrt[x]
1536 _ccv_nnc_ewdiv_forw_cpu_ref(0.5, (ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)inputs[2], (ccv_nnc_tensor_view_t*)outputs[0]);
1537 return CCV_NNC_EXEC_SUCCESS;
1538}
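// Note: D[Sqrt[x], x] = 0.5 / Sqrt[x], and inputs[2] is the forward output
// b = sqrt(x), so the square root is not recomputed; the division kernel
// evaluates h = 0.5 * g / b.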
1539
1540static int _ccv_nnc_ewsin_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1541{
1542 // Assuming this is float 32.
1543 int dim[CCV_NNC_MAX_DIM_ALLOC];
1544 int astride[CCV_NNC_MAX_DIM_ALLOC];
1545 int bstride[CCV_NNC_MAX_DIM_ALLOC];
1546 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
1547 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1548 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
1549 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
1550 ccv_nnc_tensor_view_get_dim(a, dim);
1551 assert(ccv_nnc_tensor_view_check_dim(b, dim));
1552 int x;
1553 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b))
1554 {
1555 const int tensor_count = ccv_nnc_tensor_count(a->info);
1556 for (x = 0; x < tensor_count; x++)
1557 b->data.f32[x] = sinf(a->data.f32[x]);
1558 return CCV_NNC_EXEC_SUCCESS;
1559 }
1560 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1561 ccv_nnc_tensor_view_get_stride(a, astride);
1562 ccv_nnc_tensor_view_get_stride(b, bstride);
1563 int i[CCV_NNC_MAX_DIM + 2];
1564 float* const ap = a->data.f32;
1565 float* const bp = b->data.f32;
1566 const int count = dim[2] * dim[3];
1567 if (astride[2] == dim[3] && bstride[2] == dim[3])
1568 {
1569 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1570 {
1571 float* ap0 = ap + i[0] * astride[0];
1572 float* bp0 = bp + i[0] * bstride[0];
1573 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1574 {
1575 for (x = 0; x < count; x++)
1576 bp0[x] = sinf(ap0[x]);
1577 ap0 += astride[1];
1578 bp0 += bstride[1];
1579 }
1580 }
1581 return CCV_NNC_EXEC_SUCCESS;
1582 }
1583 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1584 {
1585 float* const ap0 = ap + i[0] * astride[0];
1586 float* const bp0 = bp + i[0] * bstride[0];
1587 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1588 {
1589 float* ap1 = ap0 + i[1] * astride[1];
1590 float* bp1 = bp0 + i[1] * bstride[1];
1591 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1592 {
1593 for (x = 0; x < dim[3]; x++)
1594 bp1[x] = sinf(ap1[x]);
1595 ap1 += astride[2];
1596 bp1 += bstride[2];
1597 }
1598 }
1599 }
1600 return CCV_NNC_EXEC_SUCCESS;
1601}
1602
1603static int _ccv_nnc_ewsin_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1604{
1605 // D[Sin[x], x] = Cos[x]
1606 int dim[CCV_NNC_MAX_DIM_ALLOC];
1607 int gstride[CCV_NNC_MAX_DIM_ALLOC];
1608 int astride[CCV_NNC_MAX_DIM_ALLOC];
1609 int hstride[CCV_NNC_MAX_DIM_ALLOC];
1610 ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
1611 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[1];
1612 ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[0];
1613 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
1614 assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2);
1615 ccv_nnc_tensor_view_get_dim(a, dim);
1616 assert(ccv_nnc_tensor_view_check_dim(h, dim));
1617 if (g)
1618 {
1619 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
1620 assert(ccv_nnc_tensor_view_check_dim(g, dim));
1621 }
1622 int x;
1623 if ((!g || !CCV_IS_TENSOR_VIEW(g)) && !CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(h))
1624 {
1625 const int tensor_count = ccv_nnc_tensor_count(a->info);
1626 if (g)
1627 {
1628 for (x = 0; x < tensor_count; x++)
1629 h->data.f32[x] = g->data.f32[x] * cosf(a->data.f32[x]);
1630 } else {
1631 for (x = 0; x < tensor_count; x++)
1632 h->data.f32[x] = cosf(a->data.f32[x]);
1633 }
1634 return CCV_NNC_EXEC_SUCCESS;
1635 }
1636 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1637 if (g)
1638 ccv_nnc_tensor_view_get_stride(g, gstride);
1639 ccv_nnc_tensor_view_get_stride(a, astride);
1640 ccv_nnc_tensor_view_get_stride(h, hstride);
1641 int i[CCV_NNC_MAX_DIM + 2];
1642 float* const gp = g ? g->data.f32 : 0;
1643 float* const ap = a->data.f32;
1644 float* const hp = h->data.f32;
1645 const int count = dim[2] * dim[3];
1646 if ((!g || gstride[2] == dim[3]) && astride[2] == dim[3] && hstride[2] == dim[3])
1647 {
1648 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1649 {
1650 float* gp0 = g ? gp + i[0] * gstride[0] : 0;
1651 float* ap0 = ap + i[0] * astride[0];
1652 float* hp0 = hp + i[0] * hstride[0];
1653 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1654 {
1655 if (g)
1656 {
1657 for (x = 0; x < count; x++)
1658 hp0[x] = gp0[x] * cosf(ap0[x]);
1659 gp0 += gstride[1];
1660 } else {
1661 for (x = 0; x < count; x++)
1662 hp0[x] = cosf(ap0[x]);
1663 }
1664 ap0 += astride[1];
1665 hp0 += hstride[1];
1666 }
1667 }
1668 return CCV_NNC_EXEC_SUCCESS;
1669 }
1670 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1671 {
1672 float* const gp0 = g ? gp + i[0] * gstride[0] : 0;
1673 float* const ap0 = ap + i[0] * astride[0];
1674 float* const hp0 = hp + i[0] * hstride[0];
1675 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1676 {
1677 float* gp1 = g ? gp0 + i[1] * gstride[1] : 0;
1678 float* ap1 = ap0 + i[1] * astride[1];
1679 float* hp1 = hp0 + i[1] * hstride[1];
1680 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1681 {
1682 if (g)
1683 {
1684 for (x = 0; x < dim[3]; x++)
1685 hp1[x] = gp1[x] * cosf(ap1[x]);
1686 gp1 += gstride[2];
1687 } else {
1688 for (x = 0; x < dim[3]; x++)
1689 hp1[x] = cosf(ap1[x]);
1690 }
1691 ap1 += astride[2];
1692 hp1 += hstride[2];
1693 }
1694 }
1695 }
1696 return CCV_NNC_EXEC_SUCCESS;
1697}
1698
1699static int _ccv_nnc_ewcos_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1700{
1701 // Assuming this is float 32.
1702 int dim[CCV_NNC_MAX_DIM_ALLOC];
1703 int astride[CCV_NNC_MAX_DIM_ALLOC];
1704 int bstride[CCV_NNC_MAX_DIM_ALLOC];
1705 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
1706 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1707 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
1708 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
1709 ccv_nnc_tensor_view_get_dim(a, dim);
1710 assert(ccv_nnc_tensor_view_check_dim(b, dim));
1711 int x;
1712 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b))
1713 {
1714 const int tensor_count = ccv_nnc_tensor_count(a->info);
1715 for (x = 0; x < tensor_count; x++)
1716 b->data.f32[x] = cosf(a->data.f32[x]);
1717 return CCV_NNC_EXEC_SUCCESS;
1718 }
1719 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1720 ccv_nnc_tensor_view_get_stride(a, astride);
1721 ccv_nnc_tensor_view_get_stride(b, bstride);
1722 int i[CCV_NNC_MAX_DIM + 2];
1723 float* const ap = a->data.f32;
1724 float* const bp = b->data.f32;
1725 const int count = dim[2] * dim[3];
1726 if (astride[2] == dim[3] && bstride[2] == dim[3])
1727 {
1728 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1729 {
1730 float* ap0 = ap + i[0] * astride[0];
1731 float* bp0 = bp + i[0] * bstride[0];
1732 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1733 {
1734 for (x = 0; x < count; x++)
1735 bp0[x] = cosf(ap0[x]);
1736 ap0 += astride[1];
1737 bp0 += bstride[1];
1738 }
1739 }
1740 return CCV_NNC_EXEC_SUCCESS;
1741 }
1742 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1743 {
1744 float* const ap0 = ap + i[0] * astride[0];
1745 float* const bp0 = bp + i[0] * bstride[0];
1746 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1747 {
1748 float* ap1 = ap0 + i[1] * astride[1];
1749 float* bp1 = bp0 + i[1] * bstride[1];
1750 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1751 {
1752 for (x = 0; x < dim[3]; x++)
1753 bp1[x] = cosf(ap1[x]);
1754 ap1 += astride[2];
1755 bp1 += bstride[2];
1756 }
1757 }
1758 }
1759 return CCV_NNC_EXEC_SUCCESS;
1760}
1761
1762static int _ccv_nnc_ewcos_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1763{
1764 // D[Cos[x], x] = -Sin[x]
1765 int dim[CCV_NNC_MAX_DIM_ALLOC];
1766 int gstride[CCV_NNC_MAX_DIM_ALLOC];
1767 int astride[CCV_NNC_MAX_DIM_ALLOC];
1768 int hstride[CCV_NNC_MAX_DIM_ALLOC];
1769 ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
1770 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[1];
1771 ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[0];
1772 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
1773 assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2);
1774 ccv_nnc_tensor_view_get_dim(a, dim);
1775 assert(ccv_nnc_tensor_view_check_dim(h, dim));
1776 if (g)
1777 {
1778 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
1779 assert(ccv_nnc_tensor_view_check_dim(g, dim));
1780 }
1781 int x;
1782 if ((!g || !CCV_IS_TENSOR_VIEW(g)) && !CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(h))
1783 {
1784 const int tensor_count = ccv_nnc_tensor_count(a->info);
1785 if (g)
1786 {
1787 for (x = 0; x < tensor_count; x++)
1788 h->data.f32[x] = -g->data.f32[x] * sinf(a->data.f32[x]);
1789 } else {
1790 for (x = 0; x < tensor_count; x++)
1791 h->data.f32[x] = -sinf(a->data.f32[x]);
1792 }
1793 return CCV_NNC_EXEC_SUCCESS;
1794 }
1795 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1796 if (g)
1797 ccv_nnc_tensor_view_get_stride(g, gstride);
1798 ccv_nnc_tensor_view_get_stride(a, astride);
1799 ccv_nnc_tensor_view_get_stride(h, hstride);
1800 int i[CCV_NNC_MAX_DIM + 2];
1801 float* const gp = g ? g->data.f32 : 0;
1802 float* const ap = a->data.f32;
1803 float* const hp = h->data.f32;
1804 const int count = dim[2] * dim[3];
1805 if ((!g || gstride[2] == dim[3]) && astride[2] == dim[3] && hstride[2] == dim[3])
1806 {
1807 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1808 {
1809 float* gp0 = g ? gp + i[0] * gstride[0] : 0;
1810 float* ap0 = ap + i[0] * astride[0];
1811 float* hp0 = hp + i[0] * hstride[0];
1812 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1813 {
1814 if (g)
1815 {
1816 for (x = 0; x < count; x++)
1817 hp0[x] = -gp0[x] * sinf(ap0[x]);
1818 gp0 += gstride[1];
1819 } else {
1820 for (x = 0; x < count; x++)
1821 hp0[x] = -sinf(ap0[x]);
1822 }
1823 ap0 += astride[1];
1824 hp0 += hstride[1];
1825 }
1826 }
1827 return CCV_NNC_EXEC_SUCCESS;
1828 }
1829 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1830 {
1831 float* const gp0 = g ? gp + i[0] * gstride[0] : 0;
1832 float* const ap0 = ap + i[0] * astride[0];
1833 float* const hp0 = hp + i[0] * hstride[0];
1834 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1835 {
1836 float* gp1 = g ? gp0 + i[1] * gstride[1] : 0;
1837 float* ap1 = ap0 + i[1] * astride[1];
1838 float* hp1 = hp0 + i[1] * hstride[1];
1839 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1840 {
1841 if (g)
1842 {
1843 for (x = 0; x < dim[3]; x++)
1844 hp1[x] = -gp1[x] * sinf(ap1[x]);
1845 gp1 += gstride[2];
1846 } else {
1847 for (x = 0; x < dim[3]; x++)
1848 hp1[x] = -sinf(ap1[x]);
1849 }
1850 ap1 += astride[2];
1851 hp1 += hstride[2];
1852 }
1853 }
1854 }
1855 return CCV_NNC_EXEC_SUCCESS;
1856}
1857
1858static int _ccv_nnc_ewabs_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1859{
1860 // Assuming this is float 32.
1861 int dim[CCV_NNC_MAX_DIM_ALLOC];
1862 int astride[CCV_NNC_MAX_DIM_ALLOC];
1863 int bstride[CCV_NNC_MAX_DIM_ALLOC];
1864 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
1865 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1866 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
1867 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
1868 ccv_nnc_tensor_view_get_dim(a, dim);
1869 assert(ccv_nnc_tensor_view_check_dim(b, dim));
1870 int x;
1871 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b))
1872 {
1873 // Super optimal case, just do one for-loop for sum.
1874 const int tensor_count = ccv_nnc_tensor_count(a->info);
1875 for (x = 0; x < tensor_count; x++)
1876 b->data.f32[x] = fabs(a->data.f32[x]);
1877 return CCV_NNC_EXEC_SUCCESS;
1878 }
1879 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1880 ccv_nnc_tensor_view_get_stride(a, astride);
1881 ccv_nnc_tensor_view_get_stride(b, bstride);
1882 int i[CCV_NNC_MAX_DIM + 2];
1883 float* const ap = a->data.f32;
1884 float* const bp = b->data.f32;
1885 const int count = dim[2] * dim[3];
1886 if (astride[2] == dim[3] && bstride[2] == dim[3])
1887 {
1888 // Special casing if the ainc[3] is the same as dim[3]
1889 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1890 {
1891 float* ap0 = ap + i[0] * astride[0];
1892 float* bp0 = bp + i[0] * bstride[0];
1893 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1894 {
1895 for (x = 0; x < count; x++)
1896 bp0[x] = fabs(ap0[x]);
1897 ap0 += astride[1];
1898 bp0 += bstride[1];
1899 }
1900 }
1901 return CCV_NNC_EXEC_SUCCESS;
1902 }
1903 // Non-optimal case, need to do skip copy.
1904 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1905 {
1906 float* const ap0 = ap + i[0] * astride[0];
1907 float* const bp0 = bp + i[0] * bstride[0];
1908 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1909 {
1910 float* ap1 = ap0 + i[1] * astride[1];
1911 float* bp1 = bp0 + i[1] * bstride[1];
1912 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1913 {
1914 for (x = 0; x < dim[3]; x++)
1915 bp1[x] = fabs(ap1[x]);
1916 ap1 += astride[2];
1917 bp1 += bstride[2];
1918 }
1919 }
1920 }
1921 return CCV_NNC_EXEC_SUCCESS;
1922}
1923
1924static int _ccv_nnc_ewabs_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1925{
1926 // Assuming this is float 32.
1927 int dim[CCV_NNC_MAX_DIM_ALLOC];
1928 int gstride[CCV_NNC_MAX_DIM_ALLOC];
1929 int astride[CCV_NNC_MAX_DIM_ALLOC];
1930 int bstride[CCV_NNC_MAX_DIM_ALLOC];
1931 ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
1932 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[1];
1933 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1934 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
[1] Assuming the condition is true
[2] Taking true branch
1935 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
[3] Assuming the condition is true
[4] Taking true branch
1936 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
[5] Assuming the condition is true
[6] Taking true branch
1937 ccv_nnc_tensor_view_get_dim(a, dim);
1938 assert(ccv_nnc_tensor_view_check_dim(g, dim));
[7] Assuming the condition is true
[8] Taking true branch
1939 assert(ccv_nnc_tensor_view_check_dim(b, dim));
[9] Assuming the condition is true
[10] Taking true branch
1940 int x;
1941 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(g))
[11] Assuming the condition is false
1942 {
1943 // Super optimal case, just do one for-loop for sum.
1944 const int tensor_count = ccv_nnc_tensor_count(a->info);
1945 for (x = 0; x < tensor_count; x++)
1946 b->data.f32[x] = a->data.f32[x] >= 0 ? g->data.f32[x] : -g->data.f32[x];
1947 return CCV_NNC_EXEC_SUCCESS;
1948 }
1949 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
[12] Taking true branch
1950 ccv_nnc_tensor_view_get_stride(g, astride);
1951 ccv_nnc_tensor_view_get_stride(a, astride);
1952 ccv_nnc_tensor_view_get_stride(b, bstride);
1953 int i[CCV_NNC_MAX_DIM + 2];
1954 float* const gp = g->data.f32;
1955 float* const ap = a->data.f32;
1956 float* const bp = b->data.f32;
1957 const int count = dim[2] * dim[3];
1958 if (astride[2] == dim[3] && bstride[2] == dim[3])
[13] Assuming the condition is true
[14] Assuming the condition is true
[15] Taking true branch
1959 {
1960 // Special casing if the ainc[3] is the same as dim[3]
1961 for (i[0] = 0; i[0] < dim[0]; i[0]++)
[16] Assuming the condition is true
[17] Loop condition is true. Entering loop body
1962 {
1963 float* gp0 = gp + i[0] * gstride[0];
[18] The right operand of '*' is a garbage value
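// Note: this is the root cause of the report. Line 1950 above passes astride
// for both calls, so g's strides are written into astride (and immediately
// overwritten by a's strides on line 1951) while gstride is never initialized;
// the multiplication by gstride[0] at line 1963 therefore reads an
// indeterminate value. A minimal sketch of the likely intended fix, assuming
// the convention used by the other backward kernels in this file:
//
//     ccv_nnc_tensor_view_get_stride(g, gstride); // was: (g, astride)
//     ccv_nnc_tensor_view_get_stride(a, astride);
//     ccv_nnc_tensor_view_get_stride(b, bstride);
//
// The fast-path test at line 1958 would then presumably also want to check
// gstride[2] == dim[3], mirroring _ccv_nnc_ewsin_back and _ccv_nnc_ewcos_back
// above.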
1964 float* ap0 = ap + i[0] * astride[0];
1965 float* bp0 = bp + i[0] * bstride[0];
1966 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1967 {
1968 for (x = 0; x < count; x++)
1969 bp0[x] = ap0[x] >= 0 ? gp0[x] : -gp0[x];
1970 gp0 += gstride[1];
1971 ap0 += astride[1];
1972 bp0 += bstride[1];
1973 }
1974 }
1975 return CCV_NNC_EXEC_SUCCESS;
1976 }
1977 // Non-optimal case, need to do skip copy.
1978 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1979 {
1980 float* const gp0 = gp + i[0] * gstride[0];
1981 float* const ap0 = ap + i[0] * astride[0];
1982 float* const bp0 = bp + i[0] * bstride[0];
1983 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1984 {
1985 float* gp1 = gp0 + i[1] * gstride[1];
1986 float* ap1 = ap0 + i[1] * astride[1];
1987 float* bp1 = bp0 + i[1] * bstride[1];
1988 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1989 {
1990 for (x = 0; x < dim[3]; x++)
1991 bp1[x] = ap1[x] >= 0 ? gp1[x] : -gp1[x];
1992 gp1 += gstride[2];
1993 ap1 += astride[2];
1994 bp1 += bstride[2];
1995 }
1996 }
1997 }
1998 return CCV_NNC_EXEC_SUCCESS;
1999}
2000
2001static int _ccv_nnc_clamp_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
2002{
2003 // Assuming this is float 32.
2004 int dim[CCV_NNC_MAX_DIM_ALLOC];
2005 int astride[CCV_NNC_MAX_DIM_ALLOC];
2006 int bstride[CCV_NNC_MAX_DIM_ALLOC];
2007 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
2008 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
2009 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
2010 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
2011 ccv_nnc_tensor_view_get_dim(a, dim);
2012 assert(ccv_nnc_tensor_view_check_dim(b, dim));
2013 int x;
2014 const float min = cmd.info.clamp.min;
2015 const float max = cmd.info.clamp.max;
2016 assert(!isnan(min) || !isnan(max));
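// Note: a NaN bound means "unbounded on that side", hence the requirement
// that at least one bound be a number. A scalar sketch of the semantics the
// three branches below implement (illustrative only, not part of this file):
//
//     static inline float clamp_ref(const float v, const float min, const float max)
//     {
//         if (isnan(min)) return ccv_min(v, max);  // upper bound only
//         if (isnan(max)) return ccv_max(v, min);  // lower bound only
//         return ccv_clamp(v, min, max);           // clamp v into [min, max]
//     }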
2017 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b))
2018 {
2019 // Super optimal case, just do one for-loop for sum.
2020 const int tensor_count = ccv_nnc_tensor_count(a->info);
2021 if (isnan(min))
2022 {
2023 for (x = 0; x < tensor_count; x++)
2024 b->data.f32[x] = ccv_min(a->data.f32[x], max);
2025 } else if (isnan(max)) {
2026 for (x = 0; x < tensor_count; x++)
2027 b->data.f32[x] = ccv_max(a->data.f32[x], min);
2028 } else {
2029 for (x = 0; x < tensor_count; x++)
2030 b->data.f32[x] = ccv_clamp(a->data.f32[x], min, max);
2031 }
2032 return CCV_NNC_EXEC_SUCCESS;
2033 }
2034 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
2035 ccv_nnc_tensor_view_get_stride(a, astride);
2036 ccv_nnc_tensor_view_get_stride(b, bstride);
2037 int i[CCV_NNC_MAX_DIM + 2];
2038 float* const ap = a->data.f32;
2039 float* const bp = b->data.f32;
2040 const int count = dim[2] * dim[3];
2041 if (isnan(min))
2042 {
2043 if (astride[2] == dim[3] && bstride[2] == dim[3])
2044 {
2045 // Special casing if the ainc[3] is the same as dim[3]
2046 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2047 {
2048 float* ap0 = ap + i[0] * astride[0];
2049 float* bp0 = bp + i[0] * bstride[0];
2050 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2051 {
2052 for (x = 0; x < count; x++)
2053 bp0[x] = ccv_min(ap0[x], max);
2054 ap0 += astride[1];
2055 bp0 += bstride[1];
2056 }
2057 }
2058 return CCV_NNC_EXEC_SUCCESS;
2059 }
2060 // Non-optimal case, need to do skip copy.
2061 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2062 {
2063 float* const ap0 = ap + i[0] * astride[0];
2064 float* const bp0 = bp + i[0] * bstride[0];
2065 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2066 {
2067 float* ap1 = ap0 + i[1] * astride[1];
2068 float* bp1 = bp0 + i[1] * bstride[1];
2069 for (i[2] = 0; i[2] < dim[2]; i[2]++)
2070 {
2071 for (x = 0; x < dim[3]; x++)
2072 bp1[x] = ccv_min(ap1[x], max);
2073 ap1 += astride[2];
2074 bp1 += bstride[2];
2075 }
2076 }
2077 }
2078 } else if (isnan(max)) {
2079 if (astride[2] == dim[3] && bstride[2] == dim[3])
2080 {
2081 // Special casing if astride[2] is the same as dim[3]
2082 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2083 {
2084 float* ap0 = ap + i[0] * astride[0];
2085 float* bp0 = bp + i[0] * bstride[0];
2086 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2087 {
2088 for (x = 0; x < count; x++)
2089 bp0[x] = ccv_max(ap0[x], min);
2090 ap0 += astride[1];
2091 bp0 += bstride[1];
2092 }
2093 }
2094 return CCV_NNC_EXEC_SUCCESS;
2095 }
2096 // Non-optimal case, need to do skip copy.
2097 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2098 {
2099 float* const ap0 = ap + i[0] * astride[0];
2100 float* const bp0 = bp + i[0] * bstride[0];
2101 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2102 {
2103 float* ap1 = ap0 + i[1] * astride[1];
2104 float* bp1 = bp0 + i[1] * bstride[1];
2105 for (i[2] = 0; i[2] < dim[2]; i[2]++)
2106 {
2107 for (x = 0; x < dim[3]; x++)
2108 bp1[x] = ccv_max(ap1[x], min);
2109 ap1 += astride[2];
2110 bp1 += bstride[2];
2111 }
2112 }
2113 }
2114 } else {
2115 if (astride[2] == dim[3] && bstride[2] == dim[3])
2116 {
2117 // Special casing if astride[2] is the same as dim[3]
2118 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2119 {
2120 float* ap0 = ap + i[0] * astride[0];
2121 float* bp0 = bp + i[0] * bstride[0];
2122 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2123 {
2124 for (x = 0; x < count; x++)
2125 bp0[x] = ccv_clamp(ap0[x], min, max);
2126 ap0 += astride[1];
2127 bp0 += bstride[1];
2128 }
2129 }
2130 return CCV_NNC_EXEC_SUCCESS;
2131 }
2132 // Non-optimal case, need to do skip copy.
2133 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2134 {
2135 float* const ap0 = ap + i[0] * astride[0];
2136 float* const bp0 = bp + i[0] * bstride[0];
2137 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2138 {
2139 float* ap1 = ap0 + i[1] * astride[1];
2140 float* bp1 = bp0 + i[1] * bstride[1];
2141 for (i[2] = 0; i[2] < dim[2]; i[2]++)
2142 {
2143 for (x = 0; x < dim[3]; x++)
2144 bp1[x] = ccv_clamp(ap1[x], min, max);
2145 ap1 += astride[2];
2146 bp1 += bstride[2];
2147 }
2148 }
2149 }
2150 }
2151 return CCV_NNC_EXEC_SUCCESS;
2152}
2153
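A note on the forward kernel that ends at line 2152: an unset bound is stored as NaN in cmd.info.clamp (hence the assert at line 2016 that at least one bound is set), so the three branches implement max-only clipping via ccv_min, min-only clipping via ccv_max, and two-sided clamping via ccv_clamp. A minimal scalar sketch of that dispatch, in plain C with illustrative names rather than the ccv tensor API:

#include <math.h>
#include <stdio.h>

/* Scalar model of the clamp forward dispatch: a NaN bound means "no bound". */
static float clamp_forw(float v, float min, float max)
{
	if (isnan(min))
		return v < max ? v : max; /* max-only: ccv_min(v, max) */
	else if (isnan(max))
		return v > min ? v : min; /* min-only: ccv_max(v, min) */
	return v < min ? min : (v > max ? max : v); /* two-sided: ccv_clamp */
}

int main(void)
{
	printf("%g\n", clamp_forw(5.f, NAN, 3.f));  /* 3: clipped from above only */
	printf("%g\n", clamp_forw(-2.f, 0.f, NAN)); /* 0: clipped from below only */
	printf("%g\n", clamp_forw(0.5f, 0.f, 1.f)); /* 0.5: inside [0, 1] */
	return 0;
}

The stride checks (e.g. astride[2] == dim[3]) select the fast path where the two innermost dimensions are contiguous, so dim[2] * dim[3] elements can be swept in a single inner loop per (i[0], i[1]) slice instead of the four nested loops of the strided path.
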
2154static int _ccv_nnc_clamp_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
2155{
2156 assert(input_size == 3);
2157 const ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0]; // gradient
2158 const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[2];
2159 assert(output_size == 1);
2160 ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[0];
2161 // Assuming this is float 32.
2162 int dim[CCV_NNC_MAX_DIM_ALLOC];
2163 int hstride[CCV_NNC_MAX_DIM_ALLOC];
2164 int bstride[CCV_NNC_MAX_DIM_ALLOC];
2165 assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2);
2166 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
2167 ccv_nnc_tensor_view_get_dim(g, dim);
2168 ccv_nnc_tensor_view_get_dim(h, dim);
2169 assert(ccv_nnc_tensor_view_check_dim(b, dim));
2170 int x;
2171 const float min = cmd.info.clamp.min;
2172 const float max = cmd.info.clamp.max;
2173 assert(!isnan(min) || !isnan(max));
2174 if (g)
2175 {
2176 if (!CCV_IS_TENSOR_VIEW(g) && !CCV_IS_TENSOR_VIEW(h) && !CCV_IS_TENSOR_VIEW(b))
2177 {
2178 // Super optimal case, just do one for-loop for the gradient.
2179 const int tensor_count = ccv_nnc_tensor_count(g->info);
2180 if (isnan(min))
2181 {
2182 for (x = 0; x < tensor_count; x++)
2183 h->data.f32[x] = b->data.f32[x] >= max ? 0 : g->data.f32[x];
2184 } else if (isnan(max)) {
2185 for (x = 0; x < tensor_count; x++)
2186 h->data.f32[x] = b->data.f32[x] <= min ? 0 : g->data.f32[x];
2187 } else {
2188 for (x = 0; x < tensor_count; x++)
2189 h->data.f32[x] = (b->data.f32[x] >= max || b->data.f32[x] <= min) ? 0 : g->data.f32[x];
2190 }
2191 return CCV_NNC_EXEC_SUCCESS;
2192 }
2193 int gstride[CCV_NNC_MAX_DIM_ALLOC];
2194 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
2195 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
2196 ccv_nnc_tensor_view_get_stride(g, gstride);
2197 ccv_nnc_tensor_view_get_stride(b, bstride);
2198 ccv_nnc_tensor_view_get_stride(h, hstride);
2199 int i[CCV_NNC_MAX_DIM + 2];
2200 float* const gp = g->data.f32;
2201 float* const bp = b->data.f32;
2202 float* const hp = h->data.f32;
2203 const int count = dim[2] * dim[3];
2204 const float min = cmd.info.clamp.min;
2205 const float max = cmd.info.clamp.max;
2206 assert(!isnan(min) || !isnan(max));
2207 if (isnan(min))
2208 {
2209 if (gstride[2] == dim[3] && bstride[2] == dim[3] && hstride[2] == dim[3])
2210 {
2211 // Special casing if gstride[2] is the same as dim[3]
2212 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2213 {
2214 float* gp0 = gp + i[0] * gstride[0];
2215 float* bp0 = bp + i[0] * bstride[0];
2216 float* hp0 = hp + i[0] * hstride[0];
2217 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2218 {
2219 for (x = 0; x < count; x++)
2220 hp0[x] = bp0[x] >= max ? 0 : gp0[x];
2221 gp0 += gstride[1];
2222 bp0 += bstride[1];
2223 hp0 += hstride[1];
2224 }
2225 }
2226 return CCV_NNC_EXEC_SUCCESS;
2227 }
2228 // Non-optimal case, need to do skip copy.
2229 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2230 {
2231 float* const gp0 = gp + i[0] * gstride[0];
2232 float* const bp0 = bp + i[0] * bstride[0];
2233 float* const hp0 = hp + i[0] * hstride[0];
2234 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2235 {
2236 float* gp1 = gp0 + i[1] * gstride[1];
2237 float* bp1 = bp0 + i[1] * bstride[1];
2238 float* hp1 = hp0 + i[1] * hstride[1];
2239 for (i[2] = 0; i[2] < dim[2]; i[2]++)
2240 {
2241 for (x = 0; x < dim[3]; x++)
2242 hp1[x] = bp1[x] >= max ? 0 : gp1[x];
2243 gp1 += gstride[2];
2244 bp1 += bstride[2];
2245 hp1 += hstride[2];
2246 }
2247 }
2248 }
2249 } else if (isnan(max)) {
2250 if (gstride[2] == dim[3] && bstride[2] == dim[3] && hstride[2] == dim[3])
2251 {
2252 // Special casing if gstride[2] is the same as dim[3]
2253 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2254 {
2255 float* gp0 = gp + i[0] * gstride[0];
2256 float* bp0 = bp + i[0] * bstride[0];
2257 float* hp0 = hp + i[0] * hstride[0];
2258 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2259 {
2260 for (x = 0; x < count; x++)
2261 hp0[x] = bp0[x] <= min ? 0 : gp0[x];
2262 gp0 += gstride[1];
2263 bp0 += bstride[1];
2264 hp0 += hstride[1];
2265 }
2266 }
2267 return CCV_NNC_EXEC_SUCCESS;
2268 }
2269 // Non-optimal case, need to do skip copy.
2270 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2271 {
2272 float* const gp0 = gp + i[0] * gstride[0];
2273 float* const bp0 = bp + i[0] * bstride[0];
2274 float* const hp0 = hp + i[0] * hstride[0];
2275 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2276 {
2277 float* gp1 = gp0 + i[1] * gstride[1];
2278 float* bp1 = bp0 + i[1] * bstride[1];
2279 float* hp1 = hp0 + i[1] * hstride[1];
2280 for (i[2] = 0; i[2] < dim[2]; i[2]++)
2281 {
2282 for (x = 0; x < dim[3]; x++)
2283 hp1[x] = bp1[x] <= min ? 0 : gp1[x];
2284 gp1 += gstride[2];
2285 bp1 += bstride[2];
2286 hp1 += hstride[2];
2287 }
2288 }
2289 }
2290 } else {
2291 if (gstride[2] == dim[3] && bstride[2] == dim[3] && hstride[2] == dim[3])
2292 {
2293 // Special casing if gstride[2] is the same as dim[3]
2294 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2295 {
2296 float* gp0 = gp + i[0] * gstride[0];
2297 float* bp0 = bp + i[0] * bstride[0];
2298 float* hp0 = hp + i[0] * hstride[0];
2299 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2300 {
2301 for (x = 0; x < count; x++)
2302 hp0[x] = (bp0[x] >= max || bp0[x] <= min) ? 0 : gp0[x];
2303 gp0 += gstride[1];
2304 bp0 += bstride[1];
2305 hp0 += hstride[1];
2306 }
2307 }
2308 return CCV_NNC_EXEC_SUCCESS;
2309 }
2310 // Non-optimal case, need to do skip copy.
2311 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2312 {
2313 float* const gp0 = gp + i[0] * gstride[0];
2314 float* const bp0 = bp + i[0] * bstride[0];
2315 float* const hp0 = hp + i[0] * hstride[0];
2316 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2317 {
2318 float* gp1 = gp0 + i[1] * gstride[1];
2319 float* bp1 = bp0 + i[1] * bstride[1];
2320 float* hp1 = hp0 + i[1] * hstride[1];
2321 for (i[2] = 0; i[2] < dim[2]; i[2]++)
2322 {
2323 for (x = 0; x < dim[3]; x++)
2324 hp1[x] = (bp1[x] >= max || bp1[x] <= min) ? 0 : gp1[x];
2325 gp1 += gstride[2];
2326 bp1 += bstride[2];
2327 hp1 += hstride[2];
2328 }
2329 }
2330 }
2331 }
2332 } else {
2333 if (!CCV_IS_TENSOR_VIEW(h) && !CCV_IS_TENSOR_VIEW(b))
2334 {
2335 // Super optimal case, just do one for-loop for the gradient mask.
2336 const int tensor_count = ccv_nnc_tensor_count(h->info);
2337 if (isnan(min))
2338 {
2339 for (x = 0; x < tensor_count; x++)
2340 h->data.f32[x] = b->data.f32[x] >= max ? 0 : 1;
2341 } else if (isnan(max)) {
2342 for (x = 0; x < tensor_count; x++)
2343 h->data.f32[x] = b->data.f32[x] <= min ? 0 : 1;
2344 } else {
2345 for (x = 0; x < tensor_count; x++)
2346 h->data.f32[x] = (b->data.f32[x] >= max || b->data.f32[x] <= min) ? 0 : 1;
2347 }
2348 return CCV_NNC_EXEC_SUCCESS;
2349 }
2350 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
2351 ccv_nnc_tensor_view_get_stride(b, bstride);
2352 ccv_nnc_tensor_view_get_stride(h, hstride);
2353 int i[CCV_NNC_MAX_DIM + 2];
2354 float* const bp = b->data.f32;
2355 float* const hp = h->data.f32;
2356 const int count = dim[2] * dim[3];
2357 const float min = cmd.info.clamp.min;
2358 const float max = cmd.info.clamp.max;
2359 assert(!isnan(min) || !isnan(max));
2360 if (isnan(min))
2361 {
2362 if (bstride[2] == dim[3] && hstride[2] == dim[3])
2363 {
2364 // Special casing if bstride[2] is the same as dim[3]
2365 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2366 {
2367 float* bp0 = bp + i[0] * bstride[0];
2368 float* hp0 = hp + i[0] * hstride[0];
2369 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2370 {
2371 for (x = 0; x < count; x++)
2372 hp0[x] = bp0[x] >= max ? 0 : 1;
2373 bp0 += bstride[1];
2374 hp0 += hstride[1];
2375 }
2376 }
2377 return CCV_NNC_EXEC_SUCCESS;
2378 }
2379 // Non-optimal case, need to do skip copy.
2380 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2381 {
2382 float* const bp0 = bp + i[0] * bstride[0];
2383 float* const hp0 = hp + i[0] * hstride[0];
2384 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2385 {
2386 float* bp1 = bp0 + i[1] * bstride[1];
2387 float* hp1 = hp0 + i[1] * hstride[1];
2388 for (i[2] = 0; i[2] < dim[2]; i[2]++)
2389 {
2390 for (x = 0; x < dim[3]; x++)
2391 hp1[x] = bp1[x] >= max ? 0 : 1;
2392 bp1 += bstride[2];
2393 hp1 += hstride[2];
2394 }
2395 }
2396 }
2397 } else if (isnan(max)) {
2398 if (bstride[2] == dim[3] && hstride[2] == dim[3])
2399 {
2400 // Special casing if bstride[2] is the same as dim[3]
2401 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2402 {
2403 float* bp0 = bp + i[0] * bstride[0];
2404 float* hp0 = hp + i[0] * hstride[0];
2405 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2406 {
2407 for (x = 0; x < count; x++)
2408 hp0[x] = bp0[x] <= min ? 0 : 1;
2409 bp0 += bstride[1];
2410 hp0 += hstride[1];
2411 }
2412 }
2413 return CCV_NNC_EXEC_SUCCESS;
2414 }
2415 // Non-optimal case, need to do skip copy.
2416 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2417 {
2418 float* const bp0 = bp + i[0] * bstride[0];
2419 float* const hp0 = hp + i[0] * hstride[0];
2420 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2421 {
2422 float* bp1 = bp0 + i[1] * bstride[1];
2423 float* hp1 = hp0 + i[1] * hstride[1];
2424 for (i[2] = 0; i[2] < dim[2]; i[2]++)
2425 {
2426 for (x = 0; x < dim[3]; x++)
2427 hp1[x] = bp1[x] <= min ? 0 : 1;
2428 bp1 += bstride[2];
2429 hp1 += hstride[2];
2430 }
2431 }
2432 }
2433 } else {
2434 if (bstride[2] == dim[3] && hstride[2] == dim[3])
2435 {
2436 // Special casing if bstride[2] is the same as dim[3]
2437 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2438 {
2439 float* bp0 = bp + i[0] * bstride[0];
2440 float* hp0 = hp + i[0] * hstride[0];
2441 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2442 {
2443 for (x = 0; x < count; x++)
2444 hp0[x] = (bp0[x] >= max || bp0[x] <= min) ? 0 : 1;
2445 bp0 += bstride[1];
2446 hp0 += hstride[1];
2447 }
2448 }
2449 return CCV_NNC_EXEC_SUCCESS;
2450 }
2451 // Non-optimal case, need to do skip copy.
2452 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2453 {
2454 float* const bp0 = bp + i[0] * bstride[0];
2455 float* const hp0 = hp + i[0] * hstride[0];
2456 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2457 {
2458 float* bp1 = bp0 + i[1] * bstride[1];
2459 float* hp1 = hp0 + i[1] * hstride[1];
2460 for (i[2] = 0; i[2] < dim[2]; i[2]++)
2461 {
2462 for (x = 0; x < dim[3]; x++)
2463 hp1[x] = (bp1[x] >= max || bp1[x] <= min) ? 0 : 1;
2464 bp1 += bstride[2];
2465 hp1 += hstride[2];
2466 }
2467 }
2468 }
2469 }
2470 }
2471 return CCV_NNC_EXEC_SUCCESS;
2472}
2473
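The backward kernel above implements the standard clamp gradient: a pass-through mask that copies g where the forward output b stayed strictly inside the active bounds, and writes 0 where it saturated (b >= max or b <= min). In the else branch at line 2332, where no incoming gradient is supplied, the kernel writes the bare 0/1 mask instead. A scalar sketch of that rule, again in plain C with illustrative names rather than the ccv tensor API:

#include <math.h>

/* Scalar model of the clamp backward rule. b is the forward output and g the
 * incoming gradient; pass has_g = 0 to exercise the bare 0/1 mask branch. */
static float clamp_back(float b, float g, int has_g, float min, float max)
{
	const float pass = has_g ? g : 1.f;
	if (isnan(min))
		return b >= max ? 0.f : pass; /* saturated at the upper bound */
	else if (isnan(max))
		return b <= min ? 0.f : pass; /* saturated at the lower bound */
	return (b >= max || b <= min) ? 0.f : pass;
}
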
2474 REGISTER_COMMAND_BACKEND(CCV_NNC_EWSUM_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2475{
2476 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2477 registry->tensor_datatypes = CCV_32F;
2478 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2479 registry->algorithms = 1;
2480 registry->exec = _ccv_nnc_ewsum_forw;
2481}
2482
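All of the registration blocks in this tail share one shape: REGISTER_COMMAND_BACKEND pastes the command and backend identifiers into a uniquely named registration function (the expanded name is visible in this report, e.g. _register_command_CCV_NNC_EWSUM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF), and the body fills in the registry entry for that command/backend pair. A sketch of the assumed macro shape, inferred from that expansion rather than quoted from ccv_nnc_internal.h:

/* Assumed shape, inferred from the expanded names in this report: token-paste
 * the command and backend identifiers into one registration function. */
#define REGISTER_COMMAND_BACKEND(x, y) void _register_command_ ## x ## _backend_ ## y

Each body then declares what this CPU reference backend supports: NHWC, NCHW, or CHWN tensor layouts, CCV_32F data only, CPU memory, one algorithm, and the exec function that implements the command.
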
2483 REGISTER_COMMAND_BACKEND(CCV_NNC_EWSUM_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2484{
2485 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2486 registry->tensor_datatypes = CCV_32F;
2487 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2488 registry->algorithms = 1;
2489 registry->exec = _ccv_nnc_ewsum_back;
2490}
2491
2492 REGISTER_COMMAND_BACKEND(CCV_NNC_EWPROD_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2493{
2494 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2495 registry->tensor_datatypes = CCV_32F;
2496 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2497 registry->algorithms = 1;
2498 registry->exec = _ccv_nnc_ewprod_forw;
2499}
2500
2501 REGISTER_COMMAND_BACKEND(CCV_NNC_EWPROD_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2502{
2503 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2504 registry->tensor_datatypes = CCV_32F;
2505 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2506 registry->algorithms = 1;
2507 registry->exec = _ccv_nnc_ewprod_back;
2508}
2509
2510 REGISTER_COMMAND_BACKEND(CCV_NNC_EWDIV_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2511{
2512 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2513 registry->tensor_datatypes = CCV_32F;
2514 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2515 registry->algorithms = 1;
2516 registry->exec = _ccv_nnc_ewdiv_forw;
2517}
2518
2519 REGISTER_COMMAND_BACKEND(CCV_NNC_EWDIV_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2520{
2521 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2522 registry->tensor_datatypes = CCV_32F;
2523 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2524 registry->algorithms = 1;
2525 registry->exec = _ccv_nnc_ewdiv_back;
2526}
2527
2528 REGISTER_COMMAND_BACKEND(CCV_NNC_EWEXP_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2529{
2530 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2531 registry->tensor_datatypes = CCV_32F;
2532 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2533 registry->algorithms = 1;
2534 registry->exec = _ccv_nnc_ewexp_forw;
2535}
2536
2537 REGISTER_COMMAND_BACKEND(CCV_NNC_EWEXP_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2538{
2539 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2540 registry->tensor_datatypes = CCV_32F;
2541 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2542 registry->algorithms = 1;
2543 registry->exec = _ccv_nnc_ewexp_back;
2544}
2545
2546 REGISTER_COMMAND_BACKEND(CCV_NNC_EWSOFTPLUS_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2547{
2548 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2549 registry->tensor_datatypes = CCV_32F;
2550 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2551 registry->algorithms = 1;
2552 registry->exec = _ccv_nnc_ewsoftplus_forw;
2553}
2554
2555 REGISTER_COMMAND_BACKEND(CCV_NNC_EWSOFTPLUS_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2556{
2557 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2558 registry->tensor_datatypes = CCV_32F;
2559 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2560 registry->algorithms = 1;
2561 registry->exec = _ccv_nnc_ewsoftplus_back;
2562}
2563
2564 REGISTER_COMMAND_BACKEND(CCV_NNC_EWPOW_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2565{
2566 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2567 registry->tensor_datatypes = CCV_32F;
2568 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2569 registry->algorithms = 1;
2570 registry->exec = _ccv_nnc_ewpow_forw;
2571}
2572
2573 REGISTER_COMMAND_BACKEND(CCV_NNC_EWPOW_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2574{
2575 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2576 registry->tensor_datatypes = CCV_32F;
2577 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2578 registry->algorithms = 1;
2579 registry->exec = _ccv_nnc_ewpow_back;
2580}
2581
2582 REGISTER_COMMAND_BACKEND(CCV_NNC_EWLOG_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2583{
2584 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2585 registry->tensor_datatypes = CCV_32F;
2586 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2587 registry->algorithms = 1;
2588 registry->exec = _ccv_nnc_ewlog_forw;
2589}
2590
2591 REGISTER_COMMAND_BACKEND(CCV_NNC_EWLOG_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2592{
2593 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2594 registry->tensor_datatypes = CCV_32F;
2595 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2596 registry->algorithms = 1;
2597 registry->exec = _ccv_nnc_ewlog_back;
2598}
2599
2600 REGISTER_COMMAND_BACKEND(CCV_NNC_EWSQRT_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2601{
2602 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2603 registry->tensor_datatypes = CCV_32F;
2604 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2605 registry->algorithms = 1;
2606 registry->exec = _ccv_nnc_ewsqrt_forw;
2607}
2608
2609 REGISTER_COMMAND_BACKEND(CCV_NNC_EWSQRT_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2610{
2611 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2612 registry->tensor_datatypes = CCV_32F;
2613 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2614 registry->algorithms = 1;
2615 registry->exec = _ccv_nnc_ewsqrt_back;
2616}
2617
2618 REGISTER_COMMAND_BACKEND(CCV_NNC_EWSIN_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2619{
2620 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2621 registry->tensor_datatypes = CCV_32F;
2622 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2623 registry->algorithms = 1;
2624 registry->exec = _ccv_nnc_ewsin_forw;
2625}
2626
2627 REGISTER_COMMAND_BACKEND(CCV_NNC_EWSIN_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2628{
2629 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2630 registry->tensor_datatypes = CCV_32F;
2631 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2632 registry->algorithms = 1;
2633 registry->exec = _ccv_nnc_ewsin_back;
2634}
2635
2636 REGISTER_COMMAND_BACKEND(CCV_NNC_EWCOS_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2637{
2638 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2639 registry->tensor_datatypes = CCV_32F;
2640 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2641 registry->algorithms = 1;
2642 registry->exec = _ccv_nnc_ewcos_forw;
2643}
2644
2645 REGISTER_COMMAND_BACKEND(CCV_NNC_EWCOS_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2646{
2647 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2648 registry->tensor_datatypes = CCV_32F;
2649 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2650 registry->algorithms = 1;
2651 registry->exec = _ccv_nnc_ewcos_back;
2652}
2653
2654 REGISTER_COMMAND_BACKEND(CCV_NNC_EWABS_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2655{
2656 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2657 registry->tensor_datatypes = CCV_32F;
2658 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2659 registry->algorithms = 1;
2660 registry->exec = _ccv_nnc_ewabs_forw;
2661}
2662
2663 REGISTER_COMMAND_BACKEND(CCV_NNC_EWABS_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2664{
2665 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2666 registry->tensor_datatypes = CCV_32F;
2667 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2668 registry->algorithms = 1;
2669 registry->exec = _ccv_nnc_ewabs_back;
2670}
2671
2672 REGISTER_COMMAND_BACKEND(CCV_NNC_CLAMP_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2673{
2674 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2675 registry->tensor_datatypes = CCV_32F;
2676 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2677 registry->algorithms = 1;
2678 registry->exec = _ccv_nnc_clamp_forw;
2679}
2680
2681 REGISTER_COMMAND_BACKEND(CCV_NNC_CLAMP_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
2682{
2683 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2684 registry->tensor_datatypes = CCV_32F;
2685 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2686 registry->algorithms = 1;
2687 registry->exec = _ccv_nnc_clamp_back;
2688}