Bug Summary

File: nnc/cmd/ew/ccv_nnc_ew_cpu_ref.c
Warning: line 1805, column 32
The right operand of '*' is a garbage value

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_ew_cpu_ref.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd -resource-dir /usr/local/lib/clang/19 -I ../../ -I .. 
-I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2026-03-16-132208-2687886-1 -x c ew/ccv_nnc_ew_cpu_ref.c
1#include "ccv.h"
2#include "ccv_internal.h"
3#include "nnc/ccv_nnc.h"
4#include "nnc/ccv_nnc_easy.h"
5#include "nnc/ccv_nnc_internal.h"
6#ifdef USE_OPENMP
7#include <omp.h>
8#endif
9#ifdef USE_DISPATCH
10#include <dispatch/dispatch.h>
11#endif
12
13#include "../_ccv_nnc_cpu_ref.h"
14
15void _ccv_nnc_ewsum_forw_cpu_ref_f32(ccv_nnc_tensor_view_t* const* const inputs, const int input_size, ccv_nnc_tensor_view_t* const* const outputs, const int output_size)
16{
17 if (input_size == 1 && output_size == 1)
18 {
19 _ccv_nnc_tensor_transfer_cpu_ref_f32(inputs[0], outputs[0]);
20 return;
21 }
22 // Assuming this is float 32.
23 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
24 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
25 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
26 int cstride[CCV_NNC_MAX_DIM_ALLOC(12)];
27 int x, z;
28 int k = 0;
29 // Bad, I promised this can be inplace operation. Need to first find out if there are share the same pointer first.
30 for (z = 1; z < input_size; z++)
31 {
32 ccv_nnc_tensor_view_t* c = outputs[0];
33 ccv_nnc_tensor_view_t* a = inputs[z];
34 if (c->data.f32 == a->data.f32)
35 {
36 k = z;
37 break;
38 }
39 }
40 for (z = 0; z < input_size - 1; z++)
41 {
42 ccv_nnc_tensor_view_t* c = outputs[0];
43 ccv_nnc_tensor_view_t* a = z > 0 ? c : inputs[k];
44 ccv_nnc_tensor_view_t* b = z >= k ? inputs[z + 1] : inputs[z];
45 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 45, __extension__ __PRETTY_FUNCTION__
); }))
;
46 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 46, __extension__ __PRETTY_FUNCTION__
); }))
;
47 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(c->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(c->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 47, __extension__ __PRETTY_FUNCTION__
); }))
;
48 ccv_nnc_tensor_view_get_dim(a, dim);
49 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 49, __extension__ __PRETTY_FUNCTION__
); }))
;
50 assert(ccv_nnc_tensor_view_check_dim(c, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(c, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(c, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(c, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 50, __extension__ __PRETTY_FUNCTION__
); }))
;
51 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(c)((*(int*)(c)) & CCV_TENSOR_VIEW))
52 {
53 // Super optimal case, just do one for-loop for sum.
54 const int tensor_count = ccv_nnc_tensor_count(a->info);
55 for (x = 0; x < tensor_count; x++)
56 c->data.f32[x] = a->data.f32[x] + b->data.f32[x];
57 continue;
58 }
59 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 59, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
60 ccv_nnc_tensor_view_get_stride(a, astride);
61 ccv_nnc_tensor_view_get_stride(b, bstride);
62 ccv_nnc_tensor_view_get_stride(c, cstride);
63 int i[CCV_NNC_MAX_DIM(2) + 2];
64 float* const ap = a->data.f32;
65 float* const bp = b->data.f32;
66 float* const cp = c->data.f32;
67 const int count = dim[2] * dim[3];
68 if (astride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3] && astride[3] == 1 && bstride[3] == 1 && cstride[3] == 1)
69 {
70 // Special casing if the ainc[3] is the same as dim[3] (do memcpy for the last two dim)
71 for (i[0] = 0; i[0] < dim[0]; i[0]++)
72 {
73 float* ap0 = ap + i[0] * astride[0];
74 float* bp0 = bp + i[0] * bstride[0];
75 float* cp0 = cp + i[0] * cstride[0];
76 for (i[1] = 0; i[1] < dim[1]; i[1]++)
77 {
78 for (x = 0; x < count; x++)
79 cp0[x] = ap0[x] + bp0[x];
80 ap0 += astride[1];
81 bp0 += bstride[1];
82 cp0 += cstride[1];
83 }
84 }
85 continue;
86 }
87 // Non-optimal case, need to do skip copy.
88 for (i[0] = 0; i[0] < dim[0]; i[0]++)
89 {
90 float* const ap0 = ap + i[0] * astride[0];
91 float* const bp0 = bp + i[0] * bstride[0];
92 float* const cp0 = cp + i[0] * cstride[0];
93 for (i[1] = 0; i[1] < dim[1]; i[1]++)
94 {
95 float* ap1 = ap0 + i[1] * astride[1];
96 float* bp1 = bp0 + i[1] * bstride[1];
97 float* cp1 = cp0 + i[1] * cstride[1];
98 for (i[2] = 0; i[2] < dim[2]; i[2]++)
99 {
100 for (x = 0; x < dim[3]; x++)
101 cp1[x * cstride[3]] = ap1[x * astride[3]] + bp1[x * bstride[3]];
102 ap1 += astride[2];
103 bp1 += bstride[2];
104 cp1 += cstride[2];
105 }
106 }
107 }
108 }
109}
110
111void _ccv_nnc_ewsum_forw_cpu_ref_i32(ccv_nnc_tensor_view_t* const* const inputs, const int input_size, ccv_nnc_tensor_view_t* const* const outputs, const int output_size)
112{
113 if (input_size == 1 && output_size == 1)
114 {
115 _ccv_nnc_tensor_transfer_cpu_ref_f32(inputs[0], outputs[0]);
116 return;
117 }
118 // Assuming this is float 32.
119 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
120 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
121 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
122 int cstride[CCV_NNC_MAX_DIM_ALLOC(12)];
123 int x, z;
124 int k = 0;
125 // Bad, I promised this can be inplace operation. Need to first find out if there are share the same pointer first.
126 for (z = 1; z < input_size; z++)
127 {
128 ccv_nnc_tensor_view_t* c = outputs[0];
129 ccv_nnc_tensor_view_t* a = inputs[z];
130 if (c->data.f32 == a->data.f32)
131 {
132 k = z;
133 break;
134 }
135 }
136 for (z = 0; z < input_size - 1; z++)
137 {
138 ccv_nnc_tensor_view_t* c = outputs[0];
139 ccv_nnc_tensor_view_t* a = z > 0 ? c : inputs[k];
140 ccv_nnc_tensor_view_t* b = z >= k ? inputs[z + 1] : inputs[z];
141 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 141, __extension__ __PRETTY_FUNCTION__
); }))
;
142 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 142, __extension__ __PRETTY_FUNCTION__
); }))
;
143 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(c->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(c->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 143, __extension__ __PRETTY_FUNCTION__
); }))
;
144 ccv_nnc_tensor_view_get_dim(a, dim);
145 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 145, __extension__ __PRETTY_FUNCTION__
); }))
;
146 assert(ccv_nnc_tensor_view_check_dim(c, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(c, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(c, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(c, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 146, __extension__ __PRETTY_FUNCTION__
); }))
;
147 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(c)((*(int*)(c)) & CCV_TENSOR_VIEW))
148 {
149 // Super optimal case, just do one for-loop for sum.
150 const int tensor_count = ccv_nnc_tensor_count(a->info);
151 for (x = 0; x < tensor_count; x++)
152 c->data.f32[x] = a->data.f32[x] + b->data.f32[x];
153 continue;
154 }
155 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 155, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
156 ccv_nnc_tensor_view_get_stride(a, astride);
157 ccv_nnc_tensor_view_get_stride(b, bstride);
158 ccv_nnc_tensor_view_get_stride(c, cstride);
159 int i[CCV_NNC_MAX_DIM(2) + 2];
160 int* const ap = a->data.i32;
161 int* const bp = b->data.i32;
162 int* const cp = c->data.i32;
163 const int count = dim[2] * dim[3];
164 if (astride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3] && astride[3] == 1 && bstride[3] == 1 && cstride[3] == 1)
165 {
166 // Special casing if the ainc[3] is the same as dim[3] (do memcpy for the last two dim)
167 for (i[0] = 0; i[0] < dim[0]; i[0]++)
168 {
169 int* ap0 = ap + i[0] * astride[0];
170 int* bp0 = bp + i[0] * bstride[0];
171 int* cp0 = cp + i[0] * cstride[0];
172 for (i[1] = 0; i[1] < dim[1]; i[1]++)
173 {
174 for (x = 0; x < count; x++)
175 cp0[x] = ap0[x] + bp0[x];
176 ap0 += astride[1];
177 bp0 += bstride[1];
178 cp0 += cstride[1];
179 }
180 }
181 continue;
182 }
183 // Non-optimal case, need to do skip copy.
184 for (i[0] = 0; i[0] < dim[0]; i[0]++)
185 {
186 int* const ap0 = ap + i[0] * astride[0];
187 int* const bp0 = bp + i[0] * bstride[0];
188 int* const cp0 = cp + i[0] * cstride[0];
189 for (i[1] = 0; i[1] < dim[1]; i[1]++)
190 {
191 int* ap1 = ap0 + i[1] * astride[1];
192 int* bp1 = bp0 + i[1] * bstride[1];
193 int* cp1 = cp0 + i[1] * cstride[1];
194 for (i[2] = 0; i[2] < dim[2]; i[2]++)
195 {
196 for (x = 0; x < dim[3]; x++)
197 cp1[x * cstride[3]] = ap1[x * astride[3]] + bp1[x * bstride[3]];
198 ap1 += astride[2];
199 bp1 += bstride[2];
200 cp1 += cstride[2];
201 }
202 }
203 }
204 }
205}
206
207static int _ccv_nnc_ewsum_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
208{
209 if (outputs[0]->info.datatype == CCV_32S)
210 _ccv_nnc_ewsum_forw_cpu_ref_i32((ccv_nnc_tensor_view_t**)inputs, input_size, (ccv_nnc_tensor_view_t**)outputs, output_size);
211 else
212 _ccv_nnc_ewsum_forw_cpu_ref_f32((ccv_nnc_tensor_view_t**)inputs, input_size, (ccv_nnc_tensor_view_t**)outputs, output_size);
213 return CCV_NNC_EXEC_SUCCESS;
214}
215
216static int _ccv_nnc_ewsum_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
217{
218 // D[x + y + z, x] = 1
219 int i;
220 if (inputs[0] == 0)
221 {
222 // Set them to 1.
223 for (i = 0; i < output_size; i++)
224 if (outputs[i])
225 _ccv_nnc_tensor_set_cpu_ref_f32((ccv_nnc_tensor_view_t*)outputs[i], 1);
226 } else {
227 // Copy over the gradient (If they are not pointing to the same tensor already).
228 for (i = 0; i < output_size; i++)
229 if (outputs[i] && inputs[0]->data.f32 != outputs[i]->data.f32)
230 _ccv_nnc_tensor_transfer_cpu_ref_f32((ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)outputs[i]);
231 }
232 return CCV_NNC_EXEC_SUCCESS;
233}
234
235void _ccv_nnc_ewprod_forw_cpu_ref(ccv_nnc_tensor_view_t* const* const inputs, const int input_size, ccv_nnc_tensor_view_t* const* const outputs, const int output_size)
236{
237 if (input_size == 1 && output_size == 1)
238 {
239 _ccv_nnc_tensor_transfer_cpu_ref_f32(inputs[0], outputs[0]);
240 return;
241 }
242 // Assuming this is float 32.
243 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
244 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
245 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
246 int cstride[CCV_NNC_MAX_DIM_ALLOC(12)];
247 int x, z;
248 int k = 0;
249 // Bad, I promised this can be inplace operation. Need to first find out if there are share the same pointer first.
250 for (z = 1; z < input_size; z++)
251 {
252 ccv_nnc_tensor_view_t* c = outputs[0];
253 ccv_nnc_tensor_view_t* a = inputs[z];
254 if (c->data.f32 == a->data.f32)
255 {
256 k = z;
257 break;
258 }
259 }
260 for (z = 0; z < input_size - 1; z++)
261 {
262 ccv_nnc_tensor_view_t* c = outputs[0];
263 ccv_nnc_tensor_view_t* a = z > 0 ? c : inputs[k];
264 ccv_nnc_tensor_view_t* b = z >= k ? inputs[z + 1] : inputs[z];
265 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 265, __extension__ __PRETTY_FUNCTION__
); }))
;
266 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 266, __extension__ __PRETTY_FUNCTION__
); }))
;
267 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(c->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(c->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 267, __extension__ __PRETTY_FUNCTION__
); }))
;
268 ccv_nnc_tensor_view_get_dim(a, dim);
269 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 269, __extension__ __PRETTY_FUNCTION__
); }))
;
270 assert(ccv_nnc_tensor_view_check_dim(c, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(c, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(c, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(c, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 270, __extension__ __PRETTY_FUNCTION__
); }))
;
271 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(c)((*(int*)(c)) & CCV_TENSOR_VIEW))
272 {
273 // Super optimal case, just do one for-loop for sum.
274 const int tensor_count = ccv_nnc_tensor_count(a->info);
275 for (x = 0; x < tensor_count; x++)
276 c->data.f32[x] = a->data.f32[x] * b->data.f32[x];
277 continue;
278 }
279 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 279, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
280 ccv_nnc_tensor_view_get_stride(a, astride);
281 ccv_nnc_tensor_view_get_stride(b, bstride);
282 ccv_nnc_tensor_view_get_stride(c, cstride);
283 int i[CCV_NNC_MAX_DIM(2) + 2];
284 float* const ap = a->data.f32;
285 float* const bp = b->data.f32;
286 float* const cp = c->data.f32;
287 const int count = dim[2] * dim[3];
288 if (astride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3])
289 {
290 // Special casing if the ainc[3] is the same as dim[3]
291 for (i[0] = 0; i[0] < dim[0]; i[0]++)
292 {
293 float* ap0 = ap + i[0] * astride[0];
294 float* bp0 = bp + i[0] * bstride[0];
295 float* cp0 = cp + i[0] * cstride[0];
296 for (i[1] = 0; i[1] < dim[1]; i[1]++)
297 {
298 for (x = 0; x < count; x++)
299 cp0[x] = ap0[x] * bp0[x];
300 ap0 += astride[1];
301 bp0 += bstride[1];
302 cp0 += cstride[1];
303 }
304 }
305 continue;
306 }
307 // Non-optimal case, need to do skip copy.
308 for (i[0] = 0; i[0] < dim[0]; i[0]++)
309 {
310 float* const ap0 = ap + i[0] * astride[0];
311 float* const bp0 = bp + i[0] * bstride[0];
312 float* const cp0 = cp + i[0] * cstride[0];
313 for (i[1] = 0; i[1] < dim[1]; i[1]++)
314 {
315 float* ap1 = ap0 + i[1] * astride[1];
316 float* bp1 = bp0 + i[1] * bstride[1];
317 float* cp1 = cp0 + i[1] * cstride[1];
318 for (i[2] = 0; i[2] < dim[2]; i[2]++)
319 {
320 for (x = 0; x < dim[3]; x++)
321 cp1[x] = ap1[x] * bp1[x];
322 ap1 += astride[2];
323 bp1 += bstride[2];
324 cp1 += cstride[2];
325 }
326 }
327 }
328 }
329}
330
331static int _ccv_nnc_ewprod_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
332{
333 _ccv_nnc_ewprod_forw_cpu_ref((ccv_nnc_tensor_view_t**)inputs, input_size, (ccv_nnc_tensor_view_t**)outputs, output_size);
334 return CCV_NNC_EXEC_SUCCESS;
335}
336
337static int _ccv_nnc_ewprod_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
338{
339 // D[x * y * z, x] = y * z
340 // Assuming this is float 32.
341 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
342 int gstride[CCV_NNC_MAX_DIM_ALLOC(12)];
343 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
344 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
345 int hstride[CCV_NNC_MAX_DIM_ALLOC(12)];
346 int x, z;
347 ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
348 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[output_size + 1];
349 if (g == 0)
350 {
351 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 351, __extension__ __PRETTY_FUNCTION__
); }))
;
352 ccv_nnc_tensor_view_get_dim(b, dim);
353 ccv_nnc_tensor_view_get_stride(b, bstride);
354 for (z = 0; z < output_size; z++)
355 {
356 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[z + 1];
357 ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[z];
358 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 358, __extension__ __PRETTY_FUNCTION__
); }))
;
359 assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(h->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(h->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 359, __extension__ __PRETTY_FUNCTION__
); }))
;
360 assert(ccv_nnc_tensor_view_check_dim(a, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(a, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(a, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(a, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 360, __extension__ __PRETTY_FUNCTION__
); }))
;
361 assert(ccv_nnc_tensor_view_check_dim(h, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(h, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(h, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(h, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 361, __extension__ __PRETTY_FUNCTION__
); }))
;
362 ccv_nnc_tensor_view_get_stride(a, astride);
363 ccv_nnc_tensor_view_get_stride(h, hstride);
364 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(h)((*(int*)(h)) & CCV_TENSOR_VIEW))
365 {
366 // Super optimal case, just do one for-loop for sum.
367 const int tensor_count = ccv_nnc_tensor_count(b->info);
368 for (x = 0; x < tensor_count; x++)
369 h->data.f32[x] = b->data.f32[x] / a->data.f32[x];
370 continue;
371 }
372 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 372, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
373 int i[CCV_NNC_MAX_DIM(2) + 2];
374 float* const ap = a->data.f32;
375 float* const bp = b->data.f32;
376 float* const hp = h->data.f32;
377 const int count = dim[2] * dim[3];
378 if (astride[2] == dim[3] && bstride[2] == dim[3] && hstride[2] == dim[3])
379 {
380 // Special casing if the ainc[3] is the same as dim[3]
381 for (i[0] = 0; i[0] < dim[0]; i[0]++)
382 {
383 float* ap0 = ap + i[0] * astride[0];
384 float* bp0 = bp + i[0] * bstride[0];
385 float* hp0 = hp + i[0] * hstride[0];
386 for (i[1] = 0; i[1] < dim[1]; i[1]++)
387 {
388 for (x = 0; x < count; x++)
389 hp0[x] = bp0[x] / ap0[x];
390 ap0 += astride[1];
391 bp0 += bstride[1];
392 hp0 += hstride[1];
393 }
394 }
395 continue;
396 }
397 // Non-optimal case, need to do skip copy.
398 for (i[0] = 0; i[0] < dim[0]; i[0]++)
399 {
400 float* const ap0 = ap + i[0] * astride[0];
401 float* const bp0 = bp + i[0] * bstride[0];
402 float* const hp0 = hp + i[0] * hstride[0];
403 for (i[1] = 0; i[1] < dim[1]; i[1]++)
404 {
405 float* ap1 = ap0 + i[1] * astride[1];
406 float* bp1 = bp0 + i[1] * bstride[1];
407 float* hp1 = hp0 + i[1] * hstride[1];
408 for (i[2] = 0; i[2] < dim[2]; i[2]++)
409 {
410 for (x = 0; x < dim[3]; x++)
411 hp1[x] = bp1[x] / ap1[x];
412 ap1 += astride[2];
413 bp1 += bstride[2];
414 hp1 += hstride[2];
415 }
416 }
417 }
418 }
419 } else {
420 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(g->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(g->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 420, __extension__ __PRETTY_FUNCTION__
); }))
;
421 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 421, __extension__ __PRETTY_FUNCTION__
); }))
;
422 ccv_nnc_tensor_view_get_dim(b, dim);
423 assert(ccv_nnc_tensor_view_check_dim(g, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(g, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(g, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(g, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 423, __extension__ __PRETTY_FUNCTION__
); }))
;
424 ccv_nnc_tensor_view_get_stride(b, bstride);
425 ccv_nnc_tensor_view_get_stride(g, gstride);
426 for (z = 0; z < output_size; z++)
427 {
428 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[z + 1];
429 ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[z];
430 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 430, __extension__ __PRETTY_FUNCTION__
); }))
;
431 assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(h->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(h->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 431, __extension__ __PRETTY_FUNCTION__
); }))
;
432 assert(ccv_nnc_tensor_view_check_dim(a, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(a, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(a, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(a, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 432, __extension__ __PRETTY_FUNCTION__
); }))
;
433 assert(ccv_nnc_tensor_view_check_dim(h, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(h, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(h, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(h, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 433, __extension__ __PRETTY_FUNCTION__
); }))
;
434 ccv_nnc_tensor_view_get_stride(a, astride);
435 ccv_nnc_tensor_view_get_stride(h, hstride);
436 if (!CCV_IS_TENSOR_VIEW(g)((*(int*)(g)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(h)((*(int*)(h)) & CCV_TENSOR_VIEW))
437 {
438 // Super optimal case, just do one for-loop for sum.
439 const int tensor_count = ccv_nnc_tensor_count(g->info);
440 for (x = 0; x < tensor_count; x++)
441 h->data.f32[x] = g->data.f32[x] * b->data.f32[x] / a->data.f32[x];
442 continue;
443 }
444 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 444, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
445 int i[CCV_NNC_MAX_DIM(2) + 2];
446 float* const gp = g->data.f32;
447 float* const ap = a->data.f32;
448 float* const bp = b->data.f32;
449 float* const hp = h->data.f32;
450 const int count = dim[2] * dim[3];
451 if (gstride[2] == dim[3] && astride[2] == dim[3] && bstride[2] == dim[3] && hstride[2] == dim[3])
452 {
453 // Special casing if the ainc[3] is the same as dim[3]
454 for (i[0] = 0; i[0] < dim[0]; i[0]++)
455 {
456 float* gp0 = gp + i[0] * gstride[0];
457 float* ap0 = ap + i[0] * astride[0];
458 float* bp0 = bp + i[0] * bstride[0];
459 float* hp0 = hp + i[0] * hstride[0];
460 for (i[1] = 0; i[1] < dim[1]; i[1]++)
461 {
462 for (x = 0; x < count; x++)
463 hp0[x] = gp0[x] * bp0[x] / ap0[x];
464 gp0 += gstride[1];
465 ap0 += astride[1];
466 bp0 += bstride[1];
467 hp0 += hstride[1];
468 }
469 }
470 continue;
471 }
472 // Non-optimal case, need to do skip copy.
473 for (i[0] = 0; i[0] < dim[0]; i[0]++)
474 {
475 float* const gp0 = gp + i[0] * gstride[0];
476 float* const ap0 = ap + i[0] * astride[0];
477 float* const bp0 = bp + i[0] * bstride[0];
478 float* const hp0 = hp + i[0] * hstride[0];
479 for (i[1] = 0; i[1] < dim[1]; i[1]++)
480 {
481 float* gp1 = gp0 + i[1] * gstride[1];
482 float* ap1 = ap0 + i[1] * astride[1];
483 float* bp1 = bp0 + i[1] * bstride[1];
484 float* hp1 = hp0 + i[1] * hstride[1];
485 for (i[2] = 0; i[2] < dim[2]; i[2]++)
486 {
487 for (x = 0; x < dim[3]; x++)
488 hp1[x] = gp1[x] * bp1[x] / ap1[x];
489 gp1 += gstride[2];
490 ap1 += astride[2];
491 bp1 += bstride[2];
492 hp1 += hstride[2];
493 }
494 }
495 }
496 }
497 }
498 return CCV_NNC_EXEC_SUCCESS;
499}
500
501static void _ccv_nnc_ewdiv_forw_cpu_ref(const float p, ccv_nnc_tensor_view_t* const a, ccv_nnc_tensor_view_t* const b, ccv_nnc_tensor_view_t* const c)
502{
503 // Assuming this is float 32.
504 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
505 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
506 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
507 int cstride[CCV_NNC_MAX_DIM_ALLOC(12)];
508 if (a == 0) // Take 0 as all ones tensor.
509 {
510 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 510, __extension__ __PRETTY_FUNCTION__
); }))
;
511 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(c->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(c->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 511, __extension__ __PRETTY_FUNCTION__
); }))
;
512 ccv_nnc_tensor_view_get_dim(b, dim);
513 assert(ccv_nnc_tensor_view_check_dim(c, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(c, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(c, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(c, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 513, __extension__ __PRETTY_FUNCTION__
); }))
;
514 int x;
515 if (!CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(c)((*(int*)(c)) & CCV_TENSOR_VIEW))
516 {
517 // Super optimal case, just do one for-loop for sum.
518 const int tensor_count = ccv_nnc_tensor_count(b->info);
519 for (x = 0; x < tensor_count; x++)
520 c->data.f32[x] = p / b->data.f32[x];
521 return;
522 }
523 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 523, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
524 ccv_nnc_tensor_view_get_stride(b, bstride);
525 ccv_nnc_tensor_view_get_stride(c, cstride);
526 int i[CCV_NNC_MAX_DIM(2) + 2];
527 float* const bp = b->data.f32;
528 float* const cp = c->data.f32;
529 const int count = dim[2] * dim[3];
530 if (bstride[2] == dim[3] && cstride[2] == dim[3])
531 {
532 // Special casing if the ainc[3] is the same as dim[3]
533 for (i[0] = 0; i[0] < dim[0]; i[0]++)
534 {
535 float* bp0 = bp + i[0] * bstride[0];
536 float* cp0 = cp + i[0] * cstride[0];
537 for (i[1] = 0; i[1] < dim[1]; i[1]++)
538 {
539 for (x = 0; x < count; x++)
540 cp0[x] = p / bp0[x];
541 bp0 += bstride[1];
542 cp0 += cstride[1];
543 }
544 }
545 return;
546 }
547 // Non-optimal case, need to do skip copy.
548 for (i[0] = 0; i[0] < dim[0]; i[0]++)
549 {
550 float* const bp0 = bp + i[0] * bstride[0];
551 float* const cp0 = cp + i[0] * cstride[0];
552 for (i[1] = 0; i[1] < dim[1]; i[1]++)
553 {
554 float* bp1 = bp0 + i[1] * bstride[1];
555 float* cp1 = cp0 + i[1] * cstride[1];
556 for (i[2] = 0; i[2] < dim[2]; i[2]++)
557 {
558 for (x = 0; x < dim[3]; x++)
559 cp1[x] = p / bp1[x];
560 bp1 += bstride[2];
561 cp1 += cstride[2];
562 }
563 }
564 }
565 } else {
566 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 566, __extension__ __PRETTY_FUNCTION__
); }))
;
567 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 567, __extension__ __PRETTY_FUNCTION__
); }))
;
568 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(c->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(c->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 568, __extension__ __PRETTY_FUNCTION__
); }))
;
569 ccv_nnc_tensor_view_get_dim(a, dim);
570 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 570, __extension__ __PRETTY_FUNCTION__
); }))
;
571 assert(ccv_nnc_tensor_view_check_dim(c, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(c, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(c, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(c, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 571, __extension__ __PRETTY_FUNCTION__
); }))
;
572 int x;
573 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(c)((*(int*)(c)) & CCV_TENSOR_VIEW))
574 {
575 // Super optimal case, just do one for-loop for sum.
576 const int tensor_count = ccv_nnc_tensor_count(a->info);
577 for (x = 0; x < tensor_count; x++)
578 c->data.f32[x] = p * a->data.f32[x] / b->data.f32[x];
579 return;
580 }
581 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 581, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
582 ccv_nnc_tensor_view_get_stride(a, astride);
583 ccv_nnc_tensor_view_get_stride(b, bstride);
584 ccv_nnc_tensor_view_get_stride(c, cstride);
585 int i[CCV_NNC_MAX_DIM(2) + 2];
586 float* const ap = a->data.f32;
587 float* const bp = b->data.f32;
588 float* const cp = c->data.f32;
589 const int count = dim[2] * dim[3];
590 if (astride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3])
591 {
592 // Special casing if the ainc[3] is the same as dim[3]
593 for (i[0] = 0; i[0] < dim[0]; i[0]++)
594 {
595 float* ap0 = ap + i[0] * astride[0];
596 float* bp0 = bp + i[0] * bstride[0];
597 float* cp0 = cp + i[0] * cstride[0];
598 for (i[1] = 0; i[1] < dim[1]; i[1]++)
599 {
600 for (x = 0; x < count; x++)
601 cp0[x] = p * ap0[x] / bp0[x];
602 ap0 += astride[1];
603 bp0 += bstride[1];
604 cp0 += cstride[1];
605 }
606 }
607 return;
608 }
609 // Non-optimal case, need to do skip copy.
610 for (i[0] = 0; i[0] < dim[0]; i[0]++)
611 {
612 float* const ap0 = ap + i[0] * astride[0];
613 float* const bp0 = bp + i[0] * bstride[0];
614 float* const cp0 = cp + i[0] * cstride[0];
615 for (i[1] = 0; i[1] < dim[1]; i[1]++)
616 {
617 float* ap1 = ap0 + i[1] * astride[1];
618 float* bp1 = bp0 + i[1] * bstride[1];
619 float* cp1 = cp0 + i[1] * cstride[1];
620 for (i[2] = 0; i[2] < dim[2]; i[2]++)
621 {
622 for (x = 0; x < dim[3]; x++)
623 cp1[x] = p * ap1[x] / bp1[x];
624 ap1 += astride[2];
625 bp1 += bstride[2];
626 cp1 += cstride[2];
627 }
628 }
629 }
630 }
631}
632
633static int _ccv_nnc_ewdiv_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
634{
635 _ccv_nnc_ewdiv_forw_cpu_ref(1, (ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)inputs[1], (ccv_nnc_tensor_view_t*)outputs[0]);
636 return CCV_NNC_EXEC_SUCCESS;
637}
638
639static int _ccv_nnc_ewdiv_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
640{
641 // D[x / y, x] = 1 / y, D[x / y, y] = -x / y^2
642 if (output_size == 1 || outputs[1] == 0)
643 {
644 // When we only need D[x / y, x]
645 _ccv_nnc_ewdiv_forw_cpu_ref(1, (ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)inputs[2], (ccv_nnc_tensor_view_t*)outputs[0]);
646 return CCV_NNC_EXEC_SUCCESS;
647 }
648 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
649 int gstride[CCV_NNC_MAX_DIM_ALLOC(12)];
650 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
651 int cstride[CCV_NNC_MAX_DIM_ALLOC(12)];
652 int hastride[CCV_NNC_MAX_DIM_ALLOC(12)];
653 int hbstride[CCV_NNC_MAX_DIM_ALLOC(12)];
654 ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
655 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[2];
656 ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)inputs[3];
657 ccv_nnc_tensor_view_t* ha = (ccv_nnc_tensor_view_t*)outputs[0];
658 ccv_nnc_tensor_view_t* hb = (ccv_nnc_tensor_view_t*)outputs[1];
659 if (g == 0)
660 {
661 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 661, __extension__ __PRETTY_FUNCTION__
); }))
;
662 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(c->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(c->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 662, __extension__ __PRETTY_FUNCTION__
); }))
;
663 assert(ccv_nnc_tensor_nd(hb->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(hb->info.dim) <= (2)
+ 2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(hb->
info.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(hb->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 663, __extension__ __PRETTY_FUNCTION__
); }))
;
664 ccv_nnc_tensor_view_get_dim(b, dim);
665 assert(ccv_nnc_tensor_view_check_dim(c, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(c, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(c, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(c, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 665, __extension__ __PRETTY_FUNCTION__
); }))
;
666 assert(ccv_nnc_tensor_view_check_dim(hb, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(hb, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(hb, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(hb, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 666, __extension__ __PRETTY_FUNCTION__
); }))
;
667 if (ha)
668 {
669 assert(ccv_nnc_tensor_nd(ha->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(ha->info.dim) <= (2)
+ 2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(ha->
info.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(ha->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 669, __extension__ __PRETTY_FUNCTION__
); }))
;
670 assert(ccv_nnc_tensor_view_check_dim(ha, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(ha, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(ha, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(ha, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 670, __extension__ __PRETTY_FUNCTION__
); }))
;
671 }
672 int x;
673 if (!CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(c)((*(int*)(c)) & CCV_TENSOR_VIEW) && (ha == 0 || !CCV_IS_TENSOR_VIEW(ha)((*(int*)(ha)) & CCV_TENSOR_VIEW)) && !CCV_IS_TENSOR_VIEW(hb)((*(int*)(hb)) & CCV_TENSOR_VIEW))
674 {
675 // Super optimal case, just do one for-loop for sum.
676 const int tensor_count = ccv_nnc_tensor_count(b->info);
677 if (ha == 0)
678 {
679 for (x = 0; x < tensor_count; x++)
680 {
681 const float v = 1 / b->data.f32[x];
682 hb->data.f32[x] = -c->data.f32[x] * v;
683 }
684 } else {
685 for (x = 0; x < tensor_count; x++)
686 {
687 const float v = 1 / b->data.f32[x];
688 ha->data.f32[x] = v;
689 hb->data.f32[x] = -c->data.f32[x] * v;
690 }
691 }
692 return CCV_NNC_EXEC_SUCCESS;
693 }
694 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 694, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
695 ccv_nnc_tensor_view_get_stride(b, bstride);
696 ccv_nnc_tensor_view_get_stride(c, cstride);
697 ccv_nnc_tensor_view_get_stride(hb, hbstride);
698 int i[CCV_NNC_MAX_DIM(2) + 2];
699 float* const bp = b->data.f32;
700 float* const cp = c->data.f32;
701 float* const hbp = hb->data.f32;
702 const int count = dim[2] * dim[3];
703 if (ha == 0)
704 {
705 if (bstride[2] == dim[3] && cstride[2] == dim[3] && hbstride[2] == dim[3])
706 {
707 // Special casing if the ainc[3] is the same as dim[3]
708 for (i[0] = 0; i[0] < dim[0]; i[0]++)
709 {
710 float* bp0 = bp + i[0] * bstride[0];
711 float* cp0 = cp + i[0] * cstride[0];
712 float* hbp0 = hbp + i[0] * hbstride[0];
713 for (i[1] = 0; i[1] < dim[1]; i[1]++)
714 {
715 for (x = 0; x < count; x++)
716 {
717 const float v = 1 / bp0[x];
718 hbp0[x] = -cp0[x] * v;
719 }
720 bp0 += bstride[1];
721 cp0 += cstride[1];
722 hbp0 += hbstride[1];
723 }
724 }
725 return CCV_NNC_EXEC_SUCCESS;
726 }
727 // Non-optimal case, need to do skip copy.
728 for (i[0] = 0; i[0] < dim[0]; i[0]++)
729 {
730 float* const bp0 = bp + i[0] * bstride[0];
731 float* const cp0 = cp + i[0] * cstride[0];
732 float* const hbp0 = hbp + i[0] * hbstride[0];
733 for (i[1] = 0; i[1] < dim[1]; i[1]++)
734 {
735 float* bp1 = bp0 + i[1] * bstride[1];
736 float* cp1 = cp0 + i[1] * cstride[1];
737 float* hbp1 = hbp0 + i[1] * hbstride[1];
738 for (i[2] = 0; i[2] < dim[2]; i[2]++)
739 {
740 for (x = 0; x < dim[3]; x++)
741 {
742 const float v = 1 / bp1[x];
743 hbp1[x] = -cp1[x] * v;
744 }
745 bp1 += bstride[2];
746 cp1 += cstride[2];
747 hbp1 += hbstride[2];
748 }
749 }
750 }
751 } else {
752 float* const hap = ha->data.f32;
753 ccv_nnc_tensor_view_get_stride(ha, hastride);
754 if (bstride[2] == dim[3] && cstride[2] == dim[3] && hastride[2] == dim[3] && hbstride[2] == dim[3])
755 {
756 // Special casing if the ainc[3] is the same as dim[3]
757 for (i[0] = 0; i[0] < dim[0]; i[0]++)
758 {
759 float* bp0 = bp + i[0] * bstride[0];
760 float* cp0 = cp + i[0] * cstride[0];
761 float* hap0 = hap + i[0] * hastride[0];
762 float* hbp0 = hbp + i[0] * hbstride[0];
763 for (i[1] = 0; i[1] < dim[1]; i[1]++)
764 {
765 for (x = 0; x < count; x++)
766 {
767 const float v = 1 / bp0[x];
768 hap0[x] = v;
769 hbp0[x] = -cp0[x] * v;
770 }
771 bp0 += bstride[1];
772 cp0 += cstride[1];
773 hap0 += hastride[1];
774 hbp0 += hbstride[1];
775 }
776 }
777 return CCV_NNC_EXEC_SUCCESS;
778 }
779 // Non-optimal case, need to do skip copy.
780 for (i[0] = 0; i[0] < dim[0]; i[0]++)
781 {
782 float* const bp0 = bp + i[0] * bstride[0];
783 float* const cp0 = cp + i[0] * cstride[0];
784 float* const hap0 = hap + i[0] * hastride[0];
785 float* const hbp0 = hbp + i[0] * hbstride[0];
786 for (i[1] = 0; i[1] < dim[1]; i[1]++)
787 {
788 float* bp1 = bp0 + i[1] * bstride[1];
789 float* cp1 = cp0 + i[1] * cstride[1];
790 float* hap1 = hap0 + i[1] * hastride[1];
791 float* hbp1 = hbp0 + i[1] * hbstride[1];
792 for (i[2] = 0; i[2] < dim[2]; i[2]++)
793 {
794 for (x = 0; x < dim[3]; x++)
795 {
796 const float v = 1 / bp1[x];
797 hap1[x] = v;
798 hbp1[x] = -cp1[x] * v;
799 }
800 bp1 += bstride[2];
801 cp1 += cstride[2];
802 hap1 += hastride[2];
803 hbp1 += hbstride[2];
804 }
805 }
806 }
807 }
808 } else {
809 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(g->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(g->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 809, __extension__ __PRETTY_FUNCTION__
); }))
;
810 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 810, __extension__ __PRETTY_FUNCTION__
); }))
;
811 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(c->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(c->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 811, __extension__ __PRETTY_FUNCTION__
); }))
;
812 assert(ccv_nnc_tensor_nd(hb->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(hb->info.dim) <= (2)
+ 2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(hb->
info.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(hb->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 812, __extension__ __PRETTY_FUNCTION__
); }))
;
813 ccv_nnc_tensor_view_get_dim(b, dim);
814 assert(ccv_nnc_tensor_view_check_dim(g, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(g, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(g, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(g, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 814, __extension__ __PRETTY_FUNCTION__
); }))
;
815 assert(ccv_nnc_tensor_view_check_dim(c, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(c, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(c, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(c, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 815, __extension__ __PRETTY_FUNCTION__
); }))
;
816 assert(ccv_nnc_tensor_view_check_dim(hb, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(hb, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(hb, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(hb, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 816, __extension__ __PRETTY_FUNCTION__
); }))
;
817 if (ha)
818 {
819 assert(ccv_nnc_tensor_nd(ha->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(ha->info.dim) <= (2)
+ 2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(ha->
info.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(ha->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 819, __extension__ __PRETTY_FUNCTION__
); }))
;
820 assert(ccv_nnc_tensor_view_check_dim(ha, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(ha, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(ha, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(ha, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 820, __extension__ __PRETTY_FUNCTION__
); }))
;
821 }
822 int x;
823 if (!CCV_IS_TENSOR_VIEW(g)((*(int*)(g)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(c)((*(int*)(c)) & CCV_TENSOR_VIEW) && (ha == 0 || !CCV_IS_TENSOR_VIEW(ha)((*(int*)(ha)) & CCV_TENSOR_VIEW)) && !CCV_IS_TENSOR_VIEW(hb)((*(int*)(hb)) & CCV_TENSOR_VIEW))
824 {
825 // Super optimal case, just do one for-loop for sum.
826 const int tensor_count = ccv_nnc_tensor_count(g->info);
827 if (ha == 0)
828 {
829 for (x = 0; x < tensor_count; x++)
830 {
831 const float v = g->data.f32[x] / b->data.f32[x];
832 hb->data.f32[x] = -c->data.f32[x] * v;
833 }
834 } else {
835 for (x = 0; x < tensor_count; x++)
836 {
837 const float v = g->data.f32[x] / b->data.f32[x];
838 ha->data.f32[x] = v;
839 hb->data.f32[x] = -c->data.f32[x] * v;
840 }
841 }
842 return CCV_NNC_EXEC_SUCCESS;
843 }
844 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 844, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
845 ccv_nnc_tensor_view_get_stride(g, gstride);
846 ccv_nnc_tensor_view_get_stride(b, bstride);
847 ccv_nnc_tensor_view_get_stride(c, cstride);
848 ccv_nnc_tensor_view_get_stride(hb, hbstride);
849 int i[CCV_NNC_MAX_DIM(2) + 2];
850 float* const gp = g->data.f32;
851 float* const bp = b->data.f32;
852 float* const cp = c->data.f32;
853 float* const hbp = hb->data.f32;
854 const int count = dim[2] * dim[3];
855 if (ha == 0)
856 {
857 if (gstride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3] && hbstride[2] == dim[3])
858 {
859 // Special casing if the ainc[3] is the same as dim[3]
860 for (i[0] = 0; i[0] < dim[0]; i[0]++)
861 {
862 float* gp0 = gp + i[0] * gstride[0];
863 float* bp0 = bp + i[0] * bstride[0];
864 float* cp0 = cp + i[0] * cstride[0];
865 float* hbp0 = hbp + i[0] * hbstride[0];
866 for (i[1] = 0; i[1] < dim[1]; i[1]++)
867 {
868 for (x = 0; x < count; x++)
869 {
870 const float v = gp0[x] / bp0[x];
871 hbp0[x] = -cp0[x] * v;
872 }
873 gp0 += gstride[1];
874 bp0 += bstride[1];
875 cp0 += cstride[1];
876 hbp0 += hbstride[1];
877 }
878 }
879 return CCV_NNC_EXEC_SUCCESS;
880 }
881 // Non-optimal case, need to do skip copy.
882 for (i[0] = 0; i[0] < dim[0]; i[0]++)
883 {
884 float* const gp0 = gp + i[0] * gstride[0];
885 float* const bp0 = bp + i[0] * bstride[0];
886 float* const cp0 = cp + i[0] * cstride[0];
887 float* const hbp0 = hbp + i[0] * hbstride[0];
888 for (i[1] = 0; i[1] < dim[1]; i[1]++)
889 {
890 float* gp1 = gp0 + i[1] * gstride[1];
891 float* bp1 = bp0 + i[1] * bstride[1];
892 float* cp1 = cp0 + i[1] * cstride[1];
893 float* hbp1 = hbp0 + i[1] * hbstride[1];
894 for (i[2] = 0; i[2] < dim[2]; i[2]++)
895 {
896 for (x = 0; x < dim[3]; x++)
897 {
898 const float v = gp1[x] / bp1[x];
899 hbp1[x] = -cp1[x] * v;
900 }
901 gp1 += gstride[2];
902 bp1 += bstride[2];
903 cp1 += cstride[2];
904 hbp1 += hbstride[2];
905 }
906 }
907 }
908 } else {
909 ccv_nnc_tensor_view_get_stride(ha, hastride);
910 float* const hap = ha->data.f32;
911 if (gstride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3] && hastride[2] == dim[3] && hbstride[2] == dim[3])
912 {
913 // Special casing if the ainc[3] is the same as dim[3]
914 for (i[0] = 0; i[0] < dim[0]; i[0]++)
915 {
916 float* gp0 = gp + i[0] * gstride[0];
917 float* bp0 = bp + i[0] * bstride[0];
918 float* cp0 = cp + i[0] * cstride[0];
919 float* hap0 = hap + i[0] * hastride[0];
920 float* hbp0 = hbp + i[0] * hbstride[0];
921 for (i[1] = 0; i[1] < dim[1]; i[1]++)
922 {
923 for (x = 0; x < count; x++)
924 {
925 const float v = gp0[x] / bp0[x];
926 hap0[x] = v;
927 hbp0[x] = -cp0[x] * v;
928 }
929 gp0 += gstride[1];
930 bp0 += bstride[1];
931 cp0 += cstride[1];
932 hap0 += hastride[1];
933 hbp0 += hbstride[1];
934 }
935 }
936 return CCV_NNC_EXEC_SUCCESS;
937 }
938 // Non-optimal case, need to do skip copy.
939 for (i[0] = 0; i[0] < dim[0]; i[0]++)
940 {
941 float* const gp0 = gp + i[0] * gstride[0];
942 float* const bp0 = bp + i[0] * bstride[0];
943 float* const cp0 = cp + i[0] * cstride[0];
944 float* const hap0 = hap + i[0] * hastride[0];
945 float* const hbp0 = hbp + i[0] * hbstride[0];
946 for (i[1] = 0; i[1] < dim[1]; i[1]++)
947 {
948 float* gp1 = gp0 + i[1] * gstride[1];
949 float* bp1 = bp0 + i[1] * bstride[1];
950 float* cp1 = cp0 + i[1] * cstride[1];
951 float* hap1 = hap0 + i[1] * hastride[1];
952 float* hbp1 = hbp0 + i[1] * hbstride[1];
953 for (i[2] = 0; i[2] < dim[2]; i[2]++)
954 {
955 for (x = 0; x < dim[3]; x++)
956 {
957 const float v = gp1[x] / bp1[x];
958 hap1[x] = v;
959 hbp1[x] = -cp1[x] * v;
960 }
961 gp1 += gstride[2];
962 bp1 += bstride[2];
963 cp1 += cstride[2];
964 hap1 += hastride[2];
965 hbp1 += hbstride[2];
966 }
967 }
968 }
969 }
970 }
971 return CCV_NNC_EXEC_SUCCESS;
972}
973
974static int _ccv_nnc_ewexp_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
975{
976 // Assuming this is float 32.
977 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
978 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
979 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
980 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
981 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
982 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 982, __extension__ __PRETTY_FUNCTION__
); }))
;
983 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 983, __extension__ __PRETTY_FUNCTION__
); }))
;
984 ccv_nnc_tensor_view_get_dim(a, dim);
985 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 985, __extension__ __PRETTY_FUNCTION__
); }))
;
986 int x;
987 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW))
988 {
989 // Super optimal case, just do one for-loop for sum.
990 const int tensor_count = ccv_nnc_tensor_count(a->info);
991 for (x = 0; x < tensor_count; x++)
992 b->data.f32[x] = exp(a->data.f32[x]);
993 return CCV_NNC_EXEC_SUCCESS;
994 }
995 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 995, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
996 ccv_nnc_tensor_view_get_stride(a, astride);
997 ccv_nnc_tensor_view_get_stride(b, bstride);
998 int i[CCV_NNC_MAX_DIM(2) + 2];
999 float* const ap = a->data.f32;
1000 float* const bp = b->data.f32;
1001 const int count = dim[2] * dim[3];
1002 if (astride[2] == dim[3] && bstride[2] == dim[3])
1003 {
1004 // Special casing if the ainc[3] is the same as dim[3]
1005 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1006 {
1007 float* ap0 = ap + i[0] * astride[0];
1008 float* bp0 = bp + i[0] * bstride[0];
1009 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1010 {
1011 for (x = 0; x < count; x++)
1012 bp0[x] = exp(ap0[x]);
1013 ap0 += astride[1];
1014 bp0 += bstride[1];
1015 }
1016 }
1017 return CCV_NNC_EXEC_SUCCESS;
1018 }
1019 // Non-optimal case, need to do skip copy.
1020 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1021 {
1022 float* const ap0 = ap + i[0] * astride[0];
1023 float* const bp0 = bp + i[0] * bstride[0];
1024 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1025 {
1026 float* ap1 = ap0 + i[1] * astride[1];
1027 float* bp1 = bp0 + i[1] * bstride[1];
1028 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1029 {
1030 for (x = 0; x < dim[3]; x++)
1031 bp1[x] = exp(ap1[x]);
1032 ap1 += astride[2];
1033 bp1 += bstride[2];
1034 }
1035 }
1036 }
1037 return CCV_NNC_EXEC_SUCCESS;
1038}
1039
1040static int _ccv_nnc_ewexp_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1041{
1042 // D[Exp[x], x] = Exp[x]
1043 if (inputs[0] == 0)
1044 _ccv_nnc_tensor_transfer_cpu_ref_f32((ccv_nnc_tensor_view_t*)inputs[2], (ccv_nnc_tensor_view_t*)outputs[0]);
1045 else
1046 _ccv_nnc_ewprod_forw_cpu_ref((ccv_nnc_tensor_view_t*[]){
1047 (ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)inputs[2]
1048 }, 2, (ccv_nnc_tensor_view_t**)outputs, output_size);
1049 return CCV_NNC_EXEC_SUCCESS;
1050}
1051
1052static void _ccv_nnc_ewpow_forw_cpu_ref(ccv_nnc_tensor_view_t* const a, const float exp, ccv_nnc_tensor_view_t* const c)
1053{
1054 // Assuming this is float 32.
1055 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1056 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
1057 int cstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1058 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1058, __extension__ __PRETTY_FUNCTION__
); }))
;
1059 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(c->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(c->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1059, __extension__ __PRETTY_FUNCTION__
); }))
;
1060 ccv_nnc_tensor_view_get_dim(a, dim);
1061 assert(ccv_nnc_tensor_view_check_dim(c, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(c, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(c, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(c, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1061, __extension__ __PRETTY_FUNCTION__
); }))
;
1062 int x;
1063 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(c)((*(int*)(c)) & CCV_TENSOR_VIEW))
1064 {
1065 const int tensor_count = ccv_nnc_tensor_count(a->info);
1066 for (x = 0; x < tensor_count; x++)
1067 c->data.f32[x] = powf(a->data.f32[x], exp);
1068 return;
1069 }
1070 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1070, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1071 ccv_nnc_tensor_view_get_stride(a, astride);
1072 ccv_nnc_tensor_view_get_stride(c, cstride);
1073 int i[CCV_NNC_MAX_DIM(2) + 2];
1074 float* const ap = a->data.f32;
1075 float* const cp = c->data.f32;
1076 const int count = dim[2] * dim[3];
1077 if (astride[2] == dim[3] && cstride[2] == dim[3])
1078 {
1079 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1080 {
1081 float* ap0 = ap + i[0] * astride[0];
1082 float* cp0 = cp + i[0] * cstride[0];
1083 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1084 {
1085 for (x = 0; x < count; x++)
1086 cp0[x] = powf(ap0[x], exp);
1087 ap0 += astride[1];
1088 cp0 += cstride[1];
1089 }
1090 }
1091 return;
1092 }
1093 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1094 {
1095 float* const ap0 = ap + i[0] * astride[0];
1096 float* const cp0 = cp + i[0] * cstride[0];
1097 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1098 {
1099 float* ap1 = ap0 + i[1] * astride[1];
1100 float* cp1 = cp0 + i[1] * cstride[1];
1101 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1102 {
1103 for (x = 0; x < dim[3]; x++)
1104 cp1[x] = powf(ap1[x], exp);
1105 ap1 += astride[2];
1106 cp1 += cstride[2];
1107 }
1108 }
1109 }
1110}
1111
1112static int _ccv_nnc_ewpow_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1113{
1114 _ccv_nnc_ewpow_forw_cpu_ref((ccv_nnc_tensor_view_t*)inputs[0], cmd.info.pow.exponent, (ccv_nnc_tensor_view_t*)outputs[0]);
1115 return CCV_NNC_EXEC_SUCCESS;
1116}
1117
1118static void _ccv_nnc_ewpow_back_da_cpu_ref(ccv_nnc_tensor_view_t* const g, ccv_nnc_tensor_view_t* const a, const float exp, ccv_nnc_tensor_view_t* const h)
1119{
1120 // D[pow(a, exp), a] = exp * pow(a, exp - 1)
1121 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1122 int gstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1123 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
1124 int hstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1125 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1125, __extension__ __PRETTY_FUNCTION__
); }))
;
1126 assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(h->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(h->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1126, __extension__ __PRETTY_FUNCTION__
); }))
;
1127 ccv_nnc_tensor_view_get_dim(a, dim);
1128 assert(ccv_nnc_tensor_view_check_dim(h, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(h, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(h, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(h, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1128, __extension__ __PRETTY_FUNCTION__
); }))
;
1129 if (g)
1130 {
1131 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(g->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(g->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1131, __extension__ __PRETTY_FUNCTION__
); }))
;
1132 assert(ccv_nnc_tensor_view_check_dim(g, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(g, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(g, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(g, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1132, __extension__ __PRETTY_FUNCTION__
); }))
;
1133 }
1134 int x;
1135 if ((!g || !CCV_IS_TENSOR_VIEW(g)((*(int*)(g)) & CCV_TENSOR_VIEW)) && !CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(h)((*(int*)(h)) & CCV_TENSOR_VIEW))
1136 {
1137 const int tensor_count = ccv_nnc_tensor_count(a->info);
1138 if (g)
1139 {
1140 for (x = 0; x < tensor_count; x++)
1141 h->data.f32[x] = g->data.f32[x] * exp * powf(a->data.f32[x], exp - 1);
1142 } else {
1143 for (x = 0; x < tensor_count; x++)
1144 h->data.f32[x] = exp * powf(a->data.f32[x], exp - 1);
1145 }
1146 return;
1147 }
1148 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1148, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1149 if (g)
1150 ccv_nnc_tensor_view_get_stride(g, gstride);
1151 ccv_nnc_tensor_view_get_stride(a, astride);
1152 ccv_nnc_tensor_view_get_stride(h, hstride);
1153 int i[CCV_NNC_MAX_DIM(2) + 2];
1154 float* const gp = g ? g->data.f32 : 0;
1155 float* const ap = a->data.f32;
1156 float* const hp = h->data.f32;
1157 const int count = dim[2] * dim[3];
1158 if ((!g || gstride[2] == dim[3]) && astride[2] == dim[3] && hstride[2] == dim[3])
1159 {
1160 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1161 {
1162 float* gp0 = g ? gp + i[0] * gstride[0] : 0;
1163 float* ap0 = ap + i[0] * astride[0];
1164 float* hp0 = hp + i[0] * hstride[0];
1165 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1166 {
1167 if (g)
1168 {
1169 for (x = 0; x < count; x++)
1170 hp0[x] = gp0[x] * exp * powf(ap0[x], exp - 1);
1171 gp0 += gstride[1];
1172 } else {
1173 for (x = 0; x < count; x++)
1174 hp0[x] = exp * powf(ap0[x], exp - 1);
1175 }
1176 ap0 += astride[1];
1177 hp0 += hstride[1];
1178 }
1179 }
1180 return;
1181 }
1182 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1183 {
1184 float* const gp0 = g ? gp + i[0] * gstride[0] : 0;
1185 float* const ap0 = ap + i[0] * astride[0];
1186 float* const hp0 = hp + i[0] * hstride[0];
1187 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1188 {
1189 float* gp1 = g ? gp0 + i[1] * gstride[1] : 0;
1190 float* ap1 = ap0 + i[1] * astride[1];
1191 float* hp1 = hp0 + i[1] * hstride[1];
1192 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1193 {
1194 if (g)
1195 {
1196 for (x = 0; x < dim[3]; x++)
1197 hp1[x] = gp1[x] * exp * powf(ap1[x], exp - 1);
1198 gp1 += gstride[2];
1199 } else {
1200 for (x = 0; x < dim[3]; x++)
1201 hp1[x] = exp * powf(ap1[x], exp - 1);
1202 }
1203 ap1 += astride[2];
1204 hp1 += hstride[2];
1205 }
1206 }
1207 }
1208}
1209
1210static int _ccv_nnc_ewpow_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1211{
1212 ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
1213 ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
1214 if (output_size > 0 && outputs[0])
1215 _ccv_nnc_ewpow_back_da_cpu_ref(g, a, cmd.info.pow.exponent, (ccv_nnc_tensor_view_t*)outputs[0]);
1216 return CCV_NNC_EXEC_SUCCESS;
1217}
1218
1219static int _ccv_nnc_ewlog_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1220{
1221 // Assuming this is float 32.
1222 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1223 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
1224 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1225 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
1226 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1227 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1227, __extension__ __PRETTY_FUNCTION__
); }))
;
1228 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1228, __extension__ __PRETTY_FUNCTION__
); }))
;
1229 ccv_nnc_tensor_view_get_dim(a, dim);
1230 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1230, __extension__ __PRETTY_FUNCTION__
); }))
;
1231 int x;
1232 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW))
1233 {
1234 // Super optimal case, just do one for-loop for sum.
1235 const int tensor_count = ccv_nnc_tensor_count(a->info);
1236 for (x = 0; x < tensor_count; x++)
1237 b->data.f32[x] = log(a->data.f32[x]);
1238 return CCV_NNC_EXEC_SUCCESS;
1239 }
1240 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1240, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1241 ccv_nnc_tensor_view_get_stride(a, astride);
1242 ccv_nnc_tensor_view_get_stride(b, bstride);
1243 int i[CCV_NNC_MAX_DIM(2) + 2];
1244 float* const ap = a->data.f32;
1245 float* const bp = b->data.f32;
1246 const int count = dim[2] * dim[3];
1247 if (astride[2] == dim[3] && bstride[2] == dim[3])
1248 {
1249 // Special casing if the ainc[3] is the same as dim[3]
1250 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1251 {
1252 float* ap0 = ap + i[0] * astride[0];
1253 float* bp0 = bp + i[0] * bstride[0];
1254 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1255 {
1256 for (x = 0; x < count; x++)
1257 bp0[x] = log(ap0[x]);
1258 ap0 += astride[1];
1259 bp0 += bstride[1];
1260 }
1261 }
1262 return CCV_NNC_EXEC_SUCCESS;
1263 }
1264 // Non-optimal case, need to do skip copy.
1265 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1266 {
1267 float* const ap0 = ap + i[0] * astride[0];
1268 float* const bp0 = bp + i[0] * bstride[0];
1269 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1270 {
1271 float* ap1 = ap0 + i[1] * astride[1];
1272 float* bp1 = bp0 + i[1] * bstride[1];
1273 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1274 {
1275 for (x = 0; x < dim[3]; x++)
1276 bp1[x] = log(ap1[x]);
1277 ap1 += astride[2];
1278 bp1 += bstride[2];
1279 }
1280 }
1281 }
1282 return CCV_NNC_EXEC_SUCCESS;
1283}
1284
1285static int _ccv_nnc_ewlog_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1286{
1287 // D[Log[x], x] = 1 / x
1288 _ccv_nnc_ewdiv_forw_cpu_ref(1, (ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)inputs[1], (ccv_nnc_tensor_view_t*)outputs[0]);
1289 return CCV_NNC_EXEC_SUCCESS;
1290}
1291
1292static int _ccv_nnc_ewsqrt_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1293{
1294 // Assuming this is float 32.
1295 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1296 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
1297 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1298 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
1299 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1300 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1300, __extension__ __PRETTY_FUNCTION__
); }))
;
1301 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1301, __extension__ __PRETTY_FUNCTION__
); }))
;
1302 ccv_nnc_tensor_view_get_dim(a, dim);
1303 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1303, __extension__ __PRETTY_FUNCTION__
); }))
;
1304 int x;
1305 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW))
1306 {
1307 // Super optimal case, just do one for-loop for sum.
1308 const int tensor_count = ccv_nnc_tensor_count(a->info);
1309 for (x = 0; x < tensor_count; x++)
1310 b->data.f32[x] = sqrt(a->data.f32[x]);
1311 return CCV_NNC_EXEC_SUCCESS;
1312 }
1313 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1313, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1314 ccv_nnc_tensor_view_get_stride(a, astride);
1315 ccv_nnc_tensor_view_get_stride(b, bstride);
1316 int i[CCV_NNC_MAX_DIM(2) + 2];
1317 float* const ap = a->data.f32;
1318 float* const bp = b->data.f32;
1319 const int count = dim[2] * dim[3];
1320 if (astride[2] == dim[3] && bstride[2] == dim[3])
1321 {
1322 // Special casing if the ainc[3] is the same as dim[3]
1323 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1324 {
1325 float* ap0 = ap + i[0] * astride[0];
1326 float* bp0 = bp + i[0] * bstride[0];
1327 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1328 {
1329 for (x = 0; x < count; x++)
1330 bp0[x] = sqrt(ap0[x]);
1331 ap0 += astride[1];
1332 bp0 += bstride[1];
1333 }
1334 }
1335 return CCV_NNC_EXEC_SUCCESS;
1336 }
1337 // Non-optimal case, need to do skip copy.
1338 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1339 {
1340 float* const ap0 = ap + i[0] * astride[0];
1341 float* const bp0 = bp + i[0] * bstride[0];
1342 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1343 {
1344 float* ap1 = ap0 + i[1] * astride[1];
1345 float* bp1 = bp0 + i[1] * bstride[1];
1346 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1347 {
1348 for (x = 0; x < dim[3]; x++)
1349 bp1[x] = sqrt(ap1[x]);
1350 ap1 += astride[2];
1351 bp1 += bstride[2];
1352 }
1353 }
1354 }
1355 return CCV_NNC_EXEC_SUCCESS;
1356}
1357
1358static int _ccv_nnc_ewsqrt_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1359{
1360 // D[Sqrt[x], x] = 0.5 / Sqrt[x]
1361 _ccv_nnc_ewdiv_forw_cpu_ref(0.5, (ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)inputs[2], (ccv_nnc_tensor_view_t*)outputs[0]);
1362 return CCV_NNC_EXEC_SUCCESS;
1363}
1364
1365static int _ccv_nnc_ewsin_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1366{
1367 // Assuming this is float 32.
1368 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1369 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
1370 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1371 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
1372 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1373 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1373, __extension__ __PRETTY_FUNCTION__
); }))
;
1374 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1374, __extension__ __PRETTY_FUNCTION__
); }))
;
1375 ccv_nnc_tensor_view_get_dim(a, dim);
1376 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1376, __extension__ __PRETTY_FUNCTION__
); }))
;
1377 int x;
1378 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW))
1379 {
1380 const int tensor_count = ccv_nnc_tensor_count(a->info);
1381 for (x = 0; x < tensor_count; x++)
1382 b->data.f32[x] = sinf(a->data.f32[x]);
1383 return CCV_NNC_EXEC_SUCCESS;
1384 }
1385 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1385, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1386 ccv_nnc_tensor_view_get_stride(a, astride);
1387 ccv_nnc_tensor_view_get_stride(b, bstride);
1388 int i[CCV_NNC_MAX_DIM(2) + 2];
1389 float* const ap = a->data.f32;
1390 float* const bp = b->data.f32;
1391 const int count = dim[2] * dim[3];
1392 if (astride[2] == dim[3] && bstride[2] == dim[3])
1393 {
1394 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1395 {
1396 float* ap0 = ap + i[0] * astride[0];
1397 float* bp0 = bp + i[0] * bstride[0];
1398 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1399 {
1400 for (x = 0; x < count; x++)
1401 bp0[x] = sinf(ap0[x]);
1402 ap0 += astride[1];
1403 bp0 += bstride[1];
1404 }
1405 }
1406 return CCV_NNC_EXEC_SUCCESS;
1407 }
1408 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1409 {
1410 float* const ap0 = ap + i[0] * astride[0];
1411 float* const bp0 = bp + i[0] * bstride[0];
1412 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1413 {
1414 float* ap1 = ap0 + i[1] * astride[1];
1415 float* bp1 = bp0 + i[1] * bstride[1];
1416 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1417 {
1418 for (x = 0; x < dim[3]; x++)
1419 bp1[x] = sinf(ap1[x]);
1420 ap1 += astride[2];
1421 bp1 += bstride[2];
1422 }
1423 }
1424 }
1425 return CCV_NNC_EXEC_SUCCESS;
1426}
1427
1428static int _ccv_nnc_ewsin_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1429{
1430 // D[Sin[x], x] = Cos[x]
1431 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1432 int gstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1433 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
1434 int hstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1435 ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
1436 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[1];
1437 ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[0];
1438 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1438, __extension__ __PRETTY_FUNCTION__
); }))
;
1439 assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(h->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(h->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1439, __extension__ __PRETTY_FUNCTION__
); }))
;
1440 ccv_nnc_tensor_view_get_dim(a, dim);
1441 assert(ccv_nnc_tensor_view_check_dim(h, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(h, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(h, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(h, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1441, __extension__ __PRETTY_FUNCTION__
); }))
;
1442 if (g)
1443 {
1444 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(g->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(g->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1444, __extension__ __PRETTY_FUNCTION__
); }))
;
1445 assert(ccv_nnc_tensor_view_check_dim(g, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(g, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(g, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(g, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1445, __extension__ __PRETTY_FUNCTION__
); }))
;
1446 }
1447 int x;
1448 if ((!g || !CCV_IS_TENSOR_VIEW(g)((*(int*)(g)) & CCV_TENSOR_VIEW)) && !CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(h)((*(int*)(h)) & CCV_TENSOR_VIEW))
1449 {
1450 const int tensor_count = ccv_nnc_tensor_count(a->info);
1451 if (g)
1452 {
1453 for (x = 0; x < tensor_count; x++)
1454 h->data.f32[x] = g->data.f32[x] * cosf(a->data.f32[x]);
1455 } else {
1456 for (x = 0; x < tensor_count; x++)
1457 h->data.f32[x] = cosf(a->data.f32[x]);
1458 }
1459 return CCV_NNC_EXEC_SUCCESS;
1460 }
1461 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1461, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1462 if (g)
1463 ccv_nnc_tensor_view_get_stride(g, gstride);
1464 ccv_nnc_tensor_view_get_stride(a, astride);
1465 ccv_nnc_tensor_view_get_stride(h, hstride);
1466 int i[CCV_NNC_MAX_DIM(2) + 2];
1467 float* const gp = g ? g->data.f32 : 0;
1468 float* const ap = a->data.f32;
1469 float* const hp = h->data.f32;
1470 const int count = dim[2] * dim[3];
1471 if ((!g || gstride[2] == dim[3]) && astride[2] == dim[3] && hstride[2] == dim[3])
1472 {
1473 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1474 {
1475 float* gp0 = g ? gp + i[0] * gstride[0] : 0;
1476 float* ap0 = ap + i[0] * astride[0];
1477 float* hp0 = hp + i[0] * hstride[0];
1478 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1479 {
1480 if (g)
1481 {
1482 for (x = 0; x < count; x++)
1483 hp0[x] = gp0[x] * cosf(ap0[x]);
1484 gp0 += gstride[1];
1485 } else {
1486 for (x = 0; x < count; x++)
1487 hp0[x] = cosf(ap0[x]);
1488 }
1489 ap0 += astride[1];
1490 hp0 += hstride[1];
1491 }
1492 }
1493 return CCV_NNC_EXEC_SUCCESS;
1494 }
1495 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1496 {
1497 float* const gp0 = g ? gp + i[0] * gstride[0] : 0;
1498 float* const ap0 = ap + i[0] * astride[0];
1499 float* const hp0 = hp + i[0] * hstride[0];
1500 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1501 {
1502 float* gp1 = g ? gp0 + i[1] * gstride[1] : 0;
1503 float* ap1 = ap0 + i[1] * astride[1];
1504 float* hp1 = hp0 + i[1] * hstride[1];
1505 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1506 {
1507 if (g)
1508 {
1509 for (x = 0; x < dim[3]; x++)
1510 hp1[x] = gp1[x] * cosf(ap1[x]);
1511 gp1 += gstride[2];
1512 } else {
1513 for (x = 0; x < dim[3]; x++)
1514 hp1[x] = cosf(ap1[x]);
1515 }
1516 ap1 += astride[2];
1517 hp1 += hstride[2];
1518 }
1519 }
1520 }
1521 return CCV_NNC_EXEC_SUCCESS;
1522}
1523
1524static int _ccv_nnc_ewcos_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1525{
1526 // Assuming this is float 32.
1527 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1528 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
1529 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1530 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
1531 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1532 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1532, __extension__ __PRETTY_FUNCTION__
); }))
;
1533 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1533, __extension__ __PRETTY_FUNCTION__
); }))
;
1534 ccv_nnc_tensor_view_get_dim(a, dim);
1535 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1535, __extension__ __PRETTY_FUNCTION__
); }))
;
1536 int x;
1537 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW))
1538 {
1539 const int tensor_count = ccv_nnc_tensor_count(a->info);
1540 for (x = 0; x < tensor_count; x++)
1541 b->data.f32[x] = cosf(a->data.f32[x]);
1542 return CCV_NNC_EXEC_SUCCESS;
1543 }
1544 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1544, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1545 ccv_nnc_tensor_view_get_stride(a, astride);
1546 ccv_nnc_tensor_view_get_stride(b, bstride);
1547 int i[CCV_NNC_MAX_DIM(2) + 2];
1548 float* const ap = a->data.f32;
1549 float* const bp = b->data.f32;
1550 const int count = dim[2] * dim[3];
1551 if (astride[2] == dim[3] && bstride[2] == dim[3])
1552 {
1553 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1554 {
1555 float* ap0 = ap + i[0] * astride[0];
1556 float* bp0 = bp + i[0] * bstride[0];
1557 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1558 {
1559 for (x = 0; x < count; x++)
1560 bp0[x] = cosf(ap0[x]);
1561 ap0 += astride[1];
1562 bp0 += bstride[1];
1563 }
1564 }
1565 return CCV_NNC_EXEC_SUCCESS;
1566 }
1567 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1568 {
1569 float* const ap0 = ap + i[0] * astride[0];
1570 float* const bp0 = bp + i[0] * bstride[0];
1571 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1572 {
1573 float* ap1 = ap0 + i[1] * astride[1];
1574 float* bp1 = bp0 + i[1] * bstride[1];
1575 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1576 {
1577 for (x = 0; x < dim[3]; x++)
1578 bp1[x] = cosf(ap1[x]);
1579 ap1 += astride[2];
1580 bp1 += bstride[2];
1581 }
1582 }
1583 }
1584 return CCV_NNC_EXEC_SUCCESS;
1585}
1586
1587static int _ccv_nnc_ewcos_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1588{
1589 // D[Cos[x], x] = -Sin[x]
1590 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1591 int gstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1592 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
1593 int hstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1594 ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
1595 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[1];
1596 ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[0];
1597 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1597, __extension__ __PRETTY_FUNCTION__
); }))
;
1598 assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(h->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(h->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1598, __extension__ __PRETTY_FUNCTION__
); }))
;
1599 ccv_nnc_tensor_view_get_dim(a, dim);
1600 assert(ccv_nnc_tensor_view_check_dim(h, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(h, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(h, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(h, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1600, __extension__ __PRETTY_FUNCTION__
); }))
;
1601 if (g)
1602 {
1603 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(g->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(g->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1603, __extension__ __PRETTY_FUNCTION__
); }))
;
1604 assert(ccv_nnc_tensor_view_check_dim(g, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(g, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(g, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(g, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1604, __extension__ __PRETTY_FUNCTION__
); }))
;
1605 }
1606 int x;
1607 if ((!g || !CCV_IS_TENSOR_VIEW(g)((*(int*)(g)) & CCV_TENSOR_VIEW)) && !CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(h)((*(int*)(h)) & CCV_TENSOR_VIEW))
1608 {
1609 const int tensor_count = ccv_nnc_tensor_count(a->info);
1610 if (g)
1611 {
1612 for (x = 0; x < tensor_count; x++)
1613 h->data.f32[x] = -g->data.f32[x] * sinf(a->data.f32[x]);
1614 } else {
1615 for (x = 0; x < tensor_count; x++)
1616 h->data.f32[x] = -sinf(a->data.f32[x]);
1617 }
1618 return CCV_NNC_EXEC_SUCCESS;
1619 }
1620 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1620, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1621 if (g)
1622 ccv_nnc_tensor_view_get_stride(g, gstride);
1623 ccv_nnc_tensor_view_get_stride(a, astride);
1624 ccv_nnc_tensor_view_get_stride(h, hstride);
1625 int i[CCV_NNC_MAX_DIM(2) + 2];
1626 float* const gp = g ? g->data.f32 : 0;
1627 float* const ap = a->data.f32;
1628 float* const hp = h->data.f32;
1629 const int count = dim[2] * dim[3];
1630 if ((!g || gstride[2] == dim[3]) && astride[2] == dim[3] && hstride[2] == dim[3])
1631 {
1632 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1633 {
1634 float* gp0 = g ? gp + i[0] * gstride[0] : 0;
1635 float* ap0 = ap + i[0] * astride[0];
1636 float* hp0 = hp + i[0] * hstride[0];
1637 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1638 {
1639 if (g)
1640 {
1641 for (x = 0; x < count; x++)
1642 hp0[x] = -gp0[x] * sinf(ap0[x]);
1643 gp0 += gstride[1];
1644 } else {
1645 for (x = 0; x < count; x++)
1646 hp0[x] = -sinf(ap0[x]);
1647 }
1648 ap0 += astride[1];
1649 hp0 += hstride[1];
1650 }
1651 }
1652 return CCV_NNC_EXEC_SUCCESS;
1653 }
1654 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1655 {
1656 float* const gp0 = g ? gp + i[0] * gstride[0] : 0;
1657 float* const ap0 = ap + i[0] * astride[0];
1658 float* const hp0 = hp + i[0] * hstride[0];
1659 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1660 {
1661 float* gp1 = g ? gp0 + i[1] * gstride[1] : 0;
1662 float* ap1 = ap0 + i[1] * astride[1];
1663 float* hp1 = hp0 + i[1] * hstride[1];
1664 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1665 {
1666 if (g)
1667 {
1668 for (x = 0; x < dim[3]; x++)
1669 hp1[x] = -gp1[x] * sinf(ap1[x]);
1670 gp1 += gstride[2];
1671 } else {
1672 for (x = 0; x < dim[3]; x++)
1673 hp1[x] = -sinf(ap1[x]);
1674 }
1675 ap1 += astride[2];
1676 hp1 += hstride[2];
1677 }
1678 }
1679 }
1680 return CCV_NNC_EXEC_SUCCESS;
1681}
1682
1683static int _ccv_nnc_ewabs_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1684{
1685 // Assuming this is float 32.
1686 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1687 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
1688 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1689 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
1690 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1691 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1691, __extension__ __PRETTY_FUNCTION__
); }))
;
1692 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1692, __extension__ __PRETTY_FUNCTION__
); }))
;
1693 ccv_nnc_tensor_view_get_dim(a, dim);
1694 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1694, __extension__ __PRETTY_FUNCTION__
); }))
;
1695 int x;
1696 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW))
1697 {
1698 // Super optimal case, just do one for-loop for sum.
1699 const int tensor_count = ccv_nnc_tensor_count(a->info);
1700 for (x = 0; x < tensor_count; x++)
1701 b->data.f32[x] = fabs(a->data.f32[x]);
1702 return CCV_NNC_EXEC_SUCCESS;
1703 }
1704 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1704, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1705 ccv_nnc_tensor_view_get_stride(a, astride);
1706 ccv_nnc_tensor_view_get_stride(b, bstride);
1707 int i[CCV_NNC_MAX_DIM(2) + 2];
1708 float* const ap = a->data.f32;
1709 float* const bp = b->data.f32;
1710 const int count = dim[2] * dim[3];
1711 if (astride[2] == dim[3] && bstride[2] == dim[3])
1712 {
1713 // Special casing if the ainc[3] is the same as dim[3]
1714 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1715 {
1716 float* ap0 = ap + i[0] * astride[0];
1717 float* bp0 = bp + i[0] * bstride[0];
1718 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1719 {
1720 for (x = 0; x < count; x++)
1721 bp0[x] = fabs(ap0[x]);
1722 ap0 += astride[1];
1723 bp0 += bstride[1];
1724 }
1725 }
1726 return CCV_NNC_EXEC_SUCCESS;
1727 }
1728 // Non-optimal case, need to do skip copy.
1729 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1730 {
1731 float* const ap0 = ap + i[0] * astride[0];
1732 float* const bp0 = bp + i[0] * bstride[0];
1733 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1734 {
1735 float* ap1 = ap0 + i[1] * astride[1];
1736 float* bp1 = bp0 + i[1] * bstride[1];
1737 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1738 {
1739 for (x = 0; x < dim[3]; x++)
1740 bp1[x] = fabs(ap1[x]);
1741 ap1 += astride[2];
1742 bp1 += bstride[2];
1743 }
1744 }
1745 }
1746 return CCV_NNC_EXEC_SUCCESS;
1747}
1748
1749static int _ccv_nnc_ewabs_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1750{
1751 // Assuming this is float 32.
1752 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1753 int gstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1754 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
1755 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1756 ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
1757 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[1];
1758 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1759 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(g->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(g->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1759, __extension__ __PRETTY_FUNCTION__
); }))
;
1
Assuming the condition is true
2
Taking true branch
1760 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1760, __extension__ __PRETTY_FUNCTION__
); }))
;
3
Assuming the condition is true
4
Taking true branch
1761 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1761, __extension__ __PRETTY_FUNCTION__
); }))
;
5
Assuming the condition is true
6
Taking true branch
1762 ccv_nnc_tensor_view_get_dim(a, dim);
1763 assert(ccv_nnc_tensor_view_check_dim(g, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(g, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(g, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(g, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1763, __extension__ __PRETTY_FUNCTION__
); }))
;
7
Assuming the condition is true
8
Taking true branch
1764 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1764, __extension__ __PRETTY_FUNCTION__
); }))
;
9
Assuming the condition is true
10
Taking true branch
1765 int x;
1766 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(g)((*(int*)(g)) & CCV_TENSOR_VIEW))
11
Assuming the condition is false
1767 {
1768 // Super optimal case, just do one for-loop for sum.
1769 const int tensor_count = ccv_nnc_tensor_count(a->info);
1770 for (x = 0; x < tensor_count; x++)
1771 b->data.f32[x] = a->data.f32[x] >= 0 ? g->data.f32[x] : -g->data.f32[x];
1772 return CCV_NNC_EXEC_SUCCESS;
1773 }
1774 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1774, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
12
Taking true branch
1775 ccv_nnc_tensor_view_get_stride(g, astride);
1776 ccv_nnc_tensor_view_get_stride(a, astride);
1777 ccv_nnc_tensor_view_get_stride(b, bstride);
1778 int i[CCV_NNC_MAX_DIM(2) + 2];
1779 float* const gp = g->data.f32;
1780 float* const ap = a->data.f32;
1781 float* const bp = b->data.f32;
1782 const int count = dim[2] * dim[3];
1783 if (astride[2] == dim[3] && bstride[2] == dim[3])
13
Assuming the condition is false
1784 {
1785 // Special casing if the ainc[3] is the same as dim[3]
1786 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1787 {
1788 float* gp0 = gp + i[0] * gstride[0];
1789 float* ap0 = ap + i[0] * astride[0];
1790 float* bp0 = bp + i[0] * bstride[0];
1791 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1792 {
1793 for (x = 0; x < count; x++)
1794 bp0[x] = ap0[x] >= 0 ? gp0[x] : -gp0[x];
1795 gp0 += gstride[1];
1796 ap0 += astride[1];
1797 bp0 += bstride[1];
1798 }
1799 }
1800 return CCV_NNC_EXEC_SUCCESS;
1801 }
1802 // Non-optimal case, need to do skip copy.
1803 for (i[0] = 0; i[0] < dim[0]; i[0]++)
14
Assuming the condition is true
15
Loop condition is true. Entering loop body
1804 {
1805 float* const gp0 = gp + i[0] * gstride[0];
16
The right operand of '*' is a garbage value
1806 float* const ap0 = ap + i[0] * astride[0];
1807 float* const bp0 = bp + i[0] * bstride[0];
1808 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1809 {
1810 float* gp1 = gp0 + i[1] * gstride[1];
1811 float* ap1 = ap0 + i[1] * astride[1];
1812 float* bp1 = bp0 + i[1] * bstride[1];
1813 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1814 {
1815 for (x = 0; x < dim[3]; x++)
1816 bp1[x] = ap1[x] >= 0 ? gp1[x] : -gp1[x];
1817 gp1 += gstride[2];
1818 ap1 += astride[2];
1819 bp1 += bstride[2];
1820 }
1821 }
1822 }
1823 return CCV_NNC_EXEC_SUCCESS;
1824}
1825
1826static int _ccv_nnc_clamp_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1827{
1828 // Assuming this is float 32.
1829 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1830 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
1831 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1832 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
1833 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1834 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1834, __extension__ __PRETTY_FUNCTION__
); }))
;
1835 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1835, __extension__ __PRETTY_FUNCTION__
); }))
;
1836 ccv_nnc_tensor_view_get_dim(a, dim);
1837 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1837, __extension__ __PRETTY_FUNCTION__
); }))
;
1838 int x;
1839 const float min = cmd.info.clamp.min;
1840 const float max = cmd.info.clamp.max;
1841 assert(!isnan(min) || !isnan(max))((void) sizeof ((!__builtin_isnan (min) || !__builtin_isnan (
max)) ? 1 : 0), __extension__ ({ if (!__builtin_isnan (min) ||
!__builtin_isnan (max)) ; else __assert_fail ("!isnan(min) || !isnan(max)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1841, __extension__ __PRETTY_FUNCTION__
); }))
;
1842 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW))
1843 {
1844 // Super optimal case, just do one for-loop for sum.
1845 const int tensor_count = ccv_nnc_tensor_count(a->info);
1846 if (isnan(min)__builtin_isnan (min))
1847 {
1848 for (x = 0; x < tensor_count; x++)
1849 b->data.f32[x] = ccv_min(a->data.f32[x], max)({ typeof (a->data.f32[x]) _a = (a->data.f32[x]); typeof
(max) _b = (max); (_a < _b) ? _a : _b; })
;
1850 } else if (isnan(max)__builtin_isnan (max)) {
1851 for (x = 0; x < tensor_count; x++)
1852 b->data.f32[x] = ccv_max(a->data.f32[x], min)({ typeof (a->data.f32[x]) _a = (a->data.f32[x]); typeof
(min) _b = (min); (_a > _b) ? _a : _b; })
;
1853 } else {
1854 for (x = 0; x < tensor_count; x++)
1855 b->data.f32[x] = ccv_clamp(a->data.f32[x], min, max)({ typeof (min) _a = (min); typeof (max) _b = (max); typeof (
a->data.f32[x]) _x = (a->data.f32[x]); (_x < _a) ? _a
: ((_x > _b) ? _b : _x); })
;
1856 }
1857 return CCV_NNC_EXEC_SUCCESS;
1858 }
1859 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1859, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1860 ccv_nnc_tensor_view_get_stride(a, astride);
1861 ccv_nnc_tensor_view_get_stride(b, bstride);
1862 int i[CCV_NNC_MAX_DIM(2) + 2];
1863 float* const ap = a->data.f32;
1864 float* const bp = b->data.f32;
1865 const int count = dim[2] * dim[3];
1866 if (isnan(min)__builtin_isnan (min))
1867 {
1868 if (astride[2] == dim[3] && bstride[2] == dim[3])
1869 {
1870 // Special casing if the ainc[3] is the same as dim[3]
1871 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1872 {
1873 float* ap0 = ap + i[0] * astride[0];
1874 float* bp0 = bp + i[0] * bstride[0];
1875 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1876 {
1877 for (x = 0; x < count; x++)
1878 bp0[x] = ccv_min(ap0[x], max)({ typeof (ap0[x]) _a = (ap0[x]); typeof (max) _b = (max); (_a
< _b) ? _a : _b; })
;
1879 ap0 += astride[1];
1880 bp0 += bstride[1];
1881 }
1882 }
1883 return CCV_NNC_EXEC_SUCCESS;
1884 }
1885 // Non-optimal case, need to do skip copy.
1886 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1887 {
1888 float* const ap0 = ap + i[0] * astride[0];
1889 float* const bp0 = bp + i[0] * bstride[0];
1890 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1891 {
1892 float* ap1 = ap0 + i[1] * astride[1];
1893 float* bp1 = bp0 + i[1] * bstride[1];
1894 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1895 {
1896 for (x = 0; x < dim[3]; x++)
1897 bp1[x] = ccv_min(ap1[x], max)({ typeof (ap1[x]) _a = (ap1[x]); typeof (max) _b = (max); (_a
< _b) ? _a : _b; })
;
1898 ap1 += astride[2];
1899 bp1 += bstride[2];
1900 }
1901 }
1902 }
1903 } else if (isnan(max)__builtin_isnan (max)) {
1904 if (astride[2] == dim[3] && bstride[2] == dim[3])
1905 {
1906 // Special casing if the ainc[3] is the same as dim[3]
1907 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1908 {
1909 float* ap0 = ap + i[0] * astride[0];
1910 float* bp0 = bp + i[0] * bstride[0];
1911 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1912 {
1913 for (x = 0; x < count; x++)
1914 bp0[x] = ccv_max(ap0[x], min)({ typeof (ap0[x]) _a = (ap0[x]); typeof (min) _b = (min); (_a
> _b) ? _a : _b; })
;
1915 ap0 += astride[1];
1916 bp0 += bstride[1];
1917 }
1918 }
1919 return CCV_NNC_EXEC_SUCCESS;
1920 }
1921 // Non-optimal case, need to do skip copy.
1922 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1923 {
1924 float* const ap0 = ap + i[0] * astride[0];
1925 float* const bp0 = bp + i[0] * bstride[0];
1926 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1927 {
1928 float* ap1 = ap0 + i[1] * astride[1];
1929 float* bp1 = bp0 + i[1] * bstride[1];
1930 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1931 {
1932 for (x = 0; x < dim[3]; x++)
1933 bp1[x] = ccv_max(ap1[x], min)({ typeof (ap1[x]) _a = (ap1[x]); typeof (min) _b = (min); (_a
> _b) ? _a : _b; })
;
1934 ap1 += astride[2];
1935 bp1 += bstride[2];
1936 }
1937 }
1938 }
1939 } else {
1940 if (astride[2] == dim[3] && bstride[2] == dim[3])
1941 {
1942 // Special casing if the ainc[3] is the same as dim[3]
1943 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1944 {
1945 float* ap0 = ap + i[0] * astride[0];
1946 float* bp0 = bp + i[0] * bstride[0];
1947 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1948 {
1949 for (x = 0; x < count; x++)
1950 bp0[x] = ccv_clamp(ap0[x], min, max)({ typeof (min) _a = (min); typeof (max) _b = (max); typeof (
ap0[x]) _x = (ap0[x]); (_x < _a) ? _a : ((_x > _b) ? _b
: _x); })
;
1951 ap0 += astride[1];
1952 bp0 += bstride[1];
1953 }
1954 }
1955 return CCV_NNC_EXEC_SUCCESS;
1956 }
1957 // Non-optimal case, need to do skip copy.
1958 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1959 {
1960 float* const ap0 = ap + i[0] * astride[0];
1961 float* const bp0 = bp + i[0] * bstride[0];
1962 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1963 {
1964 float* ap1 = ap0 + i[1] * astride[1];
1965 float* bp1 = bp0 + i[1] * bstride[1];
1966 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1967 {
1968 for (x = 0; x < dim[3]; x++)
1969 bp1[x] = ccv_clamp(ap1[x], min, max)({ typeof (min) _a = (min); typeof (max) _b = (max); typeof (
ap1[x]) _x = (ap1[x]); (_x < _a) ? _a : ((_x > _b) ? _b
: _x); })
;
1970 ap1 += astride[2];
1971 bp1 += bstride[2];
1972 }
1973 }
1974 }
1975 }
1976 return CCV_NNC_EXEC_SUCCESS;
1977}
1978
1979static int _ccv_nnc_clamp_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1980{
1981 assert(input_size == 3)((void) sizeof ((input_size == 3) ? 1 : 0), __extension__ ({ if
(input_size == 3) ; else __assert_fail ("input_size == 3", "ew/ccv_nnc_ew_cpu_ref.c"
, 1981, __extension__ __PRETTY_FUNCTION__); }))
;
1982 const ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0]; // gradient
1983 const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[2];
1984 assert(output_size == 1)((void) sizeof ((output_size == 1) ? 1 : 0), __extension__ ({
if (output_size == 1) ; else __assert_fail ("output_size == 1"
, "ew/ccv_nnc_ew_cpu_ref.c", 1984, __extension__ __PRETTY_FUNCTION__
); }))
;
1985 ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[0];
1986 // Assuming this is float 32.
1987 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1988 int hstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1989 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1990 assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(h->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(h->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1990, __extension__ __PRETTY_FUNCTION__
); }))
;
1991 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1991, __extension__ __PRETTY_FUNCTION__
); }))
;
1992 ccv_nnc_tensor_view_get_dim(g, dim);
1993 ccv_nnc_tensor_view_get_dim(h, dim);
1994 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1994, __extension__ __PRETTY_FUNCTION__
); }))
;
1995 int x;
1996 const float min = cmd.info.clamp.min;
1997 const float max = cmd.info.clamp.max;
1998 assert(!isnan(min) || !isnan(max))((void) sizeof ((!__builtin_isnan (min) || !__builtin_isnan (
max)) ? 1 : 0), __extension__ ({ if (!__builtin_isnan (min) ||
!__builtin_isnan (max)) ; else __assert_fail ("!isnan(min) || !isnan(max)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1998, __extension__ __PRETTY_FUNCTION__
); }))
;
1999 if (g)
2000 {
2001 if (!CCV_IS_TENSOR_VIEW(g)((*(int*)(g)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(h)((*(int*)(h)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW))
2002 {
2003 // Super optimal case, just do one for-loop for sum.
2004 const int tensor_count = ccv_nnc_tensor_count(g->info);
2005 if (isnan(min)__builtin_isnan (min))
2006 {
2007 for (x = 0; x < tensor_count; x++)
2008 h->data.f32[x] = b->data.f32[x] >= max ? 0 : g->data.f32[x];
2009 } else if (isnan(max)__builtin_isnan (max)) {
2010 for (x = 0; x < tensor_count; x++)
2011 h->data.f32[x] = b->data.f32[x] <= min ? 0 : g->data.f32[x];
2012 } else {
2013 for (x = 0; x < tensor_count; x++)
2014 h->data.f32[x] = (b->data.f32[x] >= max || b->data.f32[x] <= min) ? 0 : g->data.f32[x];
2015 }
2016 return CCV_NNC_EXEC_SUCCESS;
2017 }
2018 int gstride[CCV_NNC_MAX_DIM_ALLOC(12)];
2019 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(g->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(g->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 2019, __extension__ __PRETTY_FUNCTION__
); }))
;
2020 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 2020, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
2021 ccv_nnc_tensor_view_get_stride(g, gstride);
2022 ccv_nnc_tensor_view_get_stride(b, bstride);
2023 ccv_nnc_tensor_view_get_stride(h, hstride);
2024 int i[CCV_NNC_MAX_DIM(2) + 2];
2025 float* const gp = g->data.f32;
2026 float* const bp = b->data.f32;
2027 float* const hp = h->data.f32;
2028 const int count = dim[2] * dim[3];
2029 const float min = cmd.info.clamp.min;
2030 const float max = cmd.info.clamp.max;
2031 assert(!isnan(min) || !isnan(max))((void) sizeof ((!__builtin_isnan (min) || !__builtin_isnan (
max)) ? 1 : 0), __extension__ ({ if (!__builtin_isnan (min) ||
!__builtin_isnan (max)) ; else __assert_fail ("!isnan(min) || !isnan(max)"
, "ew/ccv_nnc_ew_cpu_ref.c", 2031, __extension__ __PRETTY_FUNCTION__
); }))
;
2032 if (isnan(min)__builtin_isnan (min))
2033 {
2034 if (gstride[2] == dim[3] && bstride[2] == dim[3] && hstride[2] == dim[3])
2035 {
2036 // Special casing if the ginc[3] is the same as dim[3]
2037 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2038 {
2039 float* gp0 = gp + i[0] * gstride[0];
2040 float* bp0 = bp + i[0] * bstride[0];
2041 float* hp0 = hp + i[0] * hstride[0];
2042 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2043 {
2044 for (x = 0; x < count; x++)
2045 hp0[x] = bp0[x] >= max ? 0 : gp0[x];
2046 gp0 += gstride[1];
2047 bp0 += bstride[1];
2048 hp0 += hstride[1];
2049 }
2050 }
2051 return CCV_NNC_EXEC_SUCCESS;
2052 }
2053 // Non-optimal case, need to do skip copy.
2054 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2055 {
2056 float* const gp0 = gp + i[0] * gstride[0];
2057 float* const bp0 = bp + i[0] * bstride[0];
2058 float* const hp0 = hp + i[0] * hstride[0];
2059 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2060 {
2061 float* gp1 = gp0 + i[1] * gstride[1];
2062 float* bp1 = bp0 + i[1] * bstride[1];
2063 float* hp1 = hp0 + i[1] * hstride[1];
2064 for (i[2] = 0; i[2] < dim[2]; i[2]++)
2065 {
2066 for (x = 0; x < dim[3]; x++)
2067 hp1[x] = bp1[x] >= max ? 0 : gp1[x];
2068 gp1 += gstride[2];
2069 bp1 += bstride[2];
2070 hp1 += hstride[2];
2071 }
2072 }
2073 }
2074 } else if (isnan(max)__builtin_isnan (max)) {
2075 if (gstride[2] == dim[3] && bstride[2] == dim[3] && hstride[2] == dim[3])
2076 {
2077 // Special casing if the ginc[3] is the same as dim[3]
2078 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2079 {
2080 float* gp0 = gp + i[0] * gstride[0];
2081 float* bp0 = bp + i[0] * bstride[0];
2082 float* hp0 = hp + i[0] * hstride[0];
2083 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2084 {
2085 for (x = 0; x < count; x++)
2086 hp0[x] = bp0[x] <= min ? 0 : gp0[x];
2087 gp0 += gstride[1];
2088 bp0 += bstride[1];
2089 hp0 += hstride[1];
2090 }
2091 }
2092 return CCV_NNC_EXEC_SUCCESS;
2093 }
2094 // Non-optimal case, need to do skip copy.
2095 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2096 {
2097 float* const gp0 = gp + i[0] * gstride[0];
2098 float* const bp0 = bp + i[0] * bstride[0];
2099 float* const hp0 = hp + i[0] * hstride[0];
2100 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2101 {
2102 float* gp1 = gp0 + i[1] * gstride[1];
2103 float* bp1 = bp0 + i[1] * bstride[1];
2104 float* hp1 = hp0 + i[1] * hstride[1];
2105 for (i[2] = 0; i[2] < dim[2]; i[2]++)
2106 {
2107 for (x = 0; x < dim[3]; x++)
2108 hp1[x] = bp1[x] <= min ? 0 : gp1[x];
2109 gp1 += gstride[2];
2110 bp1 += bstride[2];
2111 hp1 += hstride[2];
2112 }
2113 }
2114 }
2115 } else {
2116 if (gstride[2] == dim[3] && bstride[2] == dim[3] && hstride[2] == dim[3])
2117 {
2118 // Special casing if the ginc[3] is the same as dim[3]
2119 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2120 {
2121 float* gp0 = gp + i[0] * gstride[0];
2122 float* bp0 = bp + i[0] * bstride[0];
2123 float* hp0 = hp + i[0] * hstride[0];
2124 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2125 {
2126 for (x = 0; x < count; x++)
2127 hp0[x] = (bp0[x] >= max || bp0[x] <= min) ? 0 : gp0[x];
2128 gp0 += gstride[1];
2129 bp0 += bstride[1];
2130 hp0 += hstride[1];
2131 }
2132 }
2133 return CCV_NNC_EXEC_SUCCESS;
2134 }
2135 // Non-optimal case, need to do skip copy.
2136 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2137 {
2138 float* const gp0 = gp + i[0] * gstride[0];
2139 float* const bp0 = bp + i[0] * bstride[0];
2140 float* const hp0 = hp + i[0] * hstride[0];
2141 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2142 {
2143 float* gp1 = gp0 + i[1] * gstride[1];
2144 float* bp1 = bp0 + i[1] * bstride[1];
2145 float* hp1 = hp0 + i[1] * hstride[1];
2146 for (i[2] = 0; i[2] < dim[2]; i[2]++)
2147 {
2148 for (x = 0; x < dim[3]; x++)
2149 hp1[x] = (bp1[x] >= max || bp1[x] <= min) ? 0 : gp1[x];
2150 gp1 += gstride[2];
2151 bp1 += bstride[2];
2152 hp1 += hstride[2];
2153 }
2154 }
2155 }
2156 }
2157 } else {
2158 if (!CCV_IS_TENSOR_VIEW(h)((*(int*)(h)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW))
2159 {
2160 // Super optimal case, just do one for-loop for sum.
2161 const int tensor_count = ccv_nnc_tensor_count(h->info);
2162 if (isnan(min)__builtin_isnan (min))
2163 {
2164 for (x = 0; x < tensor_count; x++)
2165 h->data.f32[x] = b->data.f32[x] >= max ? 0 : 1;
2166 } else if (isnan(max)__builtin_isnan (max)) {
2167 for (x = 0; x < tensor_count; x++)
2168 h->data.f32[x] = b->data.f32[x] <= min ? 0 : 1;
2169 } else {
2170 for (x = 0; x < tensor_count; x++)
2171 h->data.f32[x] = (b->data.f32[x] >= max || b->data.f32[x] <= min) ? 0 : 1;
2172 }
2173 return CCV_NNC_EXEC_SUCCESS;
2174 }
2175 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 2175, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
2176 ccv_nnc_tensor_view_get_stride(b, bstride);
2177 ccv_nnc_tensor_view_get_stride(h, hstride);
2178 int i[CCV_NNC_MAX_DIM(2) + 2];
2179 float* const bp = b->data.f32;
2180 float* const hp = h->data.f32;
2181 const int count = dim[2] * dim[3];
2182 const float min = cmd.info.clamp.min;
2183 const float max = cmd.info.clamp.max;
2184 assert(!isnan(min) || !isnan(max))((void) sizeof ((!__builtin_isnan (min) || !__builtin_isnan (
max)) ? 1 : 0), __extension__ ({ if (!__builtin_isnan (min) ||
!__builtin_isnan (max)) ; else __assert_fail ("!isnan(min) || !isnan(max)"
, "ew/ccv_nnc_ew_cpu_ref.c", 2184, __extension__ __PRETTY_FUNCTION__
); }))
;
2185 if (isnan(min)__builtin_isnan (min))
2186 {
2187 if (bstride[2] == dim[3] && hstride[2] == dim[3])
2188 {
2189 // Special casing if the binc[3] is the same as dim[3]
2190 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2191 {
2192 float* bp0 = bp + i[0] * bstride[0];
2193 float* hp0 = hp + i[0] * hstride[0];
2194 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2195 {
2196 for (x = 0; x < count; x++)
2197 hp0[x] = bp0[x] >= max ? 0 : 1;
2198 bp0 += bstride[1];
2199 hp0 += hstride[1];
2200 }
2201 }
2202 return CCV_NNC_EXEC_SUCCESS;
2203 }
2204 // Non-optimal case, need to do skip copy.
2205 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2206 {
2207 float* const bp0 = bp + i[0] * bstride[0];
2208 float* const hp0 = hp + i[0] * hstride[0];
2209 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2210 {
2211 float* bp1 = bp0 + i[1] * bstride[1];
2212 float* hp1 = hp0 + i[1] * hstride[1];
2213 for (i[2] = 0; i[2] < dim[2]; i[2]++)
2214 {
2215 for (x = 0; x < dim[3]; x++)
2216 hp1[x] = bp1[x] >= max ? 0 : 1;
2217 bp1 += bstride[2];
2218 hp1 += hstride[2];
2219 }
2220 }
2221 }
2222 } else if (isnan(max)__builtin_isnan (max)) {
2223 if (bstride[2] == dim[3] && hstride[2] == dim[3])
2224 {
2225 // Special casing if the binc[3] is the same as dim[3]
2226 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2227 {
2228 float* bp0 = bp + i[0] * bstride[0];
2229 float* hp0 = hp + i[0] * hstride[0];
2230 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2231 {
2232 for (x = 0; x < count; x++)
2233 hp0[x] = bp0[x] <= min ? 0 : 1;
2234 bp0 += bstride[1];
2235 hp0 += hstride[1];
2236 }
2237 }
2238 return CCV_NNC_EXEC_SUCCESS;
2239 }
2240 // Non-optimal case, need to do skip copy.
2241 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2242 {
2243 float* const bp0 = bp + i[0] * bstride[0];
2244 float* const hp0 = hp + i[0] * hstride[0];
2245 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2246 {
2247 float* bp1 = bp0 + i[1] * bstride[1];
2248 float* hp1 = hp0 + i[1] * hstride[1];
2249 for (i[2] = 0; i[2] < dim[2]; i[2]++)
2250 {
2251 for (x = 0; x < dim[3]; x++)
2252 hp1[x] = bp1[x] <= min ? 0 : 1;
2253 bp1 += bstride[2];
2254 hp1 += hstride[2];
2255 }
2256 }
2257 }
2258 } else {
2259 if (bstride[2] == dim[3] && hstride[2] == dim[3])
2260 {
2261 // Special casing if the binc[3] is the same as dim[3]
2262 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2263 {
2264 float* bp0 = bp + i[0] * bstride[0];
2265 float* hp0 = hp + i[0] * hstride[0];
2266 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2267 {
2268 for (x = 0; x < count; x++)
2269 hp0[x] = (bp0[x] >= max || bp0[x] <= min) ? 0 : 1;
2270 bp0 += bstride[1];
2271 hp0 += hstride[1];
2272 }
2273 }
2274 return CCV_NNC_EXEC_SUCCESS;
2275 }
2276 // Non-optimal case, need to do skip copy.
2277 for (i[0] = 0; i[0] < dim[0]; i[0]++)
2278 {
2279 float* const bp0 = bp + i[0] * bstride[0];
2280 float* const hp0 = hp + i[0] * hstride[0];
2281 for (i[1] = 0; i[1] < dim[1]; i[1]++)
2282 {
2283 float* bp1 = bp0 + i[1] * bstride[1];
2284 float* hp1 = hp0 + i[1] * hstride[1];
2285 for (i[2] = 0; i[2] < dim[2]; i[2]++)
2286 {
2287 for (x = 0; x < dim[3]; x++)
2288 hp1[x] = (bp1[x] >= max || bp1[x] <= min) ? 0 : 1;
2289 bp1 += bstride[2];
2290 hp1 += hstride[2];
2291 }
2292 }
2293 }
2294 }
2295 }
2296 return CCV_NNC_EXEC_SUCCESS;
2297}
2298
2299REGISTER_COMMAND_BACKEND(CCV_NNC_EWSUM_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWSUM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2300{
2301 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2302 registry->tensor_datatypes = CCV_32F;
2303 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2304 registry->algorithms = 1;
2305 registry->exec = _ccv_nnc_ewsum_forw;
2306}
2307
2308REGISTER_COMMAND_BACKEND(CCV_NNC_EWSUM_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWSUM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2309{
2310 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2311 registry->tensor_datatypes = CCV_32F;
2312 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2313 registry->algorithms = 1;
2314 registry->exec = _ccv_nnc_ewsum_back;
2315}
2316
2317REGISTER_COMMAND_BACKEND(CCV_NNC_EWPROD_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWPROD_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2318{
2319 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2320 registry->tensor_datatypes = CCV_32F;
2321 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2322 registry->algorithms = 1;
2323 registry->exec = _ccv_nnc_ewprod_forw;
2324}
2325
2326REGISTER_COMMAND_BACKEND(CCV_NNC_EWPROD_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWPROD_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2327{
2328 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2329 registry->tensor_datatypes = CCV_32F;
2330 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2331 registry->algorithms = 1;
2332 registry->exec = _ccv_nnc_ewprod_back;
2333}
2334
2335REGISTER_COMMAND_BACKEND(CCV_NNC_EWDIV_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWDIV_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2336{
2337 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2338 registry->tensor_datatypes = CCV_32F;
2339 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2340 registry->algorithms = 1;
2341 registry->exec = _ccv_nnc_ewdiv_forw;
2342}
2343
2344REGISTER_COMMAND_BACKEND(CCV_NNC_EWDIV_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWDIV_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2345{
2346 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2347 registry->tensor_datatypes = CCV_32F;
2348 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2349 registry->algorithms = 1;
2350 registry->exec = _ccv_nnc_ewdiv_back;
2351}
2352
2353REGISTER_COMMAND_BACKEND(CCV_NNC_EWEXP_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWEXP_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2354{
2355 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2356 registry->tensor_datatypes = CCV_32F;
2357 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2358 registry->algorithms = 1;
2359 registry->exec = _ccv_nnc_ewexp_forw;
2360}
2361
2362REGISTER_COMMAND_BACKEND(CCV_NNC_EWEXP_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWEXP_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2363{
2364 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2365 registry->tensor_datatypes = CCV_32F;
2366 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2367 registry->algorithms = 1;
2368 registry->exec = _ccv_nnc_ewexp_back;
2369}
2370
2371REGISTER_COMMAND_BACKEND(CCV_NNC_EWPOW_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWPOW_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2372{
2373 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2374 registry->tensor_datatypes = CCV_32F;
2375 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2376 registry->algorithms = 1;
2377 registry->exec = _ccv_nnc_ewpow_forw;
2378}
2379
2380REGISTER_COMMAND_BACKEND(CCV_NNC_EWPOW_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWPOW_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2381{
2382 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2383 registry->tensor_datatypes = CCV_32F;
2384 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2385 registry->algorithms = 1;
2386 registry->exec = _ccv_nnc_ewpow_back;
2387}
2388
2389REGISTER_COMMAND_BACKEND(CCV_NNC_EWLOG_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWLOG_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2390{
2391 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2392 registry->tensor_datatypes = CCV_32F;
2393 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2394 registry->algorithms = 1;
2395 registry->exec = _ccv_nnc_ewlog_forw;
2396}
2397
2398REGISTER_COMMAND_BACKEND(CCV_NNC_EWLOG_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWLOG_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2399{
2400 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2401 registry->tensor_datatypes = CCV_32F;
2402 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2403 registry->algorithms = 1;
2404 registry->exec = _ccv_nnc_ewlog_back;
2405}
2406
2407REGISTER_COMMAND_BACKEND(CCV_NNC_EWSQRT_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWSQRT_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2408{
2409 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2410 registry->tensor_datatypes = CCV_32F;
2411 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2412 registry->algorithms = 1;
2413 registry->exec = _ccv_nnc_ewsqrt_forw;
2414}
2415
2416REGISTER_COMMAND_BACKEND(CCV_NNC_EWSQRT_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWSQRT_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2417{
2418 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2419 registry->tensor_datatypes = CCV_32F;
2420 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2421 registry->algorithms = 1;
2422 registry->exec = _ccv_nnc_ewsqrt_back;
2423}
2424
2425REGISTER_COMMAND_BACKEND(CCV_NNC_EWSIN_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWSIN_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2426{
2427 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2428 registry->tensor_datatypes = CCV_32F;
2429 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2430 registry->algorithms = 1;
2431 registry->exec = _ccv_nnc_ewsin_forw;
2432}
2433
2434REGISTER_COMMAND_BACKEND(CCV_NNC_EWSIN_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWSIN_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2435{
2436 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2437 registry->tensor_datatypes = CCV_32F;
2438 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2439 registry->algorithms = 1;
2440 registry->exec = _ccv_nnc_ewsin_back;
2441}
2442
2443REGISTER_COMMAND_BACKEND(CCV_NNC_EWCOS_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWCOS_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2444{
2445 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2446 registry->tensor_datatypes = CCV_32F;
2447 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2448 registry->algorithms = 1;
2449 registry->exec = _ccv_nnc_ewcos_forw;
2450}
2451
2452REGISTER_COMMAND_BACKEND(CCV_NNC_EWCOS_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWCOS_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2453{
2454 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2455 registry->tensor_datatypes = CCV_32F;
2456 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2457 registry->algorithms = 1;
2458 registry->exec = _ccv_nnc_ewcos_back;
2459}
2460
2461REGISTER_COMMAND_BACKEND(CCV_NNC_EWABS_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWABS_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2462{
2463 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2464 registry->tensor_datatypes = CCV_32F;
2465 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2466 registry->algorithms = 1;
2467 registry->exec = _ccv_nnc_ewabs_forw;
2468}
2469
2470REGISTER_COMMAND_BACKEND(CCV_NNC_EWABS_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWABS_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2471{
2472 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2473 registry->tensor_datatypes = CCV_32F;
2474 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2475 registry->algorithms = 1;
2476 registry->exec = _ccv_nnc_ewabs_back;
2477}
2478
2479REGISTER_COMMAND_BACKEND(CCV_NNC_CLAMP_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_CLAMP_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2480{
2481 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2482 registry->tensor_datatypes = CCV_32F;
2483 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2484 registry->algorithms = 1;
2485 registry->exec = _ccv_nnc_clamp_forw;
2486}
2487
2488REGISTER_COMMAND_BACKEND(CCV_NNC_CLAMP_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_CLAMP_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
2489{
2490 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
2491 registry->tensor_datatypes = CCV_32F;
2492 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
2493 registry->algorithms = 1;
2494 registry->exec = _ccv_nnc_clamp_back;
2495}