Bug Summary

File: nnc/cmd/ew/ccv_nnc_ew_cpu_ref.c
Warning: line 1303, column 27
The right operand of '*' is a garbage value
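For orientation (line 1303 itself falls outside this excerpt, which ends at line 1098): the diagnostic means that one operand of a multiplication was read from memory that was never initialized on the path the analyzer explored. The following is a minimal, hypothetical sketch of that pattern, not code from ccv_nnc_ew_cpu_ref.c; the names MAX_AXES and get_stride are invented for illustration. A helper writes only part of a stack array, and a later multiplication reads one of the untouched entries.

#include <stdio.h>

#define MAX_AXES 4

/* Hypothetical helper: writes only the first nd entries of stride and
 * leaves the remaining entries of the caller's stack array untouched. */
static void get_stride(const int nd, int stride[MAX_AXES])
{
	int i;
	for (i = 0; i < nd; i++)
		stride[i] = 1;
}

int main(void)
{
	int stride[MAX_AXES]; /* no initializer */
	get_stride(2, stride); /* only stride[0] and stride[1] are written */
	const int x = 3;
	/* stride[3] was never assigned, so the right operand of '*' below is
	 * a garbage value -- the same shape of defect this report describes. */
	const int offset = x * stride[3];
	printf("%d\n", offset);
	return 0;
}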

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_ew_cpu_ref.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd -resource-dir /usr/local/lib/clang/19 -I ../../ -I .. -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2025-04-15-094535-608653-1 -x c ew/ccv_nnc_ew_cpu_ref.c
1#include "ccv.h"
2#include "ccv_internal.h"
3#include "nnc/ccv_nnc.h"
4#include "nnc/ccv_nnc_easy.h"
5#include "nnc/ccv_nnc_internal.h"
6#ifdef USE_OPENMP
7#include <omp.h>
8#endif
9#ifdef USE_DISPATCH
10#include <dispatch/dispatch.h>
11#endif
12
13#include "../_ccv_nnc_cpu_ref.h"
14
15void _ccv_nnc_ewsum_forw_cpu_ref_f32(ccv_nnc_tensor_view_t* const* const inputs, const int input_size, ccv_nnc_tensor_view_t* const* const outputs, const int output_size)
16{
17 if (input_size == 1 && output_size == 1)
18 {
19 _ccv_nnc_tensor_transfer_cpu_ref_f32(inputs[0], outputs[0]);
20 return;
21 }
22 // Assuming this is float 32.
23 int dim[CCV_NNC_MAX_DIM_ALLOC];
24 int astride[CCV_NNC_MAX_DIM_ALLOC];
25 int bstride[CCV_NNC_MAX_DIM_ALLOC];
26 int cstride[CCV_NNC_MAX_DIM_ALLOC];
27 int x, z;
28 int k = 0;
29 // Bad, I promised this can be an in-place operation. Need to first find out whether any of them share the same pointer.
30 for (z = 1; z < input_size; z++)
31 {
32 ccv_nnc_tensor_view_t* c = outputs[0];
33 ccv_nnc_tensor_view_t* a = inputs[z];
34 if (c->data.f32 == a->data.f32)
35 {
36 k = z;
37 break;
38 }
39 }
40 for (z = 0; z < input_size - 1; z++)
41 {
42 ccv_nnc_tensor_view_t* c = outputs[0];
43 ccv_nnc_tensor_view_t* a = z > 0 ? c : inputs[k];
44 ccv_nnc_tensor_view_t* b = z >= k ? inputs[z + 1] : inputs[z];
45 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
46 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
47 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
48 ccv_nnc_tensor_view_get_dim(a, dim);
49 assert(ccv_nnc_tensor_view_check_dim(b, dim));
50 assert(ccv_nnc_tensor_view_check_dim(c, dim));
51 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c))
52 {
53 // Super optimal case, just do one for-loop for sum.
54 const int tensor_count = ccv_nnc_tensor_count(a->info);
55 for (x = 0; x < tensor_count; x++)
56 c->data.f32[x] = a->data.f32[x] + b->data.f32[x];
57 continue;
58 }
59 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
60 ccv_nnc_tensor_view_get_stride(a, astride);
61 ccv_nnc_tensor_view_get_stride(b, bstride);
62 ccv_nnc_tensor_view_get_stride(c, cstride);
63 int i[CCV_NNC_MAX_DIM + 2];
64 float* const ap = a->data.f32;
65 float* const bp = b->data.f32;
66 float* const cp = c->data.f32;
67 const int count = dim[2] * dim[3];
68 if (astride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3] && astride[3] == 1 && bstride[3] == 1 && cstride[3] == 1)
69 {
70 // Special casing if the ainc[3] is the same as dim[3] (do memcpy for the last two dim)
71 for (i[0] = 0; i[0] < dim[0]; i[0]++)
72 {
73 float* ap0 = ap + i[0] * astride[0];
74 float* bp0 = bp + i[0] * bstride[0];
75 float* cp0 = cp + i[0] * cstride[0];
76 for (i[1] = 0; i[1] < dim[1]; i[1]++)
77 {
78 for (x = 0; x < count; x++)
79 cp0[x] = ap0[x] + bp0[x];
80 ap0 += astride[1];
81 bp0 += bstride[1];
82 cp0 += cstride[1];
83 }
84 }
85 continue;
86 }
87 // Non-optimal case, need to do skip copy.
88 for (i[0] = 0; i[0] < dim[0]; i[0]++)
89 {
90 float* const ap0 = ap + i[0] * astride[0];
91 float* const bp0 = bp + i[0] * bstride[0];
92 float* const cp0 = cp + i[0] * cstride[0];
93 for (i[1] = 0; i[1] < dim[1]; i[1]++)
94 {
95 float* ap1 = ap0 + i[1] * astride[1];
96 float* bp1 = bp0 + i[1] * bstride[1];
97 float* cp1 = cp0 + i[1] * cstride[1];
98 for (i[2] = 0; i[2] < dim[2]; i[2]++)
99 {
100 for (x = 0; x < dim[3]; x++)
101 cp1[x * cstride[3]] = ap1[x * astride[3]] + bp1[x * bstride[3]];
102 ap1 += astride[2];
103 bp1 += bstride[2];
104 cp1 += cstride[2];
105 }
106 }
107 }
108 }
109}
110
111void _ccv_nnc_ewsum_forw_cpu_ref_i32(ccv_nnc_tensor_view_t* const* const inputs, const int input_size, ccv_nnc_tensor_view_t* const* const outputs, const int output_size)
112{
113 if (input_size == 1 && output_size == 1)
114 {
115 _ccv_nnc_tensor_transfer_cpu_ref_f32(inputs[0], outputs[0]);
116 return;
117 }
118 // Assuming this is float 32.
119 int dim[CCV_NNC_MAX_DIM_ALLOC];
120 int astride[CCV_NNC_MAX_DIM_ALLOC];
121 int bstride[CCV_NNC_MAX_DIM_ALLOC];
122 int cstride[CCV_NNC_MAX_DIM_ALLOC];
123 int x, z;
124 int k = 0;
125 // Bad, I promised this can be an in-place operation. Need to first find out whether any of them share the same pointer.
126 for (z = 1; z < input_size; z++)
127 {
128 ccv_nnc_tensor_view_t* c = outputs[0];
129 ccv_nnc_tensor_view_t* a = inputs[z];
130 if (c->data.f32 == a->data.f32)
131 {
132 k = z;
133 break;
134 }
135 }
136 for (z = 0; z < input_size - 1; z++)
137 {
138 ccv_nnc_tensor_view_t* c = outputs[0];
139 ccv_nnc_tensor_view_t* a = z > 0 ? c : inputs[k];
140 ccv_nnc_tensor_view_t* b = z >= k ? inputs[z + 1] : inputs[z];
141 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
142 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
143 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
144 ccv_nnc_tensor_view_get_dim(a, dim);
145 assert(ccv_nnc_tensor_view_check_dim(b, dim));
146 assert(ccv_nnc_tensor_view_check_dim(c, dim));
147 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c))
148 {
149 // Super optimal case, just do one for-loop for sum.
150 const int tensor_count = ccv_nnc_tensor_count(a->info);
151 for (x = 0; x < tensor_count; x++)
152 c->data.f32[x] = a->data.f32[x] + b->data.f32[x];
153 continue;
154 }
155 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
156 ccv_nnc_tensor_view_get_stride(a, astride);
157 ccv_nnc_tensor_view_get_stride(b, bstride);
158 ccv_nnc_tensor_view_get_stride(c, cstride);
159 int i[CCV_NNC_MAX_DIM + 2];
160 int* const ap = a->data.i32;
161 int* const bp = b->data.i32;
162 int* const cp = c->data.i32;
163 const int count = dim[2] * dim[3];
164 if (astride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3] && astride[3] == 1 && bstride[3] == 1 && cstride[3] == 1)
165 {
166 // Special casing if the ainc[3] is the same as dim[3] (do memcpy for the last two dim)
167 for (i[0] = 0; i[0] < dim[0]; i[0]++)
168 {
169 int* ap0 = ap + i[0] * astride[0];
170 int* bp0 = bp + i[0] * bstride[0];
171 int* cp0 = cp + i[0] * cstride[0];
172 for (i[1] = 0; i[1] < dim[1]; i[1]++)
173 {
174 for (x = 0; x < count; x++)
175 cp0[x] = ap0[x] + bp0[x];
176 ap0 += astride[1];
177 bp0 += bstride[1];
178 cp0 += cstride[1];
179 }
180 }
181 continue;
182 }
183 // Non-optimal case, need to do skip copy.
184 for (i[0] = 0; i[0] < dim[0]; i[0]++)
185 {
186 int* const ap0 = ap + i[0] * astride[0];
187 int* const bp0 = bp + i[0] * bstride[0];
188 int* const cp0 = cp + i[0] * cstride[0];
189 for (i[1] = 0; i[1] < dim[1]; i[1]++)
190 {
191 int* ap1 = ap0 + i[1] * astride[1];
192 int* bp1 = bp0 + i[1] * bstride[1];
193 int* cp1 = cp0 + i[1] * cstride[1];
194 for (i[2] = 0; i[2] < dim[2]; i[2]++)
195 {
196 for (x = 0; x < dim[3]; x++)
197 cp1[x * cstride[3]] = ap1[x * astride[3]] + bp1[x * bstride[3]];
198 ap1 += astride[2];
199 bp1 += bstride[2];
200 cp1 += cstride[2];
201 }
202 }
203 }
204 }
205}
206
207static int _ccv_nnc_ewsum_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
208{
209 if (outputs[0]->info.datatype == CCV_32S)
210 _ccv_nnc_ewsum_forw_cpu_ref_i32((ccv_nnc_tensor_view_t**)inputs, input_size, (ccv_nnc_tensor_view_t**)outputs, output_size);
211 else
212 _ccv_nnc_ewsum_forw_cpu_ref_f32((ccv_nnc_tensor_view_t**)inputs, input_size, (ccv_nnc_tensor_view_t**)outputs, output_size);
213 return CCV_NNC_EXEC_SUCCESS;
214}
215
216static int _ccv_nnc_ewsum_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
217{
218 // D[x + y + z, x] = 1
219 int i;
220 if (inputs[0] == 0)
221 {
222 // Set them to 1.
223 for (i = 0; i < output_size; i++)
224 if (outputs[i])
225 _ccv_nnc_tensor_set_cpu_ref_f32((ccv_nnc_tensor_view_t*)outputs[i], 1);
226 } else {
227 // Copy over the gradient (If they are not pointing to the same tensor already).
228 for (i = 0; i < output_size; i++)
229 if (outputs[i] && inputs[0]->data.f32 != outputs[i]->data.f32)
230 _ccv_nnc_tensor_transfer_cpu_ref_f32((ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)outputs[i]);
231 }
232 return CCV_NNC_EXEC_SUCCESS;
233}
234
235void _ccv_nnc_ewprod_forw_cpu_ref(ccv_nnc_tensor_view_t* const* const inputs, const int input_size, ccv_nnc_tensor_view_t* const* const outputs, const int output_size)
236{
237 if (input_size == 1 && output_size == 1)
238 {
239 _ccv_nnc_tensor_transfer_cpu_ref_f32(inputs[0], outputs[0]);
240 return;
241 }
242 // Assuming this is float 32.
243 int dim[CCV_NNC_MAX_DIM_ALLOC];
244 int astride[CCV_NNC_MAX_DIM_ALLOC];
245 int bstride[CCV_NNC_MAX_DIM_ALLOC];
246 int cstride[CCV_NNC_MAX_DIM_ALLOC];
247 int x, z;
248 int k = 0;
249 // Bad, I promised this can be an in-place operation. Need to first find out whether any of them share the same pointer.
250 for (z = 1; z < input_size; z++)
251 {
252 ccv_nnc_tensor_view_t* c = outputs[0];
253 ccv_nnc_tensor_view_t* a = inputs[z];
254 if (c->data.f32 == a->data.f32)
255 {
256 k = z;
257 break;
258 }
259 }
260 for (z = 0; z < input_size - 1; z++)
261 {
262 ccv_nnc_tensor_view_t* c = outputs[0];
263 ccv_nnc_tensor_view_t* a = z > 0 ? c : inputs[k];
264 ccv_nnc_tensor_view_t* b = z >= k ? inputs[z + 1] : inputs[z];
265 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
266 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
267 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
268 ccv_nnc_tensor_view_get_dim(a, dim);
269 assert(ccv_nnc_tensor_view_check_dim(b, dim));
270 assert(ccv_nnc_tensor_view_check_dim(c, dim));
271 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c))
272 {
273 // Super optimal case, just do one for-loop for sum.
274 const int tensor_count = ccv_nnc_tensor_count(a->info);
275 for (x = 0; x < tensor_count; x++)
276 c->data.f32[x] = a->data.f32[x] * b->data.f32[x];
277 continue;
278 }
279 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
280 ccv_nnc_tensor_view_get_stride(a, astride);
281 ccv_nnc_tensor_view_get_stride(b, bstride);
282 ccv_nnc_tensor_view_get_stride(c, cstride);
283 int i[CCV_NNC_MAX_DIM + 2];
284 float* const ap = a->data.f32;
285 float* const bp = b->data.f32;
286 float* const cp = c->data.f32;
287 const int count = dim[2] * dim[3];
288 if (astride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3])
289 {
290 // Special casing if the ainc[3] is the same as dim[3]
291 for (i[0] = 0; i[0] < dim[0]; i[0]++)
292 {
293 float* ap0 = ap + i[0] * astride[0];
294 float* bp0 = bp + i[0] * bstride[0];
295 float* cp0 = cp + i[0] * cstride[0];
296 for (i[1] = 0; i[1] < dim[1]; i[1]++)
297 {
298 for (x = 0; x < count; x++)
299 cp0[x] = ap0[x] * bp0[x];
300 ap0 += astride[1];
301 bp0 += bstride[1];
302 cp0 += cstride[1];
303 }
304 }
305 continue;
306 }
307 // Non-optimal case, need to do skip copy.
308 for (i[0] = 0; i[0] < dim[0]; i[0]++)
309 {
310 float* const ap0 = ap + i[0] * astride[0];
311 float* const bp0 = bp + i[0] * bstride[0];
312 float* const cp0 = cp + i[0] * cstride[0];
313 for (i[1] = 0; i[1] < dim[1]; i[1]++)
314 {
315 float* ap1 = ap0 + i[1] * astride[1];
316 float* bp1 = bp0 + i[1] * bstride[1];
317 float* cp1 = cp0 + i[1] * cstride[1];
318 for (i[2] = 0; i[2] < dim[2]; i[2]++)
319 {
320 for (x = 0; x < dim[3]; x++)
321 cp1[x] = ap1[x] * bp1[x];
322 ap1 += astride[2];
323 bp1 += bstride[2];
324 cp1 += cstride[2];
325 }
326 }
327 }
328 }
329}
330
331static int _ccv_nnc_ewprod_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
332{
333 _ccv_nnc_ewprod_forw_cpu_ref((ccv_nnc_tensor_view_t**)inputs, input_size, (ccv_nnc_tensor_view_t**)outputs, output_size);
334 return CCV_NNC_EXEC_SUCCESS;
335}
336
337static int _ccv_nnc_ewprod_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
338{
339 // D[x * y * z, x] = y * z
340 // Assuming this is float 32.
341 int dim[CCV_NNC_MAX_DIM_ALLOC];
342 int gstride[CCV_NNC_MAX_DIM_ALLOC];
343 int astride[CCV_NNC_MAX_DIM_ALLOC];
344 int bstride[CCV_NNC_MAX_DIM_ALLOC];
345 int hstride[CCV_NNC_MAX_DIM_ALLOC];
346 int x, z;
347 ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
348 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[output_size + 1];
349 if (g == 0)
350 {
351 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
352 ccv_nnc_tensor_view_get_dim(b, dim);
353 ccv_nnc_tensor_view_get_stride(b, bstride);
354 for (z = 0; z < output_size; z++)
355 {
356 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[z + 1];
357 ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[z];
358 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
359 assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2);
360 assert(ccv_nnc_tensor_view_check_dim(a, dim));
361 assert(ccv_nnc_tensor_view_check_dim(h, dim));
362 ccv_nnc_tensor_view_get_stride(a, astride);
363 ccv_nnc_tensor_view_get_stride(h, hstride);
364 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(h))
365 {
366 // Super optimal case, just do one for-loop for sum.
367 const int tensor_count = ccv_nnc_tensor_count(b->info);
368 for (x = 0; x < tensor_count; x++)
369 h->data.f32[x] = b->data.f32[x] / a->data.f32[x];
370 continue;
371 }
372 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
373 int i[CCV_NNC_MAX_DIM + 2];
374 float* const ap = a->data.f32;
375 float* const bp = b->data.f32;
376 float* const hp = h->data.f32;
377 const int count = dim[2] * dim[3];
378 if (astride[2] == dim[3] && bstride[2] == dim[3] && hstride[2] == dim[3])
379 {
380 // Special casing if the ainc[3] is the same as dim[3]
381 for (i[0] = 0; i[0] < dim[0]; i[0]++)
382 {
383 float* ap0 = ap + i[0] * astride[0];
384 float* bp0 = bp + i[0] * bstride[0];
385 float* hp0 = hp + i[0] * hstride[0];
386 for (i[1] = 0; i[1] < dim[1]; i[1]++)
387 {
388 for (x = 0; x < count; x++)
389 hp0[x] = bp0[x] / ap0[x];
390 ap0 += astride[1];
391 bp0 += bstride[1];
392 hp0 += hstride[1];
393 }
394 }
395 continue;
396 }
397 // Non-optimal case, need to do skip copy.
398 for (i[0] = 0; i[0] < dim[0]; i[0]++)
399 {
400 float* const ap0 = ap + i[0] * astride[0];
401 float* const bp0 = bp + i[0] * bstride[0];
402 float* const hp0 = hp + i[0] * hstride[0];
403 for (i[1] = 0; i[1] < dim[1]; i[1]++)
404 {
405 float* ap1 = ap0 + i[1] * astride[1];
406 float* bp1 = bp0 + i[1] * bstride[1];
407 float* hp1 = hp0 + i[1] * hstride[1];
408 for (i[2] = 0; i[2] < dim[2]; i[2]++)
409 {
410 for (x = 0; x < dim[3]; x++)
411 hp1[x] = bp1[x] / ap1[x];
412 ap1 += astride[2];
413 bp1 += bstride[2];
414 hp1 += hstride[2];
415 }
416 }
417 }
418 }
419 } else {
420 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
421 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
422 ccv_nnc_tensor_view_get_dim(b, dim);
423 assert(ccv_nnc_tensor_view_check_dim(g, dim));
424 ccv_nnc_tensor_view_get_stride(b, bstride);
425 ccv_nnc_tensor_view_get_stride(g, gstride);
426 for (z = 0; z < output_size; z++)
427 {
428 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[z + 1];
429 ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[z];
430 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
431 assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2);
432 assert(ccv_nnc_tensor_view_check_dim(a, dim));
433 assert(ccv_nnc_tensor_view_check_dim(h, dim));
434 ccv_nnc_tensor_view_get_stride(a, astride);
435 ccv_nnc_tensor_view_get_stride(h, hstride);
436 if (!CCV_IS_TENSOR_VIEW(g) && !CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(h))
437 {
438 // Super optimal case, just do one for-loop for sum.
439 const int tensor_count = ccv_nnc_tensor_count(g->info);
440 for (x = 0; x < tensor_count; x++)
441 h->data.f32[x] = g->data.f32[x] * b->data.f32[x] / a->data.f32[x];
442 continue;
443 }
444 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
445 int i[CCV_NNC_MAX_DIM + 2];
446 float* const gp = g->data.f32;
447 float* const ap = a->data.f32;
448 float* const bp = b->data.f32;
449 float* const hp = h->data.f32;
450 const int count = dim[2] * dim[3];
451 if (gstride[2] == dim[3] && astride[2] == dim[3] && bstride[2] == dim[3] && hstride[2] == dim[3])
452 {
453 // Special casing if the ainc[3] is the same as dim[3]
454 for (i[0] = 0; i[0] < dim[0]; i[0]++)
455 {
456 float* gp0 = gp + i[0] * gstride[0];
457 float* ap0 = ap + i[0] * astride[0];
458 float* bp0 = bp + i[0] * bstride[0];
459 float* hp0 = hp + i[0] * hstride[0];
460 for (i[1] = 0; i[1] < dim[1]; i[1]++)
461 {
462 for (x = 0; x < count; x++)
463 hp0[x] = gp0[x] * bp0[x] / ap0[x];
464 gp0 += gstride[1];
465 ap0 += astride[1];
466 bp0 += bstride[1];
467 hp0 += hstride[1];
468 }
469 }
470 continue;
471 }
472 // Non-optimal case, need to do skip copy.
473 for (i[0] = 0; i[0] < dim[0]; i[0]++)
474 {
475 float* const gp0 = gp + i[0] * gstride[0];
476 float* const ap0 = ap + i[0] * astride[0];
477 float* const bp0 = bp + i[0] * bstride[0];
478 float* const hp0 = hp + i[0] * hstride[0];
479 for (i[1] = 0; i[1] < dim[1]; i[1]++)
480 {
481 float* gp1 = gp0 + i[1] * gstride[1];
482 float* ap1 = ap0 + i[1] * astride[1];
483 float* bp1 = bp0 + i[1] * bstride[1];
484 float* hp1 = hp0 + i[1] * hstride[1];
485 for (i[2] = 0; i[2] < dim[2]; i[2]++)
486 {
487 for (x = 0; x < dim[3]; x++)
488 hp1[x] = gp1[x] * bp1[x] / ap1[x];
489 gp1 += gstride[2];
490 ap1 += astride[2];
491 bp1 += bstride[2];
492 hp1 += hstride[2];
493 }
494 }
495 }
496 }
497 }
498 return CCV_NNC_EXEC_SUCCESS;
499}
500
501static void _ccv_nnc_ewdiv_forw_cpu_ref(const float p, ccv_nnc_tensor_view_t* const a, ccv_nnc_tensor_view_t* const b, ccv_nnc_tensor_view_t* const c)
502{
503 // Assuming this is float 32.
504 int dim[CCV_NNC_MAX_DIM_ALLOC];
505 int astride[CCV_NNC_MAX_DIM_ALLOC];
506 int bstride[CCV_NNC_MAX_DIM_ALLOC];
507 int cstride[CCV_NNC_MAX_DIM_ALLOC];
508 if (a == 0) // Take 0 as all ones tensor.
509 {
510 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
511 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
512 ccv_nnc_tensor_view_get_dim(b, dim);
513 assert(ccv_nnc_tensor_view_check_dim(c, dim));
514 int x;
515 if (!CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c))
516 {
517 // Super optimal case, just do one for-loop for sum.
518 const int tensor_count = ccv_nnc_tensor_count(b->info);
519 for (x = 0; x < tensor_count; x++)
520 c->data.f32[x] = p / b->data.f32[x];
521 return;
522 }
523 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
524 ccv_nnc_tensor_view_get_stride(b, bstride);
525 ccv_nnc_tensor_view_get_stride(c, cstride);
526 int i[CCV_NNC_MAX_DIM + 2];
527 float* const bp = b->data.f32;
528 float* const cp = c->data.f32;
529 const int count = dim[2] * dim[3];
530 if (bstride[2] == dim[3] && cstride[2] == dim[3])
531 {
532 // Special casing if the ainc[3] is the same as dim[3]
533 for (i[0] = 0; i[0] < dim[0]; i[0]++)
534 {
535 float* bp0 = bp + i[0] * bstride[0];
536 float* cp0 = cp + i[0] * cstride[0];
537 for (i[1] = 0; i[1] < dim[1]; i[1]++)
538 {
539 for (x = 0; x < count; x++)
540 cp0[x] = p / bp0[x];
541 bp0 += bstride[1];
542 cp0 += cstride[1];
543 }
544 }
545 return;
546 }
547 // Non-optimal case, need to do skip copy.
548 for (i[0] = 0; i[0] < dim[0]; i[0]++)
549 {
550 float* const bp0 = bp + i[0] * bstride[0];
551 float* const cp0 = cp + i[0] * cstride[0];
552 for (i[1] = 0; i[1] < dim[1]; i[1]++)
553 {
554 float* bp1 = bp0 + i[1] * bstride[1];
555 float* cp1 = cp0 + i[1] * cstride[1];
556 for (i[2] = 0; i[2] < dim[2]; i[2]++)
557 {
558 for (x = 0; x < dim[3]; x++)
559 cp1[x] = p / bp1[x];
560 bp1 += bstride[2];
561 cp1 += cstride[2];
562 }
563 }
564 }
565 } else {
566 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
567 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
568 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
569 ccv_nnc_tensor_view_get_dim(a, dim);
570 assert(ccv_nnc_tensor_view_check_dim(b, dim));
571 assert(ccv_nnc_tensor_view_check_dim(c, dim));
572 int x;
573 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c))
574 {
575 // Super optimal case, just do one for-loop for sum.
576 const int tensor_count = ccv_nnc_tensor_count(a->info);
577 for (x = 0; x < tensor_count; x++)
578 c->data.f32[x] = p * a->data.f32[x] / b->data.f32[x];
579 return;
580 }
581 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
582 ccv_nnc_tensor_view_get_stride(a, astride);
583 ccv_nnc_tensor_view_get_stride(b, bstride);
584 ccv_nnc_tensor_view_get_stride(c, cstride);
585 int i[CCV_NNC_MAX_DIM + 2];
586 float* const ap = a->data.f32;
587 float* const bp = b->data.f32;
588 float* const cp = c->data.f32;
589 const int count = dim[2] * dim[3];
590 if (astride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3])
591 {
592 // Special casing if the ainc[3] is the same as dim[3]
593 for (i[0] = 0; i[0] < dim[0]; i[0]++)
594 {
595 float* ap0 = ap + i[0] * astride[0];
596 float* bp0 = bp + i[0] * bstride[0];
597 float* cp0 = cp + i[0] * cstride[0];
598 for (i[1] = 0; i[1] < dim[1]; i[1]++)
599 {
600 for (x = 0; x < count; x++)
601 cp0[x] = p * ap0[x] / bp0[x];
602 ap0 += astride[1];
603 bp0 += bstride[1];
604 cp0 += cstride[1];
605 }
606 }
607 return;
608 }
609 // Non-optimal case, need to do skip copy.
610 for (i[0] = 0; i[0] < dim[0]; i[0]++)
611 {
612 float* const ap0 = ap + i[0] * astride[0];
613 float* const bp0 = bp + i[0] * bstride[0];
614 float* const cp0 = cp + i[0] * cstride[0];
615 for (i[1] = 0; i[1] < dim[1]; i[1]++)
616 {
617 float* ap1 = ap0 + i[1] * astride[1];
618 float* bp1 = bp0 + i[1] * bstride[1];
619 float* cp1 = cp0 + i[1] * cstride[1];
620 for (i[2] = 0; i[2] < dim[2]; i[2]++)
621 {
622 for (x = 0; x < dim[3]; x++)
623 cp1[x] = p * ap1[x] / bp1[x];
624 ap1 += astride[2];
625 bp1 += bstride[2];
626 cp1 += cstride[2];
627 }
628 }
629 }
630 }
631}
632
633static int _ccv_nnc_ewdiv_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
634{
635 _ccv_nnc_ewdiv_forw_cpu_ref(1, (ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)inputs[1], (ccv_nnc_tensor_view_t*)outputs[0]);
636 return CCV_NNC_EXEC_SUCCESS;
637}
638
639static int _ccv_nnc_ewdiv_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
640{
641 // D[x / y, x] = 1 / y, D[x / y, y] = -x / y^2
642 if (output_size == 1 || outputs[1] == 0)
643 {
644 // When we only need D[x / y, x]
645 _ccv_nnc_ewdiv_forw_cpu_ref(1, (ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)inputs[2], (ccv_nnc_tensor_view_t*)outputs[0]);
646 return CCV_NNC_EXEC_SUCCESS;
647 }
648 int dim[CCV_NNC_MAX_DIM_ALLOC];
649 int gstride[CCV_NNC_MAX_DIM_ALLOC];
650 int bstride[CCV_NNC_MAX_DIM_ALLOC];
651 int cstride[CCV_NNC_MAX_DIM_ALLOC];
652 int hastride[CCV_NNC_MAX_DIM_ALLOC];
653 int hbstride[CCV_NNC_MAX_DIM_ALLOC];
654 ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
655 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[2];
656 ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)inputs[3];
657 ccv_nnc_tensor_view_t* ha = (ccv_nnc_tensor_view_t*)outputs[0];
658 ccv_nnc_tensor_view_t* hb = (ccv_nnc_tensor_view_t*)outputs[1];
659 if (g == 0)
660 {
661 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
662 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
663 assert(ccv_nnc_tensor_nd(hb->info.dim) <= CCV_NNC_MAX_DIM + 2);
664 ccv_nnc_tensor_view_get_dim(b, dim);
665 assert(ccv_nnc_tensor_view_check_dim(c, dim));
666 assert(ccv_nnc_tensor_view_check_dim(hb, dim));
667 if (ha)
668 {
669 assert(ccv_nnc_tensor_nd(ha->info.dim) <= CCV_NNC_MAX_DIM + 2);
670 assert(ccv_nnc_tensor_view_check_dim(ha, dim));
671 }
672 int x;
673 if (!CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c) && (ha == 0 || !CCV_IS_TENSOR_VIEW(ha)) && !CCV_IS_TENSOR_VIEW(hb))
674 {
675 // Super optimal case, just do one for-loop for sum.
676 const int tensor_count = ccv_nnc_tensor_count(b->info);
677 if (ha == 0)
678 {
679 for (x = 0; x < tensor_count; x++)
680 {
681 const float v = 1 / b->data.f32[x];
682 hb->data.f32[x] = -c->data.f32[x] * v;
683 }
684 } else {
685 for (x = 0; x < tensor_count; x++)
686 {
687 const float v = 1 / b->data.f32[x];
688 ha->data.f32[x] = v;
689 hb->data.f32[x] = -c->data.f32[x] * v;
690 }
691 }
692 return CCV_NNC_EXEC_SUCCESS;
693 }
694 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
695 ccv_nnc_tensor_view_get_stride(b, bstride);
696 ccv_nnc_tensor_view_get_stride(c, cstride);
697 ccv_nnc_tensor_view_get_stride(hb, hbstride);
698 int i[CCV_NNC_MAX_DIM + 2];
699 float* const bp = b->data.f32;
700 float* const cp = c->data.f32;
701 float* const hbp = hb->data.f32;
702 const int count = dim[2] * dim[3];
703 if (ha == 0)
704 {
705 if (bstride[2] == dim[3] && cstride[2] == dim[3] && hbstride[2] == dim[3])
706 {
707 // Special casing if the ainc[3] is the same as dim[3]
708 for (i[0] = 0; i[0] < dim[0]; i[0]++)
709 {
710 float* bp0 = bp + i[0] * bstride[0];
711 float* cp0 = cp + i[0] * cstride[0];
712 float* hbp0 = hbp + i[0] * hbstride[0];
713 for (i[1] = 0; i[1] < dim[1]; i[1]++)
714 {
715 for (x = 0; x < count; x++)
716 {
717 const float v = 1 / bp0[x];
718 hbp0[x] = -cp0[x] * v;
719 }
720 bp0 += bstride[1];
721 cp0 += cstride[1];
722 hbp0 += hbstride[1];
723 }
724 }
725 return CCV_NNC_EXEC_SUCCESS;
726 }
727 // Non-optimal case, need to do skip copy.
728 for (i[0] = 0; i[0] < dim[0]; i[0]++)
729 {
730 float* const bp0 = bp + i[0] * bstride[0];
731 float* const cp0 = cp + i[0] * cstride[0];
732 float* const hbp0 = hbp + i[0] * hbstride[0];
733 for (i[1] = 0; i[1] < dim[1]; i[1]++)
734 {
735 float* bp1 = bp0 + i[1] * bstride[1];
736 float* cp1 = cp0 + i[1] * cstride[1];
737 float* hbp1 = hbp0 + i[1] * hbstride[1];
738 for (i[2] = 0; i[2] < dim[2]; i[2]++)
739 {
740 for (x = 0; x < dim[3]; x++)
741 {
742 const float v = 1 / bp1[x];
743 hbp1[x] = -cp1[x] * v;
744 }
745 bp1 += bstride[2];
746 cp1 += cstride[2];
747 hbp1 += hbstride[2];
748 }
749 }
750 }
751 } else {
752 float* const hap = ha->data.f32;
753 ccv_nnc_tensor_view_get_stride(ha, hastride);
754 if (bstride[2] == dim[3] && cstride[2] == dim[3] && hastride[2] == dim[3] && hbstride[2] == dim[3])
755 {
756 // Special casing if the ainc[3] is the same as dim[3]
757 for (i[0] = 0; i[0] < dim[0]; i[0]++)
758 {
759 float* bp0 = bp + i[0] * bstride[0];
760 float* cp0 = cp + i[0] * cstride[0];
761 float* hap0 = hap + i[0] * hastride[0];
762 float* hbp0 = hbp + i[0] * hbstride[0];
763 for (i[1] = 0; i[1] < dim[1]; i[1]++)
764 {
765 for (x = 0; x < count; x++)
766 {
767 const float v = 1 / bp0[x];
768 hap0[x] = v;
769 hbp0[x] = -cp0[x] * v;
770 }
771 bp0 += bstride[1];
772 cp0 += cstride[1];
773 hap0 += hastride[1];
774 hbp0 += hbstride[1];
775 }
776 }
777 return CCV_NNC_EXEC_SUCCESS;
778 }
779 // Non-optimal case, need to do skip copy.
780 for (i[0] = 0; i[0] < dim[0]; i[0]++)
781 {
782 float* const bp0 = bp + i[0] * bstride[0];
783 float* const cp0 = cp + i[0] * cstride[0];
784 float* const hap0 = hap + i[0] * hastride[0];
785 float* const hbp0 = hbp + i[0] * hbstride[0];
786 for (i[1] = 0; i[1] < dim[1]; i[1]++)
787 {
788 float* bp1 = bp0 + i[1] * bstride[1];
789 float* cp1 = cp0 + i[1] * cstride[1];
790 float* hap1 = hap0 + i[1] * hastride[1];
791 float* hbp1 = hbp0 + i[1] * hbstride[1];
792 for (i[2] = 0; i[2] < dim[2]; i[2]++)
793 {
794 for (x = 0; x < dim[3]; x++)
795 {
796 const float v = 1 / bp1[x];
797 hap1[x] = v;
798 hbp1[x] = -cp1[x] * v;
799 }
800 bp1 += bstride[2];
801 cp1 += cstride[2];
802 hap1 += hastride[2];
803 hbp1 += hbstride[2];
804 }
805 }
806 }
807 }
808 } else {
809 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
810 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
811 assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
812 assert(ccv_nnc_tensor_nd(hb->info.dim) <= CCV_NNC_MAX_DIM + 2);
813 ccv_nnc_tensor_view_get_dim(b, dim);
814 assert(ccv_nnc_tensor_view_check_dim(g, dim));
815 assert(ccv_nnc_tensor_view_check_dim(c, dim));
816 assert(ccv_nnc_tensor_view_check_dim(hb, dim));
817 if (ha)
818 {
819 assert(ccv_nnc_tensor_nd(ha->info.dim) <= CCV_NNC_MAX_DIM + 2);
820 assert(ccv_nnc_tensor_view_check_dim(ha, dim));
821 }
822 int x;
823 if (!CCV_IS_TENSOR_VIEW(g) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c) && (ha == 0 || !CCV_IS_TENSOR_VIEW(ha)) && !CCV_IS_TENSOR_VIEW(hb))
824 {
825 // Super optimal case, just do one for-loop for sum.
826 const int tensor_count = ccv_nnc_tensor_count(g->info);
827 if (ha == 0)
828 {
829 for (x = 0; x < tensor_count; x++)
830 {
831 const float v = g->data.f32[x] / b->data.f32[x];
832 hb->data.f32[x] = -c->data.f32[x] * v;
833 }
834 } else {
835 for (x = 0; x < tensor_count; x++)
836 {
837 const float v = g->data.f32[x] / b->data.f32[x];
838 ha->data.f32[x] = v;
839 hb->data.f32[x] = -c->data.f32[x] * v;
840 }
841 }
842 return CCV_NNC_EXEC_SUCCESS;
843 }
844 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
845 ccv_nnc_tensor_view_get_stride(g, gstride);
846 ccv_nnc_tensor_view_get_stride(b, bstride);
847 ccv_nnc_tensor_view_get_stride(c, cstride);
848 ccv_nnc_tensor_view_get_stride(hb, hbstride);
849 int i[CCV_NNC_MAX_DIM + 2];
850 float* const gp = g->data.f32;
851 float* const bp = b->data.f32;
852 float* const cp = c->data.f32;
853 float* const hbp = hb->data.f32;
854 const int count = dim[2] * dim[3];
855 if (ha == 0)
856 {
857 if (gstride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3] && hbstride[2] == dim[3])
858 {
859 // Special casing if the ainc[3] is the same as dim[3]
860 for (i[0] = 0; i[0] < dim[0]; i[0]++)
861 {
862 float* gp0 = gp + i[0] * gstride[0];
863 float* bp0 = bp + i[0] * bstride[0];
864 float* cp0 = cp + i[0] * cstride[0];
865 float* hbp0 = hbp + i[0] * hbstride[0];
866 for (i[1] = 0; i[1] < dim[1]; i[1]++)
867 {
868 for (x = 0; x < count; x++)
869 {
870 const float v = gp0[x] / bp0[x];
871 hbp0[x] = -cp0[x] * v;
872 }
873 gp0 += gstride[1];
874 bp0 += bstride[1];
875 cp0 += cstride[1];
876 hbp0 += hbstride[1];
877 }
878 }
879 return CCV_NNC_EXEC_SUCCESS;
880 }
881 // Non-optimal case, need to do skip copy.
882 for (i[0] = 0; i[0] < dim[0]; i[0]++)
883 {
884 float* const gp0 = gp + i[0] * gstride[0];
885 float* const bp0 = bp + i[0] * bstride[0];
886 float* const cp0 = cp + i[0] * cstride[0];
887 float* const hbp0 = hbp + i[0] * hbstride[0];
888 for (i[1] = 0; i[1] < dim[1]; i[1]++)
889 {
890 float* gp1 = gp0 + i[1] * gstride[1];
891 float* bp1 = bp0 + i[1] * bstride[1];
892 float* cp1 = cp0 + i[1] * cstride[1];
893 float* hbp1 = hbp0 + i[1] * hbstride[1];
894 for (i[2] = 0; i[2] < dim[2]; i[2]++)
895 {
896 for (x = 0; x < dim[3]; x++)
897 {
898 const float v = gp1[x] / bp1[x];
899 hbp1[x] = -cp1[x] * v;
900 }
901 gp1 += gstride[2];
902 bp1 += bstride[2];
903 cp1 += cstride[2];
904 hbp1 += hbstride[2];
905 }
906 }
907 }
908 } else {
909 ccv_nnc_tensor_view_get_stride(ha, hastride);
910 float* const hap = ha->data.f32;
911 if (gstride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3] && hastride[2] == dim[3] && hbstride[2] == dim[3])
912 {
913 // Special casing if the ainc[3] is the same as dim[3]
914 for (i[0] = 0; i[0] < dim[0]; i[0]++)
915 {
916 float* gp0 = gp + i[0] * gstride[0];
917 float* bp0 = bp + i[0] * bstride[0];
918 float* cp0 = cp + i[0] * cstride[0];
919 float* hap0 = hap + i[0] * hastride[0];
920 float* hbp0 = hbp + i[0] * hbstride[0];
921 for (i[1] = 0; i[1] < dim[1]; i[1]++)
922 {
923 for (x = 0; x < count; x++)
924 {
925 const float v = gp0[x] / bp0[x];
926 hap0[x] = v;
927 hbp0[x] = -cp0[x] * v;
928 }
929 gp0 += gstride[1];
930 bp0 += bstride[1];
931 cp0 += cstride[1];
932 hap0 += hastride[1];
933 hbp0 += hbstride[1];
934 }
935 }
936 return CCV_NNC_EXEC_SUCCESS;
937 }
938 // Non-optimal case, need to do skip copy.
939 for (i[0] = 0; i[0] < dim[0]; i[0]++)
940 {
941 float* const gp0 = gp + i[0] * gstride[0];
942 float* const bp0 = bp + i[0] * bstride[0];
943 float* const cp0 = cp + i[0] * cstride[0];
944 float* const hap0 = hap + i[0] * hastride[0];
945 float* const hbp0 = hbp + i[0] * hbstride[0];
946 for (i[1] = 0; i[1] < dim[1]; i[1]++)
947 {
948 float* gp1 = gp0 + i[1] * gstride[1];
949 float* bp1 = bp0 + i[1] * bstride[1];
950 float* cp1 = cp0 + i[1] * cstride[1];
951 float* hap1 = hap0 + i[1] * hastride[1];
952 float* hbp1 = hbp0 + i[1] * hbstride[1];
953 for (i[2] = 0; i[2] < dim[2]; i[2]++)
954 {
955 for (x = 0; x < dim[3]; x++)
956 {
957 const float v = gp1[x] / bp1[x];
958 hap1[x] = v;
959 hbp1[x] = -cp1[x] * v;
960 }
961 gp1 += gstride[2];
962 bp1 += bstride[2];
963 cp1 += cstride[2];
964 hap1 += hastride[2];
965 hbp1 += hbstride[2];
966 }
967 }
968 }
969 }
970 }
971 return CCV_NNC_EXEC_SUCCESS;
972}
973
974static int _ccv_nnc_ewexp_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
975{
976 // Assuming this is float 32.
977 int dim[CCV_NNC_MAX_DIM_ALLOC];
978 int astride[CCV_NNC_MAX_DIM_ALLOC];
979 int bstride[CCV_NNC_MAX_DIM_ALLOC];
980 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
981 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
982 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
983 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
984 ccv_nnc_tensor_view_get_dim(a, dim);
985 assert(ccv_nnc_tensor_view_check_dim(b, dim));
986 int x;
987 if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b))
988 {
989 // Super optimal case, just do one for-loop for sum.
990 const int tensor_count = ccv_nnc_tensor_count(a->info);
991 for (x = 0; x < tensor_count; x++)
992 b->data.f32[x] = exp(a->data.f32[x]);
993 return CCV_NNC_EXEC_SUCCESS;
994 }
995 assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
996 ccv_nnc_tensor_view_get_stride(a, astride);
997 ccv_nnc_tensor_view_get_stride(b, bstride);
998 int i[CCV_NNC_MAX_DIM + 2];
999 float* const ap = a->data.f32;
1000 float* const bp = b->data.f32;
1001 const int count = dim[2] * dim[3];
1002 if (astride[2] == dim[3] && bstride[2] == dim[3])
1003 {
1004 // Special casing if the ainc[3] is the same as dim[3]
1005 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1006 {
1007 float* ap0 = ap + i[0] * astride[0];
1008 float* bp0 = bp + i[0] * bstride[0];
1009 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1010 {
1011 for (x = 0; x < count; x++)
1012 bp0[x] = exp(ap0[x]);
1013 ap0 += astride[1];
1014 bp0 += bstride[1];
1015 }
1016 }
1017 return CCV_NNC_EXEC_SUCCESS;
1018 }
1019 // Non-optimal case, need to do skip copy.
1020 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1021 {
1022 float* const ap0 = ap + i[0] * astride[0];
1023 float* const bp0 = bp + i[0] * bstride[0];
1024 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1025 {
1026 float* ap1 = ap0 + i[1] * astride[1];
1027 float* bp1 = bp0 + i[1] * bstride[1];
1028 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1029 {
1030 for (x = 0; x < dim[3]; x++)
1031 bp1[x] = exp(ap1[x]);
1032 ap1 += astride[2];
1033 bp1 += bstride[2];
1034 }
1035 }
1036 }
1037 return CCV_NNC_EXEC_SUCCESS;
1038}
1039
1040static int _ccv_nnc_ewexp_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1041{
1042 // D[Exp[x], x] = Exp[x]
1043 if (inputs[0] == 0)
1044 _ccv_nnc_tensor_transfer_cpu_ref_f32((ccv_nnc_tensor_view_t*)inputs[2], (ccv_nnc_tensor_view_t*)outputs[0]);
1045 else
1046 _ccv_nnc_ewprod_forw_cpu_ref((ccv_nnc_tensor_view_t*[]){
1047 (ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)inputs[2]
1048 }, 2, (ccv_nnc_tensor_view_t**)outputs, output_size);
1049 return CCV_NNC_EXEC_SUCCESS;
1050}
1051
1052static int _ccv_nnc_ewlog_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1053{
1054 // Assuming this is float 32.
1055 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1056 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
1057 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1058 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
1059 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1060 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1060, __extension__ __PRETTY_FUNCTION__
); }))
;
1061 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1061, __extension__ __PRETTY_FUNCTION__
); }))
;
1062 ccv_nnc_tensor_view_get_dim(a, dim);
1063 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1063, __extension__ __PRETTY_FUNCTION__
); }))
;
1064 int x;
1065 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW))
1066 {
1067 // Super optimal case, just do one for-loop for log.
1068 const int tensor_count = ccv_nnc_tensor_count(a->info);
1069 for (x = 0; x < tensor_count; x++)
1070 b->data.f32[x] = log(a->data.f32[x]);
1071 return CCV_NNC_EXEC_SUCCESS;
1072 }
1073 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1073, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1074 ccv_nnc_tensor_view_get_stride(a, astride);
1075 ccv_nnc_tensor_view_get_stride(b, bstride);
1076 int i[CCV_NNC_MAX_DIM(2) + 2];
1077 float* const ap = a->data.f32;
1078 float* const bp = b->data.f32;
1079 const int count = dim[2] * dim[3];
1080 if (astride[2] == dim[3] && bstride[2] == dim[3])
1081 {
1082 // Special casing if astride[2] is the same as dim[3]
1083 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1084 {
1085 float* ap0 = ap + i[0] * astride[0];
1086 float* bp0 = bp + i[0] * bstride[0];
1087 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1088 {
1089 for (x = 0; x < count; x++)
1090 bp0[x] = log(ap0[x]);
1091 ap0 += astride[1];
1092 bp0 += bstride[1];
1093 }
1094 }
1095 return CCV_NNC_EXEC_SUCCESS;
1096 }
1097 // Non-optimal case, need to do skip copy.
1098 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1099 {
1100 float* const ap0 = ap + i[0] * astride[0];
1101 float* const bp0 = bp + i[0] * bstride[0];
1102 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1103 {
1104 float* ap1 = ap0 + i[1] * astride[1];
1105 float* bp1 = bp0 + i[1] * bstride[1];
1106 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1107 {
1108 for (x = 0; x < dim[3]; x++)
1109 bp1[x] = log(ap1[x]);
1110 ap1 += astride[2];
1111 bp1 += bstride[2];
1112 }
1113 }
1114 }
1115 return CCV_NNC_EXEC_SUCCESS;
1116}
1117
1118static int _ccv_nnc_ewlog_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1119{
1120 // D[Log[x], x] = 1 / x
1121 _ccv_nnc_ewdiv_forw_cpu_ref(1, (ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)inputs[1], (ccv_nnc_tensor_view_t*)outputs[0]);
1122 return CCV_NNC_EXEC_SUCCESS;
1123}
1124
1125static int _ccv_nnc_ewsqrt_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1126{
1127 // Assuming this is float 32.
1128 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1129 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
1130 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1131 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
1132 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1133 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1133, __extension__ __PRETTY_FUNCTION__
); }))
;
1134 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1134, __extension__ __PRETTY_FUNCTION__
); }))
;
1135 ccv_nnc_tensor_view_get_dim(a, dim);
1136 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1136, __extension__ __PRETTY_FUNCTION__
); }))
;
1137 int x;
1138 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW))
1139 {
1140 // Super optimal case, just do one for-loop for sqrt.
1141 const int tensor_count = ccv_nnc_tensor_count(a->info);
1142 for (x = 0; x < tensor_count; x++)
1143 b->data.f32[x] = sqrt(a->data.f32[x]);
1144 return CCV_NNC_EXEC_SUCCESS;
1145 }
1146 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1146, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1147 ccv_nnc_tensor_view_get_stride(a, astride);
1148 ccv_nnc_tensor_view_get_stride(b, bstride);
1149 int i[CCV_NNC_MAX_DIM(2) + 2];
1150 float* const ap = a->data.f32;
1151 float* const bp = b->data.f32;
1152 const int count = dim[2] * dim[3];
1153 if (astride[2] == dim[3] && bstride[2] == dim[3])
1154 {
1155 // Special casing if astride[2] is the same as dim[3]
1156 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1157 {
1158 float* ap0 = ap + i[0] * astride[0];
1159 float* bp0 = bp + i[0] * bstride[0];
1160 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1161 {
1162 for (x = 0; x < count; x++)
1163 bp0[x] = sqrt(ap0[x]);
1164 ap0 += astride[1];
1165 bp0 += bstride[1];
1166 }
1167 }
1168 return CCV_NNC_EXEC_SUCCESS;
1169 }
1170 // Non-optimal case, need to do skip copy.
1171 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1172 {
1173 float* const ap0 = ap + i[0] * astride[0];
1174 float* const bp0 = bp + i[0] * bstride[0];
1175 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1176 {
1177 float* ap1 = ap0 + i[1] * astride[1];
1178 float* bp1 = bp0 + i[1] * bstride[1];
1179 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1180 {
1181 for (x = 0; x < dim[3]; x++)
1182 bp1[x] = sqrt(ap1[x]);
1183 ap1 += astride[2];
1184 bp1 += bstride[2];
1185 }
1186 }
1187 }
1188 return CCV_NNC_EXEC_SUCCESS;
1189}
1190
1191static int _ccv_nnc_ewsqrt_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1192{
1193 // D[Sqrt[x], x] = 0.5 / Sqrt[x]
1194 _ccv_nnc_ewdiv_forw_cpu_ref(0.5, (ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)inputs[2], (ccv_nnc_tensor_view_t*)outputs[0]);
1195 return CCV_NNC_EXEC_SUCCESS;
1196}
1197
1198static int _ccv_nnc_ewabs_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1199{
1200 // Assuming this is float 32.
1201 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1202 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
1203 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1204 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
1205 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1206 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1206, __extension__ __PRETTY_FUNCTION__
); }))
;
1207 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1207, __extension__ __PRETTY_FUNCTION__
); }))
;
1208 ccv_nnc_tensor_view_get_dim(a, dim);
1209 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1209, __extension__ __PRETTY_FUNCTION__
); }))
;
1210 int x;
1211 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW))
1212 {
1213 // Super optimal case, just do one for-loop for abs.
1214 const int tensor_count = ccv_nnc_tensor_count(a->info);
1215 for (x = 0; x < tensor_count; x++)
1216 b->data.f32[x] = fabs(a->data.f32[x]);
1217 return CCV_NNC_EXEC_SUCCESS;
1218 }
1219 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1219, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1220 ccv_nnc_tensor_view_get_stride(a, astride);
1221 ccv_nnc_tensor_view_get_stride(b, bstride);
1222 int i[CCV_NNC_MAX_DIM(2) + 2];
1223 float* const ap = a->data.f32;
1224 float* const bp = b->data.f32;
1225 const int count = dim[2] * dim[3];
1226 if (astride[2] == dim[3] && bstride[2] == dim[3])
1227 {
1228 // Special casing if astride[2] is the same as dim[3]
1229 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1230 {
1231 float* ap0 = ap + i[0] * astride[0];
1232 float* bp0 = bp + i[0] * bstride[0];
1233 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1234 {
1235 for (x = 0; x < count; x++)
1236 bp0[x] = fabs(ap0[x]);
1237 ap0 += astride[1];
1238 bp0 += bstride[1];
1239 }
1240 }
1241 return CCV_NNC_EXEC_SUCCESS;
1242 }
1243 // Non-optimal case, need to do skip copy.
1244 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1245 {
1246 float* const ap0 = ap + i[0] * astride[0];
1247 float* const bp0 = bp + i[0] * bstride[0];
1248 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1249 {
1250 float* ap1 = ap0 + i[1] * astride[1];
1251 float* bp1 = bp0 + i[1] * bstride[1];
1252 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1253 {
1254 for (x = 0; x < dim[3]; x++)
1255 bp1[x] = fabs(ap1[x]);
1256 ap1 += astride[2];
1257 bp1 += bstride[2];
1258 }
1259 }
1260 }
1261 return CCV_NNC_EXEC_SUCCESS;
1262}
1263
1264static int _ccv_nnc_ewabs_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1265{
1266 // Assuming this is float 32.
1267 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1268 int gstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1269 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
1270 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1271 ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0];
1272 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[1];
1273 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1274 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(g->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(g->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1274, __extension__ __PRETTY_FUNCTION__
); }))
;
  1. Assuming the condition is true
  2. Taking true branch
1275 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1275, __extension__ __PRETTY_FUNCTION__
); }))
;
  3. Assuming the condition is true
  4. Taking true branch
1276 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1276, __extension__ __PRETTY_FUNCTION__
); }))
;
  5. Assuming the condition is true
  6. Taking true branch
1277 ccv_nnc_tensor_view_get_dim(a, dim);
1278 assert(ccv_nnc_tensor_view_check_dim(g, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(g, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(g, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(g, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1278, __extension__ __PRETTY_FUNCTION__
); }))
;
  7. Assuming the condition is true
  8. Taking true branch
1279 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1279, __extension__ __PRETTY_FUNCTION__
); }))
;
  9. Assuming the condition is true
  10. Taking true branch
1280 int x;
1281 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(g)((*(int*)(g)) & CCV_TENSOR_VIEW))
  11. Assuming the condition is false
1282 {
1283 // Super optimal case, just do one for-loop for the gradient.
1284 const int tensor_count = ccv_nnc_tensor_count(a->info);
1285 for (x = 0; x < tensor_count; x++)
1286 b->data.f32[x] = a->data.f32[x] >= 0 ? g->data.f32[x] : -g->data.f32[x];
1287 return CCV_NNC_EXEC_SUCCESS;
1288 }
1289 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1289, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
  12. Taking true branch
1290 ccv_nnc_tensor_view_get_stride(g, astride);
1291 ccv_nnc_tensor_view_get_stride(a, astride);
1292 ccv_nnc_tensor_view_get_stride(b, bstride);
1293 int i[CCV_NNC_MAX_DIM(2) + 2];
1294 float* const gp = g->data.f32;
1295 float* const ap = a->data.f32;
1296 float* const bp = b->data.f32;
1297 const int count = dim[2] * dim[3];
1298 if (astride[2] == dim[3] && bstride[2] == dim[3])
  13. Assuming the condition is true
  14. Assuming the condition is true
  15. Taking true branch
1299 {
1300 // Special casing if astride[2] is the same as dim[3]
1301 for (i[0] = 0; i[0] < dim[0]; i[0]++)
  16. Assuming the condition is true
  17. Loop condition is true. Entering loop body
1302 {
1303 float* gp0 = gp + i[0] * gstride[0];
  18. The right operand of '*' is a garbage value
1304 float* ap0 = ap + i[0] * astride[0];
1305 float* bp0 = bp + i[0] * bstride[0];
1306 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1307 {
1308 for (x = 0; x < count; x++)
1309 bp0[x] = ap0[x] >= 0 ? gp0[x] : -gp0[x];
1310 gp0 += gstride[1];
1311 ap0 += astride[1];
1312 bp0 += bstride[1];
1313 }
1314 }
1315 return CCV_NNC_EXEC_SUCCESS;
1316 }
1317 // Non-optimal case, need to do skip copy.
1318 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1319 {
1320 float* const gp0 = gp + i[0] * gstride[0];
1321 float* const ap0 = ap + i[0] * astride[0];
1322 float* const bp0 = bp + i[0] * bstride[0];
1323 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1324 {
1325 float* gp1 = gp0 + i[1] * gstride[1];
1326 float* ap1 = ap0 + i[1] * astride[1];
1327 float* bp1 = bp0 + i[1] * bstride[1];
1328 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1329 {
1330 for (x = 0; x < dim[3]; x++)
1331 bp1[x] = ap1[x] >= 0 ? gp1[x] : -gp1[x];
1332 gp1 += gstride[2];
1333 ap1 += astride[2];
1334 bp1 += bstride[2];
1335 }
1336 }
1337 }
1338 return CCV_NNC_EXEC_SUCCESS;
1339}
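The warning above (note 18, line 1303) stems from lines 1290-1291: ccv_nnc_tensor_view_get_stride() is called twice with astride as the destination, first for g and then for a, so the gstride array declared at line 1268 is never written before gstride[0] is multiplied at line 1303 (and gstride[1]/gstride[2] are read at lines 1310, 1320, 1325 and 1332). A minimal sketch of the presumable fix, assuming the intent was to fill one stride array per tensor view the way _ccv_nnc_clamp_back does at lines 1536-1538; this is not a verbatim patch from the repository:

	ccv_nnc_tensor_view_get_stride(g, gstride); /* was (g, astride); fills g's strides so gstride[0..2] are defined */
	ccv_nnc_tensor_view_get_stride(a, astride);
	ccv_nnc_tensor_view_get_stride(b, bstride);

With gstride initialized, the pointer arithmetic at lines 1303, 1310, 1320, 1325 and 1332 reads defined values and the garbage-value path reported here should no longer be reachable.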
1340
1341static int _ccv_nnc_clamp_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1342{
1343 // Assuming this is float 32.
1344 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1345 int astride[CCV_NNC_MAX_DIM_ALLOC(12)];
1346 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1347 ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
1348 ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
1349 assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1349, __extension__ __PRETTY_FUNCTION__
); }))
;
1350 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1350, __extension__ __PRETTY_FUNCTION__
); }))
;
1351 ccv_nnc_tensor_view_get_dim(a, dim);
1352 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1352, __extension__ __PRETTY_FUNCTION__
); }))
;
1353 int x;
1354 const float min = cmd.info.clamp.min;
1355 const float max = cmd.info.clamp.max;
1356 assert(!isnan(min) || !isnan(max))((void) sizeof ((!__builtin_isnan (min) || !__builtin_isnan (
max)) ? 1 : 0), __extension__ ({ if (!__builtin_isnan (min) ||
!__builtin_isnan (max)) ; else __assert_fail ("!isnan(min) || !isnan(max)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1356, __extension__ __PRETTY_FUNCTION__
); }))
;
1357 if (!CCV_IS_TENSOR_VIEW(a)((*(int*)(a)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW))
1358 {
1359 // Super optimal case, just do one for-loop for clamp.
1360 const int tensor_count = ccv_nnc_tensor_count(a->info);
1361 if (isnan(min)__builtin_isnan (min))
1362 {
1363 for (x = 0; x < tensor_count; x++)
1364 b->data.f32[x] = ccv_min(a->data.f32[x], max)({ typeof (a->data.f32[x]) _a = (a->data.f32[x]); typeof
(max) _b = (max); (_a < _b) ? _a : _b; })
;
1365 } else if (isnan(max)__builtin_isnan (max)) {
1366 for (x = 0; x < tensor_count; x++)
1367 b->data.f32[x] = ccv_max(a->data.f32[x], min)({ typeof (a->data.f32[x]) _a = (a->data.f32[x]); typeof
(min) _b = (min); (_a > _b) ? _a : _b; })
;
1368 } else {
1369 for (x = 0; x < tensor_count; x++)
1370 b->data.f32[x] = ccv_clamp(a->data.f32[x], min, max)({ typeof (min) _a = (min); typeof (max) _b = (max); typeof (
a->data.f32[x]) _x = (a->data.f32[x]); (_x < _a) ? _a
: ((_x > _b) ? _b : _x); })
;
1371 }
1372 return CCV_NNC_EXEC_SUCCESS;
1373 }
1374 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1374, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1375 ccv_nnc_tensor_view_get_stride(a, astride);
1376 ccv_nnc_tensor_view_get_stride(b, bstride);
1377 int i[CCV_NNC_MAX_DIM(2) + 2];
1378 float* const ap = a->data.f32;
1379 float* const bp = b->data.f32;
1380 const int count = dim[2] * dim[3];
1381 if (isnan(min)__builtin_isnan (min))
1382 {
1383 if (astride[2] == dim[3] && bstride[2] == dim[3])
1384 {
1385 // Special casing if astride[2] is the same as dim[3]
1386 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1387 {
1388 float* ap0 = ap + i[0] * astride[0];
1389 float* bp0 = bp + i[0] * bstride[0];
1390 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1391 {
1392 for (x = 0; x < count; x++)
1393 bp0[x] = ccv_min(ap0[x], max)({ typeof (ap0[x]) _a = (ap0[x]); typeof (max) _b = (max); (_a
< _b) ? _a : _b; })
;
1394 ap0 += astride[1];
1395 bp0 += bstride[1];
1396 }
1397 }
1398 return CCV_NNC_EXEC_SUCCESS;
1399 }
1400 // Non-optimal case, need to do skip copy.
1401 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1402 {
1403 float* const ap0 = ap + i[0] * astride[0];
1404 float* const bp0 = bp + i[0] * bstride[0];
1405 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1406 {
1407 float* ap1 = ap0 + i[1] * astride[1];
1408 float* bp1 = bp0 + i[1] * bstride[1];
1409 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1410 {
1411 for (x = 0; x < dim[3]; x++)
1412 bp1[x] = ccv_min(ap1[x], max)({ typeof (ap1[x]) _a = (ap1[x]); typeof (max) _b = (max); (_a
< _b) ? _a : _b; })
;
1413 ap1 += astride[2];
1414 bp1 += bstride[2];
1415 }
1416 }
1417 }
1418 } else if (isnan(max)__builtin_isnan (max)) {
1419 if (astride[2] == dim[3] && bstride[2] == dim[3])
1420 {
1421 // Special casing if astride[2] is the same as dim[3]
1422 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1423 {
1424 float* ap0 = ap + i[0] * astride[0];
1425 float* bp0 = bp + i[0] * bstride[0];
1426 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1427 {
1428 for (x = 0; x < count; x++)
1429 bp0[x] = ccv_max(ap0[x], min)({ typeof (ap0[x]) _a = (ap0[x]); typeof (min) _b = (min); (_a
> _b) ? _a : _b; })
;
1430 ap0 += astride[1];
1431 bp0 += bstride[1];
1432 }
1433 }
1434 return CCV_NNC_EXEC_SUCCESS;
1435 }
1436 // Non-optimal case, need to do skip copy.
1437 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1438 {
1439 float* const ap0 = ap + i[0] * astride[0];
1440 float* const bp0 = bp + i[0] * bstride[0];
1441 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1442 {
1443 float* ap1 = ap0 + i[1] * astride[1];
1444 float* bp1 = bp0 + i[1] * bstride[1];
1445 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1446 {
1447 for (x = 0; x < dim[3]; x++)
1448 bp1[x] = ccv_max(ap1[x], min)({ typeof (ap1[x]) _a = (ap1[x]); typeof (min) _b = (min); (_a
> _b) ? _a : _b; })
;
1449 ap1 += astride[2];
1450 bp1 += bstride[2];
1451 }
1452 }
1453 }
1454 } else {
1455 if (astride[2] == dim[3] && bstride[2] == dim[3])
1456 {
1457 // Special casing if astride[2] is the same as dim[3]
1458 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1459 {
1460 float* ap0 = ap + i[0] * astride[0];
1461 float* bp0 = bp + i[0] * bstride[0];
1462 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1463 {
1464 for (x = 0; x < count; x++)
1465 bp0[x] = ccv_clamp(ap0[x], min, max)({ typeof (min) _a = (min); typeof (max) _b = (max); typeof (
ap0[x]) _x = (ap0[x]); (_x < _a) ? _a : ((_x > _b) ? _b
: _x); })
;
1466 ap0 += astride[1];
1467 bp0 += bstride[1];
1468 }
1469 }
1470 return CCV_NNC_EXEC_SUCCESS;
1471 }
1472 // Non-optimal case, need to do skip copy.
1473 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1474 {
1475 float* const ap0 = ap + i[0] * astride[0];
1476 float* const bp0 = bp + i[0] * bstride[0];
1477 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1478 {
1479 float* ap1 = ap0 + i[1] * astride[1];
1480 float* bp1 = bp0 + i[1] * bstride[1];
1481 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1482 {
1483 for (x = 0; x < dim[3]; x++)
1484 bp1[x] = ccv_clamp(ap1[x], min, max)({ typeof (min) _a = (min); typeof (max) _b = (max); typeof (
ap1[x]) _x = (ap1[x]); (_x < _a) ? _a : ((_x > _b) ? _b
: _x); })
;
1485 ap1 += astride[2];
1486 bp1 += bstride[2];
1487 }
1488 }
1489 }
1490 }
1491 return CCV_NNC_EXEC_SUCCESS;
1492}
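For reference, the forward clamp treats a NaN bound as "unbounded on that side": isnan(min) selects the min-against-max branch (line 1364), isnan(max) selects the max-against-min branch (line 1367), and the assert at line 1356 only requires that at least one of the two bounds is not NaN. Below is a small self-contained sketch of that per-element convention; clamp_like_ccv and its demo are hypothetical names for illustration, not part of the library:

	#include <math.h>
	#include <stdio.h>

	/* Mirrors the per-element logic of _ccv_nnc_clamp_forw: a NaN bound means
	 * the value is not clamped on that side. */
	static float clamp_like_ccv(float x, float min, float max)
	{
		if (isnan(min))
			return x < max ? x : max; /* upper bound only, as in line 1364 */
		if (isnan(max))
			return x > min ? x : min; /* lower bound only, as in line 1367 */
		return x < min ? min : (x > max ? max : x); /* both bounds, as in line 1370 */
	}

	int main(void)
	{
		printf("%g %g %g\n",
			clamp_like_ccv(7.f, NAN, 6.f),   /* 6: upper bound only */
			clamp_like_ccv(-3.f, 0.f, NAN),  /* 0: lower bound only */
			clamp_like_ccv(0.5f, 0.f, 1.f)); /* 0.5: already inside [0, 1] */
		return 0;
	}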
1493
1494static int _ccv_nnc_clamp_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
1495{
1496 assert(input_size == 3)((void) sizeof ((input_size == 3) ? 1 : 0), __extension__ ({ if
(input_size == 3) ; else __assert_fail ("input_size == 3", "ew/ccv_nnc_ew_cpu_ref.c"
, 1496, __extension__ __PRETTY_FUNCTION__); }))
;
1497 const ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0]; // gradient
1498 const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[2];
1499 assert(output_size == 1)((void) sizeof ((output_size == 1) ? 1 : 0), __extension__ ({
if (output_size == 1) ; else __assert_fail ("output_size == 1"
, "ew/ccv_nnc_ew_cpu_ref.c", 1499, __extension__ __PRETTY_FUNCTION__
); }))
;
1500 ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[0];
1501 // Assuming this is float 32.
1502 int dim[CCV_NNC_MAX_DIM_ALLOC(12)];
1503 int hstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1504 int bstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1505 assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(h->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(h->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1505, __extension__ __PRETTY_FUNCTION__
); }))
;
1506 assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1506, __extension__ __PRETTY_FUNCTION__
); }))
;
1507 ccv_nnc_tensor_view_get_dim(g, dim);
1508 ccv_nnc_tensor_view_get_dim(h, dim);
1509 assert(ccv_nnc_tensor_view_check_dim(b, dim))((void) sizeof ((ccv_nnc_tensor_view_check_dim(b, dim)) ? 1 :
0), __extension__ ({ if (ccv_nnc_tensor_view_check_dim(b, dim
)) ; else __assert_fail ("ccv_nnc_tensor_view_check_dim(b, dim)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1509, __extension__ __PRETTY_FUNCTION__
); }))
;
1510 int x;
1511 const float min = cmd.info.clamp.min;
1512 const float max = cmd.info.clamp.max;
1513 assert(!isnan(min) || !isnan(max))((void) sizeof ((!__builtin_isnan (min) || !__builtin_isnan (
max)) ? 1 : 0), __extension__ ({ if (!__builtin_isnan (min) ||
!__builtin_isnan (max)) ; else __assert_fail ("!isnan(min) || !isnan(max)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1513, __extension__ __PRETTY_FUNCTION__
); }))
;
1514 if (g)
1515 {
1516 if (!CCV_IS_TENSOR_VIEW(g)((*(int*)(g)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(h)((*(int*)(h)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW))
1517 {
1518 // Super optimal case, just do one for-loop for the gradient.
1519 const int tensor_count = ccv_nnc_tensor_count(g->info);
1520 if (isnan(min)__builtin_isnan (min))
1521 {
1522 for (x = 0; x < tensor_count; x++)
1523 h->data.f32[x] = b->data.f32[x] >= max ? 0 : g->data.f32[x];
1524 } else if (isnan(max)__builtin_isnan (max)) {
1525 for (x = 0; x < tensor_count; x++)
1526 h->data.f32[x] = b->data.f32[x] <= min ? 0 : g->data.f32[x];
1527 } else {
1528 for (x = 0; x < tensor_count; x++)
1529 h->data.f32[x] = (b->data.f32[x] >= max || b->data.f32[x] <= min) ? 0 : g->data.f32[x];
1530 }
1531 return CCV_NNC_EXEC_SUCCESS;
1532 }
1533 int gstride[CCV_NNC_MAX_DIM_ALLOC(12)];
1534 assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(g->info.dim) <= (2) +
2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(g->info
.dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2"
, "ew/ccv_nnc_ew_cpu_ref.c", 1534, __extension__ __PRETTY_FUNCTION__
); }))
;
1535 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1535, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1536 ccv_nnc_tensor_view_get_stride(g, gstride);
1537 ccv_nnc_tensor_view_get_stride(b, bstride);
1538 ccv_nnc_tensor_view_get_stride(h, hstride);
1539 int i[CCV_NNC_MAX_DIM(2) + 2];
1540 float* const gp = g->data.f32;
1541 float* const bp = b->data.f32;
1542 float* const hp = h->data.f32;
1543 const int count = dim[2] * dim[3];
1544 const float min = cmd.info.clamp.min;
1545 const float max = cmd.info.clamp.max;
1546 assert(!isnan(min) || !isnan(max))((void) sizeof ((!__builtin_isnan (min) || !__builtin_isnan (
max)) ? 1 : 0), __extension__ ({ if (!__builtin_isnan (min) ||
!__builtin_isnan (max)) ; else __assert_fail ("!isnan(min) || !isnan(max)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1546, __extension__ __PRETTY_FUNCTION__
); }))
;
1547 if (isnan(min)__builtin_isnan (min))
1548 {
1549 if (gstride[2] == dim[3] && bstride[2] == dim[3] && hstride[2] == dim[3])
1550 {
1551 // Special casing if gstride[2] is the same as dim[3]
1552 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1553 {
1554 float* gp0 = gp + i[0] * gstride[0];
1555 float* bp0 = bp + i[0] * bstride[0];
1556 float* hp0 = hp + i[0] * hstride[0];
1557 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1558 {
1559 for (x = 0; x < count; x++)
1560 hp0[x] = bp0[x] >= max ? 0 : gp0[x];
1561 gp0 += gstride[1];
1562 bp0 += bstride[1];
1563 hp0 += hstride[1];
1564 }
1565 }
1566 return CCV_NNC_EXEC_SUCCESS;
1567 }
1568 // Non-optimal case, need to do skip copy.
1569 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1570 {
1571 float* const gp0 = gp + i[0] * gstride[0];
1572 float* const bp0 = bp + i[0] * bstride[0];
1573 float* const hp0 = hp + i[0] * hstride[0];
1574 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1575 {
1576 float* gp1 = gp0 + i[1] * gstride[1];
1577 float* bp1 = bp0 + i[1] * bstride[1];
1578 float* hp1 = hp0 + i[1] * hstride[1];
1579 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1580 {
1581 for (x = 0; x < dim[3]; x++)
1582 hp1[x] = bp1[x] >= max ? 0 : gp1[x];
1583 gp1 += gstride[2];
1584 bp1 += bstride[2];
1585 hp1 += hstride[2];
1586 }
1587 }
1588 }
1589 } else if (isnan(max)__builtin_isnan (max)) {
1590 if (gstride[2] == dim[3] && bstride[2] == dim[3] && hstride[2] == dim[3])
1591 {
1592 // Special casing if gstride[2] is the same as dim[3]
1593 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1594 {
1595 float* gp0 = gp + i[0] * gstride[0];
1596 float* bp0 = bp + i[0] * bstride[0];
1597 float* hp0 = hp + i[0] * hstride[0];
1598 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1599 {
1600 for (x = 0; x < count; x++)
1601 hp0[x] = bp0[x] <= min ? 0 : gp0[x];
1602 gp0 += gstride[1];
1603 bp0 += bstride[1];
1604 hp0 += hstride[1];
1605 }
1606 }
1607 return CCV_NNC_EXEC_SUCCESS;
1608 }
1609 // Non-optimal case, need to do skip copy.
1610 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1611 {
1612 float* const gp0 = gp + i[0] * gstride[0];
1613 float* const bp0 = bp + i[0] * bstride[0];
1614 float* const hp0 = hp + i[0] * hstride[0];
1615 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1616 {
1617 float* gp1 = gp0 + i[1] * gstride[1];
1618 float* bp1 = bp0 + i[1] * bstride[1];
1619 float* hp1 = hp0 + i[1] * hstride[1];
1620 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1621 {
1622 for (x = 0; x < dim[3]; x++)
1623 hp1[x] = bp1[x] <= min ? 0 : gp1[x];
1624 gp1 += gstride[2];
1625 bp1 += bstride[2];
1626 hp1 += hstride[2];
1627 }
1628 }
1629 }
1630 } else {
1631 if (gstride[2] == dim[3] && bstride[2] == dim[3] && hstride[2] == dim[3])
1632 {
1633 // Special casing if gstride[2] is the same as dim[3]
1634 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1635 {
1636 float* gp0 = gp + i[0] * gstride[0];
1637 float* bp0 = bp + i[0] * bstride[0];
1638 float* hp0 = hp + i[0] * hstride[0];
1639 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1640 {
1641 for (x = 0; x < count; x++)
1642 hp0[x] = (bp0[x] >= max || bp0[x] <= min) ? 0 : gp0[x];
1643 gp0 += gstride[1];
1644 bp0 += bstride[1];
1645 hp0 += hstride[1];
1646 }
1647 }
1648 return CCV_NNC_EXEC_SUCCESS;
1649 }
1650 // Non-optimal case, need to do skip copy.
1651 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1652 {
1653 float* const gp0 = gp + i[0] * gstride[0];
1654 float* const bp0 = bp + i[0] * bstride[0];
1655 float* const hp0 = hp + i[0] * hstride[0];
1656 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1657 {
1658 float* gp1 = gp0 + i[1] * gstride[1];
1659 float* bp1 = bp0 + i[1] * bstride[1];
1660 float* hp1 = hp0 + i[1] * hstride[1];
1661 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1662 {
1663 for (x = 0; x < dim[3]; x++)
1664 hp1[x] = (bp1[x] >= max || bp1[x] <= min) ? 0 : gp1[x];
1665 gp1 += gstride[2];
1666 bp1 += bstride[2];
1667 hp1 += hstride[2];
1668 }
1669 }
1670 }
1671 }
1672 } else {
1673 if (!CCV_IS_TENSOR_VIEW(h)((*(int*)(h)) & CCV_TENSOR_VIEW) && !CCV_IS_TENSOR_VIEW(b)((*(int*)(b)) & CCV_TENSOR_VIEW))
1674 {
1675 // Super optimal case, just do one for-loop for the gradient mask.
1676 const int tensor_count = ccv_nnc_tensor_count(h->info);
1677 if (isnan(min)__builtin_isnan (min))
1678 {
1679 for (x = 0; x < tensor_count; x++)
1680 h->data.f32[x] = b->data.f32[x] >= max ? 0 : 1;
1681 } else if (isnan(max)__builtin_isnan (max)) {
1682 for (x = 0; x < tensor_count; x++)
1683 h->data.f32[x] = b->data.f32[x] <= min ? 0 : 1;
1684 } else {
1685 for (x = 0; x < tensor_count; x++)
1686 h->data.f32[x] = (b->data.f32[x] >= max || b->data.f32[x] <= min) ? 0 : 1;
1687 }
1688 return CCV_NNC_EXEC_SUCCESS;
1689 }
1690 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "ew/ccv_nnc_ew_cpu_ref.c"
, 1690, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
1691 ccv_nnc_tensor_view_get_stride(b, bstride);
1692 ccv_nnc_tensor_view_get_stride(h, hstride);
1693 int i[CCV_NNC_MAX_DIM(2) + 2];
1694 float* const bp = b->data.f32;
1695 float* const hp = h->data.f32;
1696 const int count = dim[2] * dim[3];
1697 const float min = cmd.info.clamp.min;
1698 const float max = cmd.info.clamp.max;
1699 assert(!isnan(min) || !isnan(max))((void) sizeof ((!__builtin_isnan (min) || !__builtin_isnan (
max)) ? 1 : 0), __extension__ ({ if (!__builtin_isnan (min) ||
!__builtin_isnan (max)) ; else __assert_fail ("!isnan(min) || !isnan(max)"
, "ew/ccv_nnc_ew_cpu_ref.c", 1699, __extension__ __PRETTY_FUNCTION__
); }))
;
1700 if (isnan(min)__builtin_isnan (min))
1701 {
1702 if (bstride[2] == dim[3] && hstride[2] == dim[3])
1703 {
1704 // Special casing if bstride[2] is the same as dim[3]
1705 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1706 {
1707 float* bp0 = bp + i[0] * bstride[0];
1708 float* hp0 = hp + i[0] * hstride[0];
1709 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1710 {
1711 for (x = 0; x < count; x++)
1712 hp0[x] = bp0[x] >= max ? 0 : 1;
1713 bp0 += bstride[1];
1714 hp0 += hstride[1];
1715 }
1716 }
1717 return CCV_NNC_EXEC_SUCCESS;
1718 }
1719 // Non-optimal case, need to do skip copy.
1720 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1721 {
1722 float* const bp0 = bp + i[0] * bstride[0];
1723 float* const hp0 = hp + i[0] * hstride[0];
1724 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1725 {
1726 float* bp1 = bp0 + i[1] * bstride[1];
1727 float* hp1 = hp0 + i[1] * hstride[1];
1728 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1729 {
1730 for (x = 0; x < dim[3]; x++)
1731 hp1[x] = bp1[x] >= max ? 0 : 1;
1732 bp1 += bstride[2];
1733 hp1 += hstride[2];
1734 }
1735 }
1736 }
1737 } else if (isnan(max)__builtin_isnan (max)) {
1738 if (bstride[2] == dim[3] && hstride[2] == dim[3])
1739 {
1740 // Special casing if bstride[2] is the same as dim[3]
1741 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1742 {
1743 float* bp0 = bp + i[0] * bstride[0];
1744 float* hp0 = hp + i[0] * hstride[0];
1745 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1746 {
1747 for (x = 0; x < count; x++)
1748 hp0[x] = bp0[x] <= min ? 0 : 1;
1749 bp0 += bstride[1];
1750 hp0 += hstride[1];
1751 }
1752 }
1753 return CCV_NNC_EXEC_SUCCESS;
1754 }
1755 // Non-optimal case, need to do skip copy.
1756 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1757 {
1758 float* const bp0 = bp + i[0] * bstride[0];
1759 float* const hp0 = hp + i[0] * hstride[0];
1760 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1761 {
1762 float* bp1 = bp0 + i[1] * bstride[1];
1763 float* hp1 = hp0 + i[1] * hstride[1];
1764 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1765 {
1766 for (x = 0; x < dim[3]; x++)
1767 hp1[x] = bp1[x] <= min ? 0 : 1;
1768 bp1 += bstride[2];
1769 hp1 += hstride[2];
1770 }
1771 }
1772 }
1773 } else {
1774 if (bstride[2] == dim[3] && hstride[2] == dim[3])
1775 {
1776 // Special casing if bstride[2] is the same as dim[3]
1777 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1778 {
1779 float* bp0 = bp + i[0] * bstride[0];
1780 float* hp0 = hp + i[0] * hstride[0];
1781 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1782 {
1783 for (x = 0; x < count; x++)
1784 hp0[x] = (bp0[x] >= max || bp0[x] <= min) ? 0 : 1;
1785 bp0 += bstride[1];
1786 hp0 += hstride[1];
1787 }
1788 }
1789 return CCV_NNC_EXEC_SUCCESS;
1790 }
1791 // Non-optimal case, need to do skip copy.
1792 for (i[0] = 0; i[0] < dim[0]; i[0]++)
1793 {
1794 float* const bp0 = bp + i[0] * bstride[0];
1795 float* const hp0 = hp + i[0] * hstride[0];
1796 for (i[1] = 0; i[1] < dim[1]; i[1]++)
1797 {
1798 float* bp1 = bp0 + i[1] * bstride[1];
1799 float* hp1 = hp0 + i[1] * hstride[1];
1800 for (i[2] = 0; i[2] < dim[2]; i[2]++)
1801 {
1802 for (x = 0; x < dim[3]; x++)
1803 hp1[x] = (bp1[x] >= max || bp1[x] <= min) ? 0 : 1;
1804 bp1 += bstride[2];
1805 hp1 += hstride[2];
1806 }
1807 }
1808 }
1809 }
1810 }
1811 return CCV_NNC_EXEC_SUCCESS;
1812}
1813
1814REGISTER_COMMAND_BACKEND(CCV_NNC_EWSUM_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWSUM_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
1815{
1816 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
1817 registry->tensor_datatypes = CCV_32F;
1818 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
1819 registry->algorithms = 1;
1820 registry->exec = _ccv_nnc_ewsum_forw;
1821}
1822
1823REGISTER_COMMAND_BACKEND(CCV_NNC_EWSUM_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWSUM_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
1824{
1825 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
1826 registry->tensor_datatypes = CCV_32F;
1827 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
1828 registry->algorithms = 1;
1829 registry->exec = _ccv_nnc_ewsum_back;
1830}
1831
1832REGISTER_COMMAND_BACKEND(CCV_NNC_EWPROD_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWPROD_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
1833{
1834 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
1835 registry->tensor_datatypes = CCV_32F;
1836 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
1837 registry->algorithms = 1;
1838 registry->exec = _ccv_nnc_ewprod_forw;
1839}
1840
1841REGISTER_COMMAND_BACKEND(CCV_NNC_EWPROD_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWPROD_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
1842{
1843 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
1844 registry->tensor_datatypes = CCV_32F;
1845 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
1846 registry->algorithms = 1;
1847 registry->exec = _ccv_nnc_ewprod_back;
1848}
1849
1850REGISTER_COMMAND_BACKEND(CCV_NNC_EWDIV_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWDIV_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
1851{
1852 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
1853 registry->tensor_datatypes = CCV_32F;
1854 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
1855 registry->algorithms = 1;
1856 registry->exec = _ccv_nnc_ewdiv_forw;
1857}
1858
1859REGISTER_COMMAND_BACKEND(CCV_NNC_EWDIV_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWDIV_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
1860{
1861 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
1862 registry->tensor_datatypes = CCV_32F;
1863 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
1864 registry->algorithms = 1;
1865 registry->exec = _ccv_nnc_ewdiv_back;
1866}
1867
1868REGISTER_COMMAND_BACKEND(CCV_NNC_EWEXP_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWEXP_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
1869{
1870 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
1871 registry->tensor_datatypes = CCV_32F;
1872 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
1873 registry->algorithms = 1;
1874 registry->exec = _ccv_nnc_ewexp_forw;
1875}
1876
1877REGISTER_COMMAND_BACKEND(CCV_NNC_EWEXP_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWEXP_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
1878{
1879 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
1880 registry->tensor_datatypes = CCV_32F;
1881 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
1882 registry->algorithms = 1;
1883 registry->exec = _ccv_nnc_ewexp_back;
1884}
1885
1886REGISTER_COMMAND_BACKEND(CCV_NNC_EWLOG_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWLOG_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
1887{
1888 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
1889 registry->tensor_datatypes = CCV_32F;
1890 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
1891 registry->algorithms = 1;
1892 registry->exec = _ccv_nnc_ewlog_forw;
1893}
1894
1895REGISTER_COMMAND_BACKEND(CCV_NNC_EWLOG_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWLOG_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
1896{
1897 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
1898 registry->tensor_datatypes = CCV_32F;
1899 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
1900 registry->algorithms = 1;
1901 registry->exec = _ccv_nnc_ewlog_back;
1902}
1903
1904REGISTER_COMMAND_BACKEND(CCV_NNC_EWSQRT_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWSQRT_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
1905{
1906 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
1907 registry->tensor_datatypes = CCV_32F;
1908 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
1909 registry->algorithms = 1;
1910 registry->exec = _ccv_nnc_ewsqrt_forw;
1911}
1912
1913REGISTER_COMMAND_BACKEND(CCV_NNC_EWSQRT_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWSQRT_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
1914{
1915 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
1916 registry->tensor_datatypes = CCV_32F;
1917 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
1918 registry->algorithms = 1;
1919 registry->exec = _ccv_nnc_ewsqrt_back;
1920}
1921
1922REGISTER_COMMAND_BACKEND(CCV_NNC_EWABS_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWABS_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
1923{
1924 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
1925 registry->tensor_datatypes = CCV_32F;
1926 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
1927 registry->algorithms = 1;
1928 registry->exec = _ccv_nnc_ewabs_forw;
1929}
1930
1931REGISTER_COMMAND_BACKEND(CCV_NNC_EWABS_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_EWABS_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
1932{
1933 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
1934 registry->tensor_datatypes = CCV_32F;
1935 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
1936 registry->algorithms = 1;
1937 registry->exec = _ccv_nnc_ewabs_back;
1938}
1939
1940REGISTER_COMMAND_BACKEND(CCV_NNC_CLAMP_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_CLAMP_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
1941{
1942 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
1943 registry->tensor_datatypes = CCV_32F;
1944 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
1945 registry->algorithms = 1;
1946 registry->exec = _ccv_nnc_clamp_forw;
1947}
1948
1949REGISTER_COMMAND_BACKEND(CCV_NNC_CLAMP_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_CLAMP_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
1950{
1951 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
1952 registry->tensor_datatypes = CCV_32F;
1953 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
1954 registry->algorithms = 1;
1955 registry->exec = _ccv_nnc_clamp_back;
1956}