/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/reduce/ccv_nnc_reduce_max_cpu_ref.c
Line | Count | Source |
1 | | #include "ccv.h" |
2 | | #include "ccv_internal.h" |
3 | | #include "nnc/ccv_nnc.h" |
4 | | #include "nnc/ccv_nnc_easy.h" |
5 | | #include "nnc/ccv_nnc_internal.h" |
6 | | #ifdef USE_OPENMP |
7 | | #include <omp.h> |
8 | | #endif |
9 | | #ifdef USE_DISPATCH |
10 | | #include <dispatch/dispatch.h> |
11 | | #endif |
12 | | |
13 | | // Shared methods. |
14 | | #include "../_ccv_nnc_cpu_ref.h" |
15 | | |
16 | | static int _ccv_nnc_reduce_max_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
17 | 4 | { |
18 | 4 | assert(input_size == 1); |
19 | 4 | ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0]; |
20 | 4 | ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0]; |
21 | 4 | assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2); |
22 | 4 | assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2); |
23 | | // Assuming this is float 32. |
24 | 4 | int adim[CCV_NNC_MAX_DIM_ALLOC]; |
25 | 4 | int bdim[CCV_NNC_MAX_DIM_ALLOC]; |
26 | 4 | ccv_nnc_tensor_view_get_dim(a, adim); |
27 | 4 | ccv_nnc_tensor_view_get_dim(b, bdim); |
28 | 4 | assert(ccv_nnc_tensor_view_check_broadcast_dim(b, adim)); |
29 | 4 | int astride[CCV_NNC_MAX_DIM_ALLOC]; |
30 | 4 | int bstride[CCV_NNC_MAX_DIM_ALLOC]; |
31 | 4 | assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number. |
32 | 4 | ccv_nnc_tensor_view_get_stride(a, astride); |
33 | 4 | ccv_nnc_tensor_view_get_stride(b, bstride); |
34 | 4 | int i[CCV_NNC_MAX_DIM + 2]; |
35 | 4 | int x; |
36 | 4 | _ccv_nnc_tensor_set_cpu_ref_f32(b, -FLT_MAX); |
37 | 4 | float* const ap = a->data.f32; |
38 | 4 | float* const bp = b->data.f32; |
39 | | // Non-optimal case, need to do skip if needed. |
40 | 8 | for (i[0] = 0; i[0] < adim[0]; i[0]++) |
41 | 4 | { |
42 | 4 | float* const ap0 = ap + i[0] * astride[0]; |
43 | 4 | float* const bp0 = bdim[0] == 1 ? bp : bp + i[0] * bstride[0]; |
44 | 8 | for (i[1] = 0; i[1] < adim[1]; i[1]++) |
45 | 4 | { |
46 | 4 | float* ap1 = ap0 + i[1] * astride[1]; |
47 | 4 | float* const bp1 = bdim[1] == 1 ? bp0 : bp0 + i[1] * bstride[1]; |
48 | 11 | for (i[2] = 0; i[2] < adim[2]; i[2]++) |
49 | 7 | { |
50 | 7 | float* const bp2 = bdim[2] == 1 ? bp1 : bp1 + i[2] * bstride[2]; |
51 | 7 | if (bdim[3] == 1) |
52 | 3 | { |
53 | 109 | for (x = 0; x < adim[3]; x++) |
54 | 106 | if (ap1[x] > bp2[0]) |
55 | 11 | bp2[0] = ap1[x]; |
56 | 4 | } else { |
57 | 16 | for (x = 0; x < adim[3]; x++) |
58 | 12 | if (ap1[x] > bp2[x]) |
59 | 12 | bp2[x] = ap1[x]; |
60 | 4 | } |
61 | 7 | ap1 += astride[2]; |
62 | 7 | } |
63 | 4 | } |
64 | 4 | } |
65 | 4 | return CCV_NNC_EXEC_SUCCESS; |
66 | 4 | } |
67 | | |
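The forward kernel above is normally reached through the command API rather than called directly. Below is a minimal usage sketch, assuming the `CMD_REDUCE_MAX_FORWARD` convenience macro and the `TENSOR_LIST`/`CPU_TENSOR_NHWC` helpers from `ccv_nnc_easy.h` follow the same pattern as the other reduce commands; the axis argument names the dimension being reduced.

```c
#include <ccv.h>
#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>

int main(void)
{
	ccv_nnc_init();
	// A 2x3 float32 input and a 1x3 output that receives the per-column maximum.
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 3), 0);
	int i;
	for (i = 0; i < 6; i++)
		a->data.f32[i] = (float)i; // a = [[0, 1, 2], [3, 4, 5]]
	// Reduce max over axis 0; on CPU float32 tensors this dispatches to _ccv_nnc_reduce_max_forw.
	ccv_nnc_cmd_exec(CMD_REDUCE_MAX_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
	// b now holds {3, 4, 5}.
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	return 0;
}
```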
68 | | static int _ccv_nnc_reduce_max_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
69 | 1 | { |
70 | 1 | if (inputs[0] == 0) |
71 | 0 | { |
72 | 0 | ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0]; |
73 | 0 | ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1]; |
74 | 0 | ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2]; |
75 | 0 | assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2); |
76 | 0 | assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2); |
77 | 0 | assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2); |
78 | | // Assuming this is float 32. |
79 | 0 | int hdim[CCV_NNC_MAX_DIM_ALLOC]; |
80 | 0 | int bdim[CCV_NNC_MAX_DIM_ALLOC]; |
81 | 0 | ccv_nnc_tensor_view_get_dim(h, hdim); |
82 | 0 | ccv_nnc_tensor_view_get_dim(b, bdim); |
83 | 0 | assert(ccv_nnc_tensor_view_check_broadcast_dim(b, hdim)); |
84 | 0 | assert(ccv_nnc_tensor_view_check_dim(a, hdim)); |
85 | 0 | int hstride[CCV_NNC_MAX_DIM_ALLOC]; |
86 | 0 | int astride[CCV_NNC_MAX_DIM_ALLOC]; |
87 | 0 | int bstride[CCV_NNC_MAX_DIM_ALLOC]; |
88 | 0 | assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number. |
89 | 0 | ccv_nnc_tensor_view_get_stride(h, hstride); |
90 | 0 | ccv_nnc_tensor_view_get_stride(a, astride); |
91 | 0 | ccv_nnc_tensor_view_get_stride(b, bstride); |
92 | 0 | int i[CCV_NNC_MAX_DIM + 2]; |
93 | 0 | int x; |
94 | 0 | float* const hp = h->data.f32; |
95 | 0 | float* const ap = a->data.f32; |
96 | 0 | float* const bp = b->data.f32; |
97 | 0 | ccv_nnc_tensor_zero(h); |
98 | | // Non-optimal case, need to do skip if needed. |
99 | 0 | for (i[0] = 0; i[0] < hdim[0]; i[0]++) |
100 | 0 | { |
101 | 0 | float* const ap0 = ap + i[0] * astride[0]; |
102 | 0 | float* const hp0 = hp + i[0] * hstride[0]; |
103 | 0 | float* const bp0 = bdim[0] == 1 ? bp : bp + i[0] * bstride[0]; |
104 | 0 | for (i[1] = 0; i[1] < hdim[1]; i[1]++) |
105 | 0 | { |
106 | 0 | float* ap1 = ap0 + i[1] * astride[1]; |
107 | 0 | float* hp1 = hp0 + i[1] * hstride[1]; |
108 | 0 | float* const bp1 = bdim[1] == 1 ? bp0 : bp0 + i[1] * bstride[1]; |
109 | 0 | for (i[2] = 0; i[2] < hdim[2]; i[2]++) |
110 | 0 | { |
111 | 0 | float* const bp2 = bdim[2] == 1 ? bp1 : bp1 + i[2] * bstride[2]; |
112 | 0 | if (bdim[3] == 1) |
113 | 0 | { |
114 | 0 | for (x = 0; x < hdim[3]; x++) |
115 | 0 | if (ap1[x] == bp2[0]) |
116 | 0 | hp1[x] = 1; |
117 | 0 | } else { |
118 | 0 | for (x = 0; x < hdim[3]; x++) |
119 | 0 | if (ap1[x] == bp2[x]) |
120 | 0 | hp1[x] = 1; |
121 | 0 | } |
122 | 0 | hp1 += hstride[2]; |
123 | 0 | ap1 += astride[2]; |
124 | 0 | } |
125 | 0 | } |
126 | 0 | } |
127 | 0 | return CCV_NNC_EXEC_SUCCESS; |
128 | 0 | } |
129 | 1 | ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0]; |
130 | 1 | ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0]; |
131 | 1 | ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1]; |
132 | 1 | ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2]; |
133 | 1 | assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2); |
134 | 1 | assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2); |
135 | 1 | assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2); |
136 | 1 | assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2); |
137 | | // Assuming this is float 32. |
138 | 1 | int hdim[CCV_NNC_MAX_DIM_ALLOC]; |
139 | 1 | int gdim[CCV_NNC_MAX_DIM_ALLOC]; |
140 | 1 | ccv_nnc_tensor_view_get_dim(h, hdim); |
141 | 1 | ccv_nnc_tensor_view_get_dim(g, gdim); |
142 | 1 | assert(ccv_nnc_tensor_view_check_broadcast_dim(g, hdim)); |
143 | 1 | assert(ccv_nnc_tensor_view_check_dim(a, hdim)); |
144 | 1 | assert(ccv_nnc_tensor_view_check_dim(b, gdim)); |
145 | 1 | int hstride[CCV_NNC_MAX_DIM_ALLOC]; |
146 | 1 | int gstride[CCV_NNC_MAX_DIM_ALLOC]; |
147 | 1 | int astride[CCV_NNC_MAX_DIM_ALLOC]; |
148 | 1 | int bstride[CCV_NNC_MAX_DIM_ALLOC]; |
149 | 1 | assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number. |
150 | 1 | ccv_nnc_tensor_view_get_stride(h, hstride); |
151 | 1 | ccv_nnc_tensor_view_get_stride(g, gstride); |
152 | 1 | ccv_nnc_tensor_view_get_stride(a, astride); |
153 | 1 | ccv_nnc_tensor_view_get_stride(b, bstride); |
154 | 1 | int i[CCV_NNC_MAX_DIM + 2]; |
155 | 1 | int x; |
156 | 1 | float* const hp = h->data.f32; |
157 | 1 | float* const gp = g->data.f32; |
158 | 1 | float* const ap = a->data.f32; |
159 | 1 | float* const bp = b->data.f32; |
160 | 1 | ccv_nnc_tensor_zero(h); |
161 | | // Non-optimal case, need to do skip if needed. |
162 | 2 | for (i[0] = 0; i[0] < hdim[0]; i[0]++) |
163 | 1 | { |
164 | 1 | float* const ap0 = ap + i[0] * astride[0]; |
165 | 1 | float* const hp0 = hp + i[0] * hstride[0]; |
166 | 1 | float* const gp0 = gdim[0] == 1 ? gp : gp + i[0] * gstride[0]; |
167 | 1 | float* const bp0 = gdim[0] == 1 ? bp : bp + i[0] * bstride[0]; |
168 | 2 | for (i[1] = 0; i[1] < hdim[1]; i[1]++) |
169 | 1 | { |
170 | 1 | float* ap1 = ap0 + i[1] * astride[1]; |
171 | 1 | float* hp1 = hp0 + i[1] * hstride[1]; |
172 | 1 | float* const gp1 = gdim[1] == 1 ? gp0 : gp0 + i[1] * gstride[1]; |
173 | 1 | float* const bp1 = gdim[1] == 1 ? bp0 : bp0 + i[1] * bstride[1]; |
174 | 2 | for (i[2] = 0; i[2] < hdim[2]; i[2]++) |
175 | 1 | { |
176 | 1 | float* const gp2 = gdim[2] == 1 ? gp1 : gp1 + i[2] * gstride[2]; |
177 | 1 | float* const bp2 = gdim[2] == 1 ? bp1 : bp1 + i[2] * bstride[2]; |
178 | 1 | if (gdim[3] == 1) |
179 | 1 | { |
180 | 101 | for (x = 0; x < hdim[3]; x++) |
181 | 100 | if (ap1[x] == bp2[0]) |
182 | 1 | hp1[x] = gp2[0]; |
183 | 1 | } else { |
184 | 0 | for (x = 0; x < hdim[3]; x++) |
185 | 0 | if (ap1[x] == bp2[x]) |
186 | 0 | hp1[x] = gp2[x]; |
187 | 0 | } |
188 | 1 | hp1 += hstride[2]; |
189 | 1 | ap1 += astride[2]; |
190 | 1 | } |
191 | 1 | } |
192 | 1 | } |
193 | 1 | return CCV_NNC_EXEC_SUCCESS; |
194 | 1 | } |
195 | | |
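In plain terms, the forward pass keeps a running maximum along the reduced axes (seeded at -FLT_MAX), and the backward pass routes the incoming gradient only to positions whose value equals that maximum, all of them in case of ties, leaving everything else at zero (or writes a 1/0 mask when no gradient tensor is supplied). A self-contained sketch of the same rule for a 1-D reduction, without the tensor-view machinery:

```c
#include <float.h>
#include <stdio.h>

int main(void)
{
	const float a[5] = { 1.f, 7.f, 3.f, 7.f, 2.f };
	const float g = 0.5f; // incoming gradient for the single reduced output
	float b = -FLT_MAX;   // forward accumulator, seeded like _ccv_nnc_tensor_set_cpu_ref_f32(b, -FLT_MAX)
	float h[5] = { 0 };   // gradient w.r.t. a, zeroed like ccv_nnc_tensor_zero(h)
	int x;
	// Forward: running maximum over the reduced axis.
	for (x = 0; x < 5; x++)
		if (a[x] > b)
			b = a[x];
	// Backward: every position that matches the maximum receives the gradient; ties all receive it.
	for (x = 0; x < 5; x++)
		if (a[x] == b)
			h[x] = g;
	printf("max = %g, h = {%g, %g, %g, %g, %g}\n", b, h[0], h[1], h[2], h[3], h[4]);
	// Prints: max = 7, h = {0, 0.5, 0, 0.5, 0}
	return 0;
}
```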
196 | | REGISTER_COMMAND_BACKEND(CCV_NNC_REDUCE_MAX_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
197 | 1 | { |
198 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN; |
199 | 1 | registry->tensor_datatypes = CCV_32F; |
200 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
201 | 1 | registry->algorithms = 1; |
202 | 1 | registry->exec = _ccv_nnc_reduce_max_forw; |
203 | 1 | } |
204 | | |
205 | | REGISTER_COMMAND_BACKEND(CCV_NNC_REDUCE_MAX_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
206 | 1 | { |
207 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN; |
208 | 1 | registry->tensor_datatypes = CCV_32F; |
209 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
210 | 1 | registry->algorithms = 1; |
211 | 1 | registry->exec = _ccv_nnc_reduce_max_back; |
212 | 1 | } |
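With both backends registered, the backward kernel expects its inputs in the order (gradient g, original input a, forward output b) and writes into the single output h, as the signature and the inputs[0]/inputs[1]/inputs[2] usage above show. A hedged end-to-end sketch, assuming a `CMD_REDUCE_MAX_BACKWARD` convenience macro mirrors the forward one:

```c
#include <ccv.h>
#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>

int main(void)
{
	ccv_nnc_init();
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 3), 0);
	ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 3), 0);
	ccv_nnc_tensor_t* const h = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	int i;
	for (i = 0; i < 6; i++)
		a->data.f32[i] = (float)i;
	for (i = 0; i < 3; i++)
		g->data.f32[i] = 1; // upstream gradient of ones
	// Forward populates b; backward takes (g, a, b) and fills h with the masked gradient.
	ccv_nnc_cmd_exec(CMD_REDUCE_MAX_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
	ccv_nnc_cmd_exec(CMD_REDUCE_MAX_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, b), TENSOR_LIST(h), 0);
	// h is 1 where a equals its column maximum (the second row here) and 0 elsewhere.
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(g);
	ccv_nnc_tensor_free(h);
	return 0;
}
```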