/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/histogram/ccv_nnc_histogram_cpu_ref.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "ccv.h" |
2 | | #include "ccv_internal.h" |
3 | | #include "nnc/ccv_nnc.h" |
4 | | #include "nnc/ccv_nnc_easy.h" |
5 | | #include "nnc/ccv_nnc_internal.h" |
6 | | #ifdef USE_OPENMP |
7 | | #include <omp.h> |
8 | | #endif |
9 | | #ifdef USE_DISPATCH |
10 | | #include <dispatch/dispatch.h> |
11 | | #endif |
12 | | |
// Binary search over the ascending array `bounds` (of `size` entries).
// Returns the index of the first entry that is strictly greater than v, or
// `size` when no entry is greater. A NaN v never compares less than anything,
// so it also yields `size`.
static int _upper_bound(const float v, const int size, const float* const bounds)
{
	// Invariant: bounds[lo] <= v (or lo == -1) and v < bounds[hi] (or hi == size).
	int lo = -1;
	int hi = size;
	while (hi > lo + 1)
	{
		const int mid = lo + ((hi - lo) >> 1);
		if (v < bounds[mid])
			hi = mid;
		else
			lo = mid;
	}
	return hi;
}
27 | | |
// Folds a single sample into the running statistics and bumps its evenly
// spaced bin. Bin layout in bp: [0] values below min, [1..bins] the regular
// bins, [bins + 1] values at or above max, [bins + 2] NaN.
static inline void _ccv_nnc_histogram_even_tally(const float av, int* const bp, const float max, const float min, const int bins, const float range, float* const a_max, float* const a_min, double* const a_sum, double* const a_sum_of_squares)
{
	*a_min = ccv_min(*a_min, av);
	*a_max = ccv_max(*a_max, av);
	*a_sum += av;
	*a_sum_of_squares += av * av;
	if (isnan(av))
	{
		++bp[bins + 2];
		return;
	}
	if (av < min)
	{
		++bp[0];
		return;
	}
	if (av >= max)
	{
		++bp[bins + 1];
		return;
	}
	// range is the number of bins per unit of value; clamp to guard against rounding at the edges.
	int idx = (int)((av - min) * range) + 1;
	idx = ccv_min(ccv_max(idx, 1), bins);
	++bp[idx];
}

// Walks a strided nd-dimensional float buffer and accumulates an evenly spaced
// histogram into bp, while updating *a_min, *a_max, *a_sum and
// *a_sum_of_squares. Dimensions 1 to 4 are handled with explicit loops; any
// other nd peels off the leading dimension and recurses.
void _ccv_nnc_tensor_histogram_even(float* ap, int* bp, const int nd, const int* const dim, const int* const stride, const float max, const float min, const int bins, const float range, float* a_max, float* a_min, double* a_sum, double* a_sum_of_squares)
{
	int i0, i1, i2, i3;
	switch (nd)
	{
		case 1:
			for (i0 = 0; i0 < dim[0]; i0++)
				_ccv_nnc_histogram_even_tally(ap[i0 * stride[0]], bp, max, min, bins, range, a_max, a_min, a_sum, a_sum_of_squares);
			break;
		case 2:
			for (i0 = 0; i0 < dim[0]; i0++)
			{
				float* const p0 = ap + i0 * stride[0];
				for (i1 = 0; i1 < dim[1]; i1++)
					_ccv_nnc_histogram_even_tally(p0[i1 * stride[1]], bp, max, min, bins, range, a_max, a_min, a_sum, a_sum_of_squares);
			}
			break;
		case 3:
			for (i0 = 0; i0 < dim[0]; i0++)
			{
				float* const p0 = ap + i0 * stride[0];
				for (i1 = 0; i1 < dim[1]; i1++)
				{
					float* const p1 = p0 + i1 * stride[1];
					for (i2 = 0; i2 < dim[2]; i2++)
						_ccv_nnc_histogram_even_tally(p1[i2 * stride[2]], bp, max, min, bins, range, a_max, a_min, a_sum, a_sum_of_squares);
				}
			}
			break;
		case 4:
			for (i0 = 0; i0 < dim[0]; i0++)
			{
				float* const p0 = ap + i0 * stride[0];
				for (i1 = 0; i1 < dim[1]; i1++)
				{
					float* const p1 = p0 + i1 * stride[1];
					for (i2 = 0; i2 < dim[2]; i2++)
					{
						float* const p2 = p1 + i2 * stride[2];
						for (i3 = 0; i3 < dim[3]; i3++)
							_ccv_nnc_histogram_even_tally(p2[i3 * stride[3]], bp, max, min, bins, range, a_max, a_min, a_sum, a_sum_of_squares);
					}
				}
			}
			break;
		default:
			// Strip the outermost dimension and handle the remainder recursively.
			for (i0 = 0; i0 < dim[0]; i0++)
				_ccv_nnc_tensor_histogram_even(ap + i0 * stride[0], bp, nd - 1, dim + 1, stride + 1, max, min, bins, range, a_max, a_min, a_sum, a_sum_of_squares);
			break;
	}
}
145 | | |
// Folds a single sample into the running statistics and bumps its logarithmic
// bin. Bin layout in bp: [0] values at or below -max, [upper_range] values with
// magnitude below min, [upper_range * 2] values at or above max,
// [upper_range * 2 + 1] NaN. Everything else lands at upper_range +/- k, where
// k = ceil(log(|v| * min_inv) * log_base) and the sign follows the sign of v.
static inline void _ccv_nnc_histogram_logarithmic_tally(const float av, int* const bp, const float max, const float min, const int upper_range, const float min_inv, const float log_base, float* const a_max, float* const a_min, double* const a_sum, double* const a_sum_of_squares)
{
	*a_min = ccv_min(*a_min, av);
	*a_max = ccv_max(*a_max, av);
	*a_sum += av;
	*a_sum_of_squares += av * av;
	if (isnan(av))
		++bp[upper_range * 2 + 1];
	else if (av >= max)
		++bp[upper_range * 2];
	else if (av <= -max)
		++bp[0];
	else if (av < min && av > -min)
		++bp[upper_range];
	else {
		int idx = ceilf(logf(fabsf(av) * min_inv) * log_base);
		idx = av > 0 ? idx + upper_range : upper_range - idx;
		// Clamp in case rounding pushes the index one step past either end.
		idx = ccv_min(ccv_max(idx, 0), upper_range * 2);
		++bp[idx];
	}
}

// Walks a strided nd-dimensional float buffer and accumulates a logarithmic
// histogram into bp, while updating *a_min, *a_max, *a_sum and
// *a_sum_of_squares.
//
// min_inv and log_base are precomputed by the caller (in this file: 1 / min
// and 1 / logf(rate)). Dimensions 1 to 4 are handled with explicit loops; any
// other nd peels off the leading dimension and recurses.
//
// The classification chain used to test `av <= -max` twice in a row in every
// branch; the second test could never be taken and has been dropped.
void _ccv_nnc_tensor_histogram_logarithmic(float* ap, int* bp, const int nd, const int* const dim, const int* const stride, const float max, const float min, const int upper_range, const float min_inv, const float log_base, float* a_max, float* a_min, double* a_sum, double* a_sum_of_squares)
{
	if (nd == 1)
	{
		int i;
		for (i = 0; i < dim[0]; i++)
			_ccv_nnc_histogram_logarithmic_tally(ap[i * stride[0]], bp, max, min, upper_range, min_inv, log_base, a_max, a_min, a_sum, a_sum_of_squares);
	} else if (nd == 2) {
		int x, y;
		for (y = 0; y < dim[0]; y++)
		{
			float* const apy = ap + y * stride[0];
			for (x = 0; x < dim[1]; x++)
				_ccv_nnc_histogram_logarithmic_tally(apy[x * stride[1]], bp, max, min, upper_range, min_inv, log_base, a_max, a_min, a_sum, a_sum_of_squares);
		}
	} else if (nd == 3) {
		int x, y, z;
		for (z = 0; z < dim[0]; z++)
		{
			float* const apz = ap + z * stride[0];
			for (y = 0; y < dim[1]; y++)
			{
				float* const apy = apz + y * stride[1];
				for (x = 0; x < dim[2]; x++)
					_ccv_nnc_histogram_logarithmic_tally(apy[x * stride[2]], bp, max, min, upper_range, min_inv, log_base, a_max, a_min, a_sum, a_sum_of_squares);
			}
		}
	} else if (nd == 4) {
		int x, y, z, s;
		for (s = 0; s < dim[0]; s++)
		{
			float* const aps = ap + s * stride[0];
			for (z = 0; z < dim[1]; z++)
			{
				float* const apz = aps + z * stride[1];
				for (y = 0; y < dim[2]; y++)
				{
					float* const apy = apz + y * stride[2];
					for (x = 0; x < dim[3]; x++)
						_ccv_nnc_histogram_logarithmic_tally(apy[x * stride[3]], bp, max, min, upper_range, min_inv, log_base, a_max, a_min, a_sum, a_sum_of_squares);
				}
			}
		}
	} else {
		// Strip the outermost dimension and handle the remainder recursively.
		int i;
		for (i = 0; i < dim[0]; i++)
			_ccv_nnc_tensor_histogram_logarithmic(ap + i * stride[0], bp, nd - 1, dim + 1, stride + 1, max, min, upper_range, min_inv, log_base, a_max, a_min, a_sum, a_sum_of_squares);
	}
}
283 | | |
// Folds a single sample into the running statistics and bumps the bin chosen
// by searching the boundary array hp. NaN samples are counted in
// bp[upper_range + 1]; all others in bp[_upper_bound(av, upper_range, hp)].
static inline void _ccv_nnc_histogram_bins_tally(const float av, const float* const hp, int* const bp, const int upper_range, float* const a_max, float* const a_min, double* const a_sum, double* const a_sum_of_squares)
{
	*a_min = ccv_min(*a_min, av);
	*a_max = ccv_max(*a_max, av);
	*a_sum += av;
	*a_sum_of_squares += av * av;
	if (isnan(av))
	{
		++bp[upper_range + 1];
		return;
	}
	const int slot = _upper_bound(av, upper_range, hp);
	++bp[slot];
}

// Walks a strided nd-dimensional float buffer and accumulates a histogram with
// caller-supplied bin boundaries (hp, upper_range entries) into bp, while
// updating *a_min, *a_max, *a_sum and *a_sum_of_squares. Dimensions 1 to 4 are
// handled with explicit loops; any other nd peels off the leading dimension
// and recurses.
void _ccv_nnc_tensor_histogram_bins(float* ap, float* hp, int* bp, const int nd, const int* const dim, const int* const stride, const int upper_range, float* a_max, float* a_min, double* a_sum, double* a_sum_of_squares)
{
	int i0, i1, i2, i3;
	switch (nd)
	{
		case 1:
			for (i0 = 0; i0 < dim[0]; i0++)
				_ccv_nnc_histogram_bins_tally(ap[i0 * stride[0]], hp, bp, upper_range, a_max, a_min, a_sum, a_sum_of_squares);
			break;
		case 2:
			for (i0 = 0; i0 < dim[0]; i0++)
			{
				float* const p0 = ap + i0 * stride[0];
				for (i1 = 0; i1 < dim[1]; i1++)
					_ccv_nnc_histogram_bins_tally(p0[i1 * stride[1]], hp, bp, upper_range, a_max, a_min, a_sum, a_sum_of_squares);
			}
			break;
		case 3:
			for (i0 = 0; i0 < dim[0]; i0++)
			{
				float* const p0 = ap + i0 * stride[0];
				for (i1 = 0; i1 < dim[1]; i1++)
				{
					float* const p1 = p0 + i1 * stride[1];
					for (i2 = 0; i2 < dim[2]; i2++)
						_ccv_nnc_histogram_bins_tally(p1[i2 * stride[2]], hp, bp, upper_range, a_max, a_min, a_sum, a_sum_of_squares);
				}
			}
			break;
		case 4:
			for (i0 = 0; i0 < dim[0]; i0++)
			{
				float* const p0 = ap + i0 * stride[0];
				for (i1 = 0; i1 < dim[1]; i1++)
				{
					float* const p1 = p0 + i1 * stride[1];
					for (i2 = 0; i2 < dim[2]; i2++)
					{
						float* const p2 = p1 + i2 * stride[2];
						for (i3 = 0; i3 < dim[3]; i3++)
							_ccv_nnc_histogram_bins_tally(p2[i3 * stride[3]], hp, bp, upper_range, a_max, a_min, a_sum, a_sum_of_squares);
					}
				}
			}
			break;
		default:
			// Strip the outermost dimension and handle the remainder recursively.
			for (i0 = 0; i0 < dim[0]; i0++)
				_ccv_nnc_tensor_histogram_bins(ap + i0 * stride[0], hp, bp, nd - 1, dim + 1, stride + 1, upper_range, a_max, a_min, a_sum, a_sum_of_squares);
			break;
	}
}
381 | | |
382 | | static int _ccv_nnc_histogram_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
383 | 8 | { |
384 | 8 | assert(input_size >= 1); |
385 | 8 | const ccv_nnc_tensor_t* a = inputs[0]; |
386 | 8 | assert(a->info.datatype == CCV_32F); |
387 | 8 | const ccv_nnc_tensor_t* h = input_size > 1 ? inputs[1]4 : 04 ; |
388 | 8 | if (h) |
389 | 4 | { assert(CCV_IS_TENSOR_CONTIGUOUS(h)); } |
390 | 8 | assert(output_size >= 1); |
391 | 8 | ccv_nnc_tensor_t* b = outputs[0]; |
392 | 8 | ccv_nnc_tensor_t* s = output_size > 1 ? outputs[1] : 00 ; |
393 | 8 | assert(CCV_IS_TENSOR_CONTIGUOUS(b)); |
394 | 8 | ccv_nnc_tensor_zero(b); |
395 | 8 | assert(b->info.datatype == CCV_32S); |
396 | 8 | int* bp = b->data.i32; |
397 | 8 | float a_min = a->data.f32[0]; |
398 | 8 | float a_max = a_min; |
399 | 8 | double a_sum = 0; |
400 | 8 | double a_sum_of_squares = 0; |
401 | 8 | if (CCV_IS_TENSOR_CONTIGUOUS(a)) |
402 | 4 | { |
403 | 4 | float* ap = a->data.f32; |
404 | 4 | int i, count = ccv_nnc_tensor_count(a->info); |
405 | 4 | switch (cmd.info.histogram.type) |
406 | 4 | { |
407 | 1 | case CCV_NNC_HISTOGRAM_EVEN: |
408 | 1 | { |
409 | 1 | const int bins = cmd.info.histogram.bins; |
410 | 1 | assert(ccv_nnc_tensor_count(b->info) == bins + 3); |
411 | 1 | const float min = cmd.info.histogram.min; |
412 | 1 | const float max = cmd.info.histogram.max; |
413 | 1 | assert(cmd.info.histogram.max > cmd.info.histogram.min); |
414 | 1 | const float range = bins / (max - min); |
415 | 6.00M | for (i = 0; i < count; i++6.00M ) |
416 | 6.00M | { |
417 | 6.00M | a_min = ccv_min(a_min, ap[i]); |
418 | 6.00M | a_max = ccv_max(a_max, ap[i]); |
419 | 6.00M | a_sum += ap[i]; |
420 | 6.00M | a_sum_of_squares += ap[i] * ap[i]; |
421 | 6.00M | if (isnan(ap[i])) |
422 | 1 | ++bp[bins + 2]; |
423 | 5.99M | else if (ap[i] < min) |
424 | 2 | ++bp[0]; |
425 | 5.99M | else if (ap[i] >= max) |
426 | 2 | ++bp[bins + 1]; |
427 | 5.99M | else { |
428 | 5.99M | int idx = (int)((ap[i] - min) * range) + 1; |
429 | 5.99M | idx = ccv_min(ccv_max(idx, 1), bins); |
430 | 5.99M | ++bp[idx]; |
431 | 5.99M | } |
432 | 6.00M | } |
433 | 1 | break; |
434 | 1 | } |
435 | 1 | case CCV_NNC_HISTOGRAM_LOGARITHMIC: |
436 | 1 | { |
437 | 1 | const float log_base = 1.0 / logf(cmd.info.histogram.rate); |
438 | 1 | assert(cmd.info.histogram.max > 0); |
439 | 1 | assert(cmd.info.histogram.min > 0); |
440 | 1 | assert(cmd.info.histogram.max > cmd.info.histogram.min); |
441 | 1 | const float min = cmd.info.histogram.min; |
442 | 1 | const float max = cmd.info.histogram.max; |
443 | 1 | const int upper_range = ceilf(logf(cmd.info.histogram.max / cmd.info.histogram.min) * log_base); |
444 | 1 | const float min_inv = 1.0 / cmd.info.histogram.min; |
445 | 6.00M | for (i = 0; i < count; i++6.00M ) |
446 | 6.00M | { |
447 | 6.00M | a_min = ccv_min(a_min, ap[i]); |
448 | 6.00M | a_max = ccv_max(a_max, ap[i]); |
449 | 6.00M | a_sum += ap[i]; |
450 | 6.00M | a_sum_of_squares += ap[i] * ap[i]; |
451 | | // Range from 1e-12 to 1e20, with 1.1 ratio. We reserve 0, count - 2 for -inf and inf, count - 1 for nan. |
452 | 6.00M | if (isnan(ap[i])) |
453 | 1 | ++bp[upper_range * 2 + 1]; |
454 | 5.99M | else if (ap[i] >= max) |
455 | 1 | ++bp[upper_range * 2]; |
456 | 5.99M | else if (ap[i] <= -max) |
457 | 1 | ++bp[0]; |
458 | 5.99M | else if (ap[i] < min && ap[i] > -min2.99M ) |
459 | 3 | ++bp[upper_range]; |
460 | 5.99M | else { |
461 | 5.99M | int idx = ceilf(logf(fabsf(ap[i]) * min_inv) * log_base); |
462 | 5.99M | idx = ap[i] > 0 ? idx + upper_range3.00M : upper_range - idx2.99M ; |
463 | 5.99M | idx = ccv_min(ccv_max(idx, 0), upper_range * 2); |
464 | 5.99M | ++bp[idx]; |
465 | 5.99M | } |
466 | 6.00M | } |
467 | 1 | break; |
468 | 1 | } |
469 | 2 | case CCV_NNC_HISTOGRAM_BINS: |
470 | 2 | { |
471 | 2 | assert(h); |
472 | 2 | const int upper_range = ccv_nnc_tensor_count(h->info); |
473 | 2 | assert(ccv_nnc_tensor_count(b->info) == upper_range + 2); |
474 | 12.0M | for (i = 0; 2 i < count; i++12.0M ) |
475 | 12.0M | { |
476 | 12.0M | a_min = ccv_min(a_min, ap[i]); |
477 | 12.0M | a_max = ccv_max(a_max, ap[i]); |
478 | 12.0M | a_sum += ap[i]; |
479 | 12.0M | a_sum_of_squares += ap[i] * ap[i]; |
480 | 12.0M | if (isnan(ap[i])) |
481 | 2 | ++bp[upper_range + 1]; |
482 | 11.9M | else { |
483 | 11.9M | const int idx = _upper_bound(ap[i], upper_range, h->data.f32); |
484 | 11.9M | ++bp[idx]; |
485 | 11.9M | } |
486 | 12.0M | } |
487 | 2 | break; |
488 | 2 | } |
489 | 4 | } |
490 | 4 | if (s) |
491 | 4 | { |
492 | 4 | assert(ccv_nnc_tensor_count(s->info) >= 4); |
493 | 4 | assert(s->info.datatype == CCV_32F); |
494 | 4 | s->data.f32[0] = a_min; |
495 | 4 | s->data.f32[1] = a_max; |
496 | 4 | s->data.f32[2] = a_sum; |
497 | 4 | s->data.f32[3] = a_sum_of_squares; |
498 | 4 | } |
499 | 4 | return CCV_NNC_EXEC_SUCCESS; |
500 | 4 | } |
501 | 4 | ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)a; |
502 | 4 | assert(CCV_IS_TENSOR_VIEW(tv)); |
503 | 4 | const int nd = ccv_nnc_tensor_nd(tv->info.dim); |
504 | 4 | assert(nd >= 1); |
505 | | // reset it to 0. |
506 | 4 | switch (cmd.info.histogram.type) |
507 | 4 | { |
508 | 1 | case CCV_NNC_HISTOGRAM_EVEN: |
509 | 1 | { |
510 | 1 | const int bins = cmd.info.histogram.bins; |
511 | 1 | assert(ccv_nnc_tensor_count(b->info) == bins + 3); |
512 | 1 | const float min = cmd.info.histogram.min; |
513 | 1 | const float max = cmd.info.histogram.max; |
514 | 1 | assert(cmd.info.histogram.max > cmd.info.histogram.min); |
515 | 1 | const float range = bins / (max - min); |
516 | 1 | _ccv_nnc_tensor_histogram_even(tv->data.f32, bp, nd, tv->info.dim, tv->stride, max, min, bins, range, &a_max, &a_min, &a_sum, &a_sum_of_squares); |
517 | 1 | break; |
518 | 1 | } |
519 | 1 | case CCV_NNC_HISTOGRAM_LOGARITHMIC: |
520 | 1 | { |
521 | 1 | const float log_base = 1.0 / logf(cmd.info.histogram.rate); |
522 | 1 | assert(cmd.info.histogram.max > 0); |
523 | 1 | assert(cmd.info.histogram.min > 0); |
524 | 1 | assert(cmd.info.histogram.max > cmd.info.histogram.min); |
525 | 1 | const float min = cmd.info.histogram.min; |
526 | 1 | const float max = cmd.info.histogram.max; |
527 | 1 | const int upper_range = ceilf(logf(cmd.info.histogram.max / cmd.info.histogram.min) * log_base); |
528 | 1 | const float min_inv = 1.0 / cmd.info.histogram.min; |
529 | 1 | _ccv_nnc_tensor_histogram_logarithmic(tv->data.f32, bp, nd, tv->info.dim, tv->stride, max, min, upper_range, min_inv, log_base, &a_max, &a_min, &a_sum, &a_sum_of_squares); |
530 | 1 | break; |
531 | 1 | } |
532 | 2 | case CCV_NNC_HISTOGRAM_BINS: |
533 | 2 | { |
534 | 2 | assert(h); |
535 | 2 | const int upper_range = ccv_nnc_tensor_count(h->info); |
536 | 2 | assert(ccv_nnc_tensor_count(b->info) == upper_range + 2); |
537 | 2 | _ccv_nnc_tensor_histogram_bins(tv->data.f32, h->data.f32, bp, nd, tv->info.dim, tv->stride, upper_range, &a_max, &a_min, &a_sum, &a_sum_of_squares); |
538 | 2 | break; |
539 | 2 | } |
540 | 4 | } |
541 | 4 | if (s) |
542 | 4 | { |
543 | 4 | assert(ccv_nnc_tensor_count(s->info) >= 4); |
544 | 4 | assert(s->info.datatype == CCV_32F); |
545 | 4 | s->data.f32[0] = a_min; |
546 | 4 | s->data.f32[1] = a_max; |
547 | 4 | s->data.f32[2] = a_sum; |
548 | 4 | s->data.f32[3] = a_sum_of_squares; |
549 | 4 | } |
550 | 4 | return CCV_NNC_EXEC_SUCCESS; |
551 | 4 | } |
552 | | |
553 | | static int _ccv_nnc_histogram_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
554 | 0 | { |
555 | 0 | return CCV_NNC_EXEC_INVALID; |
556 | 0 | } |
557 | | |
558 | | REGISTER_COMMAND_BACKEND(CCV_NNC_HISTOGRAM_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
559 | 1 | { |
560 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN; |
561 | 1 | registry->tensor_datatypes = CCV_32F | CCV_32S; |
562 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
563 | 1 | registry->algorithms = 1; |
564 | 1 | registry->exec = _ccv_nnc_histogram_forw; |
565 | 1 | } |
566 | | |
567 | | REGISTER_COMMAND_BACKEND(CCV_NNC_HISTOGRAM_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
568 | 1 | { |
569 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN; |
570 | 1 | registry->tensor_datatypes = CCV_32F | CCV_32S; |
571 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
572 | 1 | registry->algorithms = 1; |
573 | 1 | registry->exec = _ccv_nnc_histogram_back; |
574 | 1 | } |