Coverage Report

Created: 2024-08-18 16:21

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/histogram/ccv_nnc_histogram_cpu_ref.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
// Binary search over bounds[0 .. size - 1]. Returns the smallest index whose boundary is
// strictly greater than v, or size when no boundary is greater than v (this includes
// size == 0). The search presumes the boundaries are in ascending order.
static int _upper_bound(const float v, const int size, const float* const bounds)
{
  // Invariant: the answer lies in (lo, hi]. lo starts one before the first slot and hi one
  // past the last slot, so neither end is dereferenced.
  int lo = -1;
  int hi = size;
  for (; lo + 1 < hi;)
  {
    const int mid = lo + ((hi - lo) >> 1);
    // Written as a negated "<" on purpose: an unordered comparison (NaN) moves lo.
    if (!(v < bounds[mid]))
      lo = mid;
    else
      hi = mid;
  }
  return hi;
}
27
28
// Accounts for one element of the evenly spaced histogram: folds av into the running
// min / max / sum / sum of squares, then increments exactly one counter in bp.
static inline void _ccv_nnc_histogram_even_tally(const float av, int* const bp, const float max, const float min, const int bins, const float range, float* const a_max, float* const a_min, double* const a_sum, double* const a_sum_of_squares)
{
  *a_min = ccv_min(*a_min, av);
  *a_max = ccv_max(*a_max, av);
  *a_sum += av;
  *a_sum_of_squares += av * av;
  if (isnan(av))
  {
    ++bp[bins + 2];
    return;
  }
  if (av < min)
  {
    ++bp[0];
    return;
  }
  if (av >= max)
  {
    ++bp[bins + 1];
    return;
  }
  // In-range value: scale the offset from min into a bin number, then clamp so that
  // rounding can never land outside 1 .. bins.
  int idx = (int)((av - min) * range) + 1;
  idx = ccv_min(ccv_max(idx, 1), bins);
  ++bp[idx];
}

// Evenly spaced histogram over a strided float tensor.
//
// ap / nd / dim / stride describe the tensor: dim[k] elements along axis k, each stride[k]
// floats apart. Counters in bp are only incremented (never reset here) and are laid out as:
//   bp[0]          values below min
//   bp[1 .. bins]  the evenly spaced bins
//   bp[bins + 1]   values at or above max
//   bp[bins + 2]   NaN
// range is the factor turning (value - min) into a bin offset. a_max, a_min, a_sum and
// a_sum_of_squares are running statistics that every visited element is folded into.
// Ranks 1 to 4 are handled with explicit loops; any other rank peels the outermost axis and
// recurses.
void _ccv_nnc_tensor_histogram_even(float* ap, int* bp, const int nd, const int* const dim, const int* const stride, const float max, const float min, const int bins, const float range, float* a_max, float* a_min, double* a_sum, double* a_sum_of_squares)
{
  int i, j, k, l;
  switch (nd)
  {
    case 1:
      for (i = 0; i < dim[0]; i++)
        _ccv_nnc_histogram_even_tally(ap[i * stride[0]], bp, max, min, bins, range, a_max, a_min, a_sum, a_sum_of_squares);
      break;
    case 2:
      for (i = 0; i < dim[0]; i++)
      {
        float* const a1 = ap + i * stride[0];
        for (j = 0; j < dim[1]; j++)
          _ccv_nnc_histogram_even_tally(a1[j * stride[1]], bp, max, min, bins, range, a_max, a_min, a_sum, a_sum_of_squares);
      }
      break;
    case 3:
      for (i = 0; i < dim[0]; i++)
      {
        float* const a1 = ap + i * stride[0];
        for (j = 0; j < dim[1]; j++)
        {
          float* const a2 = a1 + j * stride[1];
          for (k = 0; k < dim[2]; k++)
            _ccv_nnc_histogram_even_tally(a2[k * stride[2]], bp, max, min, bins, range, a_max, a_min, a_sum, a_sum_of_squares);
        }
      }
      break;
    case 4:
      for (i = 0; i < dim[0]; i++)
      {
        float* const a1 = ap + i * stride[0];
        for (j = 0; j < dim[1]; j++)
        {
          float* const a2 = a1 + j * stride[1];
          for (k = 0; k < dim[2]; k++)
          {
            float* const a3 = a2 + k * stride[2];
            for (l = 0; l < dim[3]; l++)
              _ccv_nnc_histogram_even_tally(a3[l * stride[3]], bp, max, min, bins, range, a_max, a_min, a_sum, a_sum_of_squares);
          }
        }
      }
      break;
    default:
      // Any other rank: strip the leading axis and handle each slice recursively.
      for (i = 0; i < dim[0]; i++)
        _ccv_nnc_tensor_histogram_even(ap + i * stride[0], bp, nd - 1, dim + 1, stride + 1, max, min, bins, range, a_max, a_min, a_sum, a_sum_of_squares);
      break;
  }
}
145
146
// Accounts for one element of the logarithmic histogram: folds av into the running
// min / max / sum / sum of squares, then increments exactly one counter in bp.
static inline void _ccv_nnc_histogram_logarithmic_tally(const float av, int* const bp, const float max, const float min, const int upper_range, const float min_inv, const float log_base, float* const a_max, float* const a_min, double* const a_sum, double* const a_sum_of_squares)
{
  *a_min = ccv_min(*a_min, av);
  *a_max = ccv_max(*a_max, av);
  *a_sum += av;
  *a_sum_of_squares += av * av;
  if (isnan(av))
    ++bp[upper_range * 2 + 1];
  else if (av >= max)
    ++bp[upper_range * 2];
  else if (av <= -max)
    ++bp[0];
  else if (av < min && av > -min)
    ++bp[upper_range]; // Magnitude below min: the centre bucket.
  else {
    // Number of log steps between min and |av|, mirrored around the centre bucket by sign.
    int idx = ceilf(logf(fabsf(av) * min_inv) * log_base);
    idx = av > 0 ? idx + upper_range : upper_range - idx;
    idx = ccv_min(ccv_max(idx, 0), upper_range * 2);
    ++bp[idx];
  }
}

// Logarithmic histogram over a strided float tensor.
//
// ap / nd / dim / stride describe the tensor: dim[k] elements along axis k, each stride[k]
// floats apart. Counters in bp are only incremented (never reset here) and are laid out as:
//   bp[0]                    values at or below -max
//   bp[upper_range]          values with magnitude below min
//   bp[upper_range * 2]      values at or above max
//   bp[upper_range * 2 + 1]  NaN
// Every other value lands at upper_range +/- ceil(log(|value| * min_inv) * log_base),
// clamped to 0 .. upper_range * 2, on the side matching its sign. a_max, a_min, a_sum and
// a_sum_of_squares are running statistics that every visited element is folded into.
// Ranks 1 to 4 are handled with explicit loops; any other rank peels the outermost axis and
// recurses.
//
// The previous version tested "av <= -max" twice in a row in every rank branch; the second
// test could never be true and has been removed.
void _ccv_nnc_tensor_histogram_logarithmic(float* ap, int* bp, const int nd, const int* const dim, const int* const stride, const float max, const float min, const int upper_range, const float min_inv, const float log_base, float* a_max, float* a_min, double* a_sum, double* a_sum_of_squares)
{
  if (nd == 1)
  {
    int i;
    for (i = 0; i < dim[0]; i++)
      _ccv_nnc_histogram_logarithmic_tally(ap[i * stride[0]], bp, max, min, upper_range, min_inv, log_base, a_max, a_min, a_sum, a_sum_of_squares);
  } else if (nd == 2) {
    int x, y;
    for (y = 0; y < dim[0]; y++)
    {
      float* const apy = ap + y * stride[0];
      for (x = 0; x < dim[1]; x++)
        _ccv_nnc_histogram_logarithmic_tally(apy[x * stride[1]], bp, max, min, upper_range, min_inv, log_base, a_max, a_min, a_sum, a_sum_of_squares);
    }
  } else if (nd == 3) {
    int x, y, z;
    for (z = 0; z < dim[0]; z++)
    {
      float* const apz = ap + z * stride[0];
      for (y = 0; y < dim[1]; y++)
      {
        float* const apy = apz + y * stride[1];
        for (x = 0; x < dim[2]; x++)
          _ccv_nnc_histogram_logarithmic_tally(apy[x * stride[2]], bp, max, min, upper_range, min_inv, log_base, a_max, a_min, a_sum, a_sum_of_squares);
      }
    }
  } else if (nd == 4) {
    int x, y, z, s;
    for (s = 0; s < dim[0]; s++)
    {
      float* const aps = ap + s * stride[0];
      for (z = 0; z < dim[1]; z++)
      {
        float* const apz = aps + z * stride[1];
        for (y = 0; y < dim[2]; y++)
        {
          float* const apy = apz + y * stride[2];
          for (x = 0; x < dim[3]; x++)
            _ccv_nnc_histogram_logarithmic_tally(apy[x * stride[3]], bp, max, min, upper_range, min_inv, log_base, a_max, a_min, a_sum, a_sum_of_squares);
        }
      }
    }
  } else {
    // Any other rank: strip the leading axis and handle each slice recursively.
    int i;
    for (i = 0; i < dim[0]; i++)
      _ccv_nnc_tensor_histogram_logarithmic(ap + i * stride[0], bp, nd - 1, dim + 1, stride + 1, max, min, upper_range, min_inv, log_base, a_max, a_min, a_sum, a_sum_of_squares);
  }
}
283
284
// Accounts for one element of the explicit-boundary histogram: folds av into the running
// min / max / sum / sum of squares, then increments exactly one counter in bp.
static inline void _ccv_nnc_histogram_bins_tally(const float av, const float* const hp, int* const bp, const int upper_range, float* const a_max, float* const a_min, double* const a_sum, double* const a_sum_of_squares)
{
  *a_min = ccv_min(*a_min, av);
  *a_max = ccv_max(*a_max, av);
  *a_sum += av;
  *a_sum_of_squares += av * av;
  if (isnan(av))
  {
    ++bp[upper_range + 1];
    return;
  }
  // The slot is the index of the first boundary strictly above av, or upper_range when
  // there is none.
  ++bp[_upper_bound(av, upper_range, hp)];
}

// Histogram with caller-supplied boundaries over a strided float tensor.
//
// ap / nd / dim / stride describe the tensor: dim[k] elements along axis k, each stride[k]
// floats apart. hp holds upper_range boundaries. Counters in bp are only incremented (never
// reset here): bp[0 .. upper_range] are selected by _upper_bound() and bp[upper_range + 1]
// counts NaN. a_max, a_min, a_sum and a_sum_of_squares are running statistics that every
// visited element is folded into. Ranks 1 to 4 are handled with explicit loops; any other
// rank peels the outermost axis and recurses.
void _ccv_nnc_tensor_histogram_bins(float* ap, float* hp, int* bp, const int nd, const int* const dim, const int* const stride, const int upper_range, float* a_max, float* a_min, double* a_sum, double* a_sum_of_squares)
{
  int i, j, k, l;
  switch (nd)
  {
    case 1:
      for (i = 0; i < dim[0]; i++)
        _ccv_nnc_histogram_bins_tally(ap[i * stride[0]], hp, bp, upper_range, a_max, a_min, a_sum, a_sum_of_squares);
      break;
    case 2:
      for (i = 0; i < dim[0]; i++)
      {
        float* const a1 = ap + i * stride[0];
        for (j = 0; j < dim[1]; j++)
          _ccv_nnc_histogram_bins_tally(a1[j * stride[1]], hp, bp, upper_range, a_max, a_min, a_sum, a_sum_of_squares);
      }
      break;
    case 3:
      for (i = 0; i < dim[0]; i++)
      {
        float* const a1 = ap + i * stride[0];
        for (j = 0; j < dim[1]; j++)
        {
          float* const a2 = a1 + j * stride[1];
          for (k = 0; k < dim[2]; k++)
            _ccv_nnc_histogram_bins_tally(a2[k * stride[2]], hp, bp, upper_range, a_max, a_min, a_sum, a_sum_of_squares);
        }
      }
      break;
    case 4:
      for (i = 0; i < dim[0]; i++)
      {
        float* const a1 = ap + i * stride[0];
        for (j = 0; j < dim[1]; j++)
        {
          float* const a2 = a1 + j * stride[1];
          for (k = 0; k < dim[2]; k++)
          {
            float* const a3 = a2 + k * stride[2];
            for (l = 0; l < dim[3]; l++)
              _ccv_nnc_histogram_bins_tally(a3[l * stride[3]], hp, bp, upper_range, a_max, a_min, a_sum, a_sum_of_squares);
          }
        }
      }
      break;
    default:
      // Any other rank: strip the leading axis and handle each slice recursively.
      for (i = 0; i < dim[0]; i++)
        _ccv_nnc_tensor_histogram_bins(ap + i * stride[0], hp, bp, nd - 1, dim + 1, stride + 1, upper_range, a_max, a_min, a_sum, a_sum_of_squares);
      break;
  }
}
381
382
// Forward pass of the histogram command for the CPU reference backend.
//
// inputs[0]  32-bit float tensor to bin; either contiguous or a tensor view.
// inputs[1]  optional contiguous tensor of bin boundaries, required (and read as 32-bit
//            float) when the histogram type is CCV_NNC_HISTOGRAM_BINS. The boundaries are
//            binary searched, so they are presumably expected in ascending order.
// outputs[0] contiguous 32-bit integer tensor; zeroed here, then filled with the counts.
// outputs[1] optional 32-bit float tensor with at least 4 elements; receives the min, max,
//            sum and sum of squares of the input, in that order.
//
// Argument problems are only caught by assert. The function always returns
// CCV_NNC_EXEC_SUCCESS; a histogram type other than the three handled below leaves the
// counts at zero.
static int _ccv_nnc_histogram_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  assert(input_size >= 1);
  const ccv_nnc_tensor_t* a = inputs[0];
  assert(a->info.datatype == CCV_32F);
  const ccv_nnc_tensor_t* h = input_size > 1 ? inputs[1] : 0;
  if (h)
    { assert(CCV_IS_TENSOR_CONTIGUOUS(h)); }
  assert(output_size >= 1);
  ccv_nnc_tensor_t* b = outputs[0];
  ccv_nnc_tensor_t* s = output_size > 1 ? outputs[1] : 0;
  assert(CCV_IS_TENSOR_CONTIGUOUS(b));
  // Reset every counter to 0; the code below only ever increments.
  ccv_nnc_tensor_zero(b);
  assert(b->info.datatype == CCV_32S);
  int* bp = b->data.i32;
  // Seed the running min / max with the first stored element.
  float a_min = a->data.f32[0];
  float a_max = a_min;
  double a_sum = 0;
  double a_sum_of_squares = 0;
  if (CCV_IS_TENSOR_CONTIGUOUS(a))
  {
    // Dense storage: walk the buffer linearly.
    float* ap = a->data.f32;
    int i, count = ccv_nnc_tensor_count(a->info);
    switch (cmd.info.histogram.type)
    {
      case CCV_NNC_HISTOGRAM_EVEN:
      {
        const int bins = cmd.info.histogram.bins;
        // Layout: [0] below min, [1 .. bins] the bins, [bins + 1] at or above max, [bins + 2] NaN.
        assert(ccv_nnc_tensor_count(b->info) == bins + 3);
        const float min = cmd.info.histogram.min;
        const float max = cmd.info.histogram.max;
        assert(cmd.info.histogram.max > cmd.info.histogram.min);
        const float range = bins / (max - min);
        for (i = 0; i < count; i++)
        {
          a_min = ccv_min(a_min, ap[i]);
          a_max = ccv_max(a_max, ap[i]);
          a_sum += ap[i];
          a_sum_of_squares += ap[i] * ap[i];
          if (isnan(ap[i]))
            ++bp[bins + 2];
          else if (ap[i] < min)
            ++bp[0];
          else if (ap[i] >= max)
            ++bp[bins + 1];
          else {
            int idx = (int)((ap[i] - min) * range) + 1;
            idx = ccv_min(ccv_max(idx, 1), bins);
            ++bp[idx];
          }
        }
        break;
      }
      case CCV_NNC_HISTOGRAM_LOGARITHMIC:
      {
        const float log_base = 1.0 / logf(cmd.info.histogram.rate);
        assert(cmd.info.histogram.max > 0);
        assert(cmd.info.histogram.min > 0);
        assert(cmd.info.histogram.max > cmd.info.histogram.min);
        const float min = cmd.info.histogram.min;
        const float max = cmd.info.histogram.max;
        const int upper_range = ceilf(logf(cmd.info.histogram.max / cmd.info.histogram.min) * log_base);
        // The highest slot written is upper_range * 2 + 1 (NaN), so the output must hold
        // at least upper_range * 2 + 2 counters.
        assert((int)ccv_nnc_tensor_count(b->info) >= upper_range * 2 + 2);
        const float min_inv = 1.0 / cmd.info.histogram.min;
        for (i = 0; i < count; i++)
        {
          a_min = ccv_min(a_min, ap[i]);
          a_max = ccv_max(a_max, ap[i]);
          a_sum += ap[i];
          a_sum_of_squares += ap[i] * ap[i];
          // Slot 0 and slot count - 2 collect values beyond -max and max, slot count - 1 collects NaN.
          if (isnan(ap[i]))
            ++bp[upper_range * 2 + 1];
          else if (ap[i] >= max)
            ++bp[upper_range * 2];
          else if (ap[i] <= -max)
            ++bp[0];
          else if (ap[i] < min && ap[i] > -min)
            ++bp[upper_range];
          else {
            int idx = ceilf(logf(fabsf(ap[i]) * min_inv) * log_base);
            idx = ap[i] > 0 ? idx + upper_range : upper_range - idx;
            idx = ccv_min(ccv_max(idx, 0), upper_range * 2);
            ++bp[idx];
          }
        }
        break;
      }
      case CCV_NNC_HISTOGRAM_BINS:
      {
        assert(h);
        // The boundaries are read through data.f32 below.
        assert(h->info.datatype == CCV_32F);
        const int upper_range = ccv_nnc_tensor_count(h->info);
        // One counter per boundary, one past the last boundary, and one for NaN.
        assert(ccv_nnc_tensor_count(b->info) == upper_range + 2);
        for (i = 0; i < count; i++)
        {
          a_min = ccv_min(a_min, ap[i]);
          a_max = ccv_max(a_max, ap[i]);
          a_sum += ap[i];
          a_sum_of_squares += ap[i] * ap[i];
          if (isnan(ap[i]))
            ++bp[upper_range + 1];
          else {
            const int idx = _upper_bound(ap[i], upper_range, h->data.f32);
            ++bp[idx];
          }
        }
        break;
      }
    }
  } else {
    // Strided storage: hand the view's dimensions and strides to the per-type walkers.
    ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)a;
    assert(CCV_IS_TENSOR_VIEW(tv));
    const int nd = ccv_nnc_tensor_nd(tv->info.dim);
    assert(nd >= 1);
    switch (cmd.info.histogram.type)
    {
      case CCV_NNC_HISTOGRAM_EVEN:
      {
        const int bins = cmd.info.histogram.bins;
        assert(ccv_nnc_tensor_count(b->info) == bins + 3);
        const float min = cmd.info.histogram.min;
        const float max = cmd.info.histogram.max;
        assert(cmd.info.histogram.max > cmd.info.histogram.min);
        const float range = bins / (max - min);
        _ccv_nnc_tensor_histogram_even(tv->data.f32, bp, nd, tv->info.dim, tv->stride, max, min, bins, range, &a_max, &a_min, &a_sum, &a_sum_of_squares);
        break;
      }
      case CCV_NNC_HISTOGRAM_LOGARITHMIC:
      {
        const float log_base = 1.0 / logf(cmd.info.histogram.rate);
        assert(cmd.info.histogram.max > 0);
        assert(cmd.info.histogram.min > 0);
        assert(cmd.info.histogram.max > cmd.info.histogram.min);
        const float min = cmd.info.histogram.min;
        const float max = cmd.info.histogram.max;
        const int upper_range = ceilf(logf(cmd.info.histogram.max / cmd.info.histogram.min) * log_base);
        // Same capacity requirement as the contiguous path: NaN goes to upper_range * 2 + 1.
        assert((int)ccv_nnc_tensor_count(b->info) >= upper_range * 2 + 2);
        const float min_inv = 1.0 / cmd.info.histogram.min;
        _ccv_nnc_tensor_histogram_logarithmic(tv->data.f32, bp, nd, tv->info.dim, tv->stride, max, min, upper_range, min_inv, log_base, &a_max, &a_min, &a_sum, &a_sum_of_squares);
        break;
      }
      case CCV_NNC_HISTOGRAM_BINS:
      {
        assert(h);
        // The boundaries are read through data.f32 below.
        assert(h->info.datatype == CCV_32F);
        const int upper_range = ccv_nnc_tensor_count(h->info);
        assert(ccv_nnc_tensor_count(b->info) == upper_range + 2);
        _ccv_nnc_tensor_histogram_bins(tv->data.f32, h->data.f32, bp, nd, tv->info.dim, tv->stride, upper_range, &a_max, &a_min, &a_sum, &a_sum_of_squares);
        break;
      }
    }
  }
  // Both paths publish the summary statistics the same way.
  if (s)
  {
    assert(ccv_nnc_tensor_count(s->info) >= 4);
    assert(s->info.datatype == CCV_32F);
    s->data.f32[0] = a_min;
    s->data.f32[1] = a_max;
    s->data.f32[2] = a_sum;
    s->data.f32[3] = a_sum_of_squares;
  }
  return CCV_NNC_EXEC_SUCCESS;
}
552
553
// Backward entry point for the histogram command. It performs no work and ignores every
// argument: the call is always answered with CCV_NNC_EXEC_INVALID.
static int _ccv_nnc_histogram_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  return CCV_NNC_EXEC_INVALID;
}
557
558
// Registers the CPU reference implementation of the forward histogram command.
REGISTER_COMMAND_BACKEND(CCV_NNC_HISTOGRAM_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  // Entry point, with a single algorithm variant.
  registry->exec = _ccv_nnc_histogram_forw;
  registry->algorithms = 1;
  // Supported layouts, element types (float and 32-bit int) and memory placement.
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
  registry->tensor_datatypes = CCV_32F | CCV_32S;
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
}
566
567
// Registers the CPU reference entry for the backward histogram command. The registered
// function, _ccv_nnc_histogram_back, only returns CCV_NNC_EXEC_INVALID.
REGISTER_COMMAND_BACKEND(CCV_NNC_HISTOGRAM_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  // Entry point, with a single algorithm variant.
  registry->exec = _ccv_nnc_histogram_back;
  registry->algorithms = 1;
  // Same layouts, element types and memory placement as the forward registration.
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
  registry->tensor_datatypes = CCV_32F | CCV_32S;
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
}