Coverage Report

Created: 2021-04-14 15:26

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/reduce/ccv_nnc_reduce_sum_cpu_ref.c
 Line |  Count | Source
    1 |        | #include "ccv.h"
    2 |        | #include "ccv_internal.h"
    3 |        | #include "nnc/ccv_nnc.h"
    4 |        | #include "nnc/ccv_nnc_easy.h"
    5 |        | #include "nnc/ccv_nnc_internal.h"
    6 |        | #ifdef USE_OPENMP
    7 |        | #include <omp.h>
    8 |        | #endif
    9 |        | #ifdef USE_DISPATCH
   10 |        | #include <dispatch/dispatch.h>
   11 |        | #endif
   12 |        | 
   13 |        | // Shared methods.
   14 |        | #include "../_ccv_nnc_cpu_ref.h"
   15 |        | 
   16 |        | void _ccv_nnc_reduce_sum_forw_cpu_ref(ccv_nnc_tensor_view_t* const a, ccv_nnc_tensor_view_t* const b)
   17 |  5.04k | {
   18 |  5.04k |   assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
   19 |  5.04k |   assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
   20 |  5.04k |   // Assuming this is float 32.
   21 |  5.04k |   int adim[CCV_NNC_MAX_DIM_ALLOC];
   22 |  5.04k |   int bdim[CCV_NNC_MAX_DIM_ALLOC];
   23 |  5.04k |   ccv_nnc_tensor_view_get_dim(a, adim);
   24 |  5.04k |   ccv_nnc_tensor_view_get_dim(b, bdim);
   25 |  5.04k |   assert(ccv_nnc_tensor_view_check_broadcast_dim(b, adim));
   26 |  5.04k |   int ainc[CCV_NNC_MAX_DIM_ALLOC];
   27 |  5.04k |   int binc[CCV_NNC_MAX_DIM_ALLOC];
   28 |  5.04k |   assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
   29 |  5.04k |   ccv_nnc_tensor_view_get_inc(a, ainc);
   30 |  5.04k |   ccv_nnc_tensor_view_get_inc(b, binc);
   31 |  5.04k |   int i[CCV_NNC_MAX_DIM + 2];
   32 |  5.04k |   int x;
   33 |  5.04k |   ccv_nnc_tensor_zero(b);
   34 |  5.04k |   float* ap = a->data.f32;
   35 |  5.04k |   float* const bp = b->data.f32;
   36 |  5.04k |   // Non-optimal case, need to do skip if needed.
   37 |  10.3k |   for (i[0] = 0; i[0] < adim[0]; i[0]++)
   38 |  5.25k |   {
   39 |  5.25k |     float* const bp0 = bdim[0] == 1 ? bp : bp + i[0] * binc[1] * binc[2] * binc[3];
   40 |  11.1k |     for (i[1] = 0; i[1] < adim[1]; i[1]++)
   41 |  5.93k |     {
   42 |  5.93k |       float* const bp1 = bdim[1] == 1 ? bp0 : bp0 + i[1] * binc[2] * binc[3];
   43 |  19.5k |       for (i[2] = 0; i[2] < adim[2]; i[2]++)
   44 |  13.5k |       {
   45 |  13.5k |         float* const bp2 = bdim[2] == 1 ? bp1 : bp1 + i[2] * binc[3];
   46 |  13.5k |         if (bdim[3] == 1)
   47 |  40.2k |           for (x = 0; x < adim[3]; x++)
   48 |  29.3k |             bp2[0] += ap[x];
   49 |  2.66k |         else
   50 |  29.3k |           for (x = 0; x < adim[3]; x++)
   51 |  26.6k |             bp2[x] += ap[x];
   52 |  13.5k |         ap += ainc[3];
   53 |  13.5k |       }
   54 |  5.93k |       ap += (ainc[2] - adim[2]) * ainc[3];
   55 |  5.93k |     }
   56 |  5.25k |     ap += (ainc[1] - adim[1]) * ainc[2] * ainc[3];
   57 |  5.25k |   }
   58 |  5.04k | }
   59 |        | 
   60 |        | static int _ccv_nnc_reduce_sum_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
   61 |  5.01k | {
   62 |  5.01k |   assert(input_size == 1);
   63 |  5.01k |   ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
   64 |  5.01k |   ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0];
   65 |  5.01k |   _ccv_nnc_reduce_sum_forw_cpu_ref(a, b);
   66 |  5.01k |   return CCV_NNC_EXEC_SUCCESS;
   67 |  5.01k | }
   68 |        | 
   69 |        | static int _ccv_nnc_reduce_sum_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
   70 |  5.00k | {
   71 |  5.00k |   if (inputs[0] == 0)
   72 |      0 |   {
   73 |      0 |     _ccv_nnc_tensor_set_cpu_ref((ccv_nnc_tensor_view_t*)outputs[0], 1);
   74 |      0 |     return CCV_NNC_EXEC_SUCCESS;
   75 |      0 |   }
   76 |  5.00k |   ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)outputs[0];
   77 |  5.00k |   ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[0];
   78 |  5.00k |   assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
   79 |  5.00k |   assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
   80 |  5.00k |   // Assuming this is float 32.
   81 |  5.00k |   int adim[CCV_NNC_MAX_DIM_ALLOC];
   82 |  5.00k |   int bdim[CCV_NNC_MAX_DIM_ALLOC];
   83 |  5.00k |   ccv_nnc_tensor_view_get_dim(a, adim);
   84 |  5.00k |   ccv_nnc_tensor_view_get_dim(b, bdim);
   85 |  5.00k |   int ainc[CCV_NNC_MAX_DIM_ALLOC];
   86 |  5.00k |   int binc[CCV_NNC_MAX_DIM_ALLOC];
   87 |  5.00k |   assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
   88 |  5.00k |   ccv_nnc_tensor_view_get_inc(a, ainc);
   89 |  5.00k |   ccv_nnc_tensor_view_get_inc(b, binc);
   90 |  5.00k |   int i[CCV_NNC_MAX_DIM + 2];
   91 |  5.00k |   int x;
   92 |  5.00k |   float* ap = a->data.f32;
   93 |  5.00k |   float* const bp = b->data.f32;
   94 |  5.00k |   // Non-optimal case, need to do skip if needed.
   95 |  10.0k |   for (i[0] = 0; i[0] < adim[0]; i[0]++)
   96 |  5.02k |   {
   97 |  5.02k |     float* const bp0 = bdim[0] == 1 ? bp : bp + i[0] * binc[1] * binc[2] * binc[3];
   98 |  10.0k |     for (i[1] = 0; i[1] < adim[1]; i[1]++)
   99 |  5.07k |     {
  100 |  5.07k |       float* const bp1 = bdim[1] == 1 ? bp0 : bp0 + i[1] * binc[2] * binc[3];
  101 |  15.3k |       for (i[2] = 0; i[2] < adim[2]; i[2]++)
  102 |  10.2k |       {
  103 |  10.2k |         float* const bp2 = bdim[2] == 1 ? bp1 : bp1 + i[2] * binc[3];
  104 |  10.2k |         if (bdim[3] == 1)
  105 |  32.9k |           for (x = 0; x < adim[3]; x++)
  106 |  22.6k |             ap[x] = bp2[0];
  107 |     18 |         else
  108 |    184 |           for (x = 0; x < adim[3]; x++)
  109 |    166 |             ap[x] = bp2[x];
  110 |  10.2k |         ap += ainc[3];
  111 |  10.2k |       }
  112 |  5.07k |       ap += (ainc[2] - adim[2]) * ainc[3];
  113 |  5.07k |     }
  114 |  5.02k |     ap += (ainc[1] - adim[1]) * ainc[2] * ainc[3];
  115 |  5.02k |   }
  116 |  5.00k |   return CCV_NNC_EXEC_SUCCESS;
  117 |  5.00k | }
  118 |        | 
  119 |        | REGISTER_COMMAND_BACKEND(CCV_NNC_REDUCE_SUM_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
  120 |      1 | {
  121 |      1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
  122 |      1 |   registry->tensor_datatypes = CCV_32F;
  123 |      1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  124 |      1 |   registry->algorithms = 1;
  125 |      1 |   registry->exec = _ccv_nnc_reduce_sum_forw;
  126 |      1 | }
  127 |        | 
  128 |        | REGISTER_COMMAND_BACKEND(CCV_NNC_REDUCE_SUM_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
  129 |      1 | {
  130 |      1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
  131 |      1 |   registry->tensor_datatypes = CCV_32F;
  132 |      1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  133 |      1 |   registry->algorithms = 1;
  134 |      1 |   registry->exec = _ccv_nnc_reduce_sum_back;
  135 |      1 | }
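
For context on how counts like those above are typically accumulated, the sketch below shows one way to exercise the registered forward backend through the public command API. It is a minimal illustration, not code from the file above, and it assumes the convenience macros from nnc/ccv_nnc_easy.h (CPU_TENSOR_NHWC, TENSOR_LIST, CMD_REDUCE_SUM_FORWARD) behave as in the library's test suite.

#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_easy.h"

int main(void)
{
	ccv_nnc_init(); // register CPU reference backends, including the ones above
	// A 2x3 input reduced over axis 0 yields a 1x3 output: each output element
	// is a column sum, which exercises the bp2[x] += ap[x] path in the listing.
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 3), 0);
	int i;
	for (i = 0; i < 6; i++)
		a->data.f32[i] = i; // rows: {0, 1, 2} and {3, 4, 5}
	ccv_nnc_cmd_exec(CMD_REDUCE_SUM_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
	// b->data.f32 is expected to hold {3, 5, 7}.
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	return 0;
}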
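The backward reference in the listing applies the usual rule that the gradient of a sum is the upstream gradient broadcast back over the reduced axes; when no upstream gradient is supplied (the inputs[0] == 0 branch at lines 72-75, uncovered in this run) it fills the output with ones. A simplified, self-contained sketch of that broadcast for a plain 2x3 case, ignoring the 4-D view increments the real code handles:

#include <stdio.h>

int main(void)
{
	// Upstream gradient g has the reduced shape (1x3); the input gradient h has
	// the full shape (2x3). Every row of h receives a copy of g, mirroring the
	// ap[x] = bp2[x] path in _ccv_nnc_reduce_sum_back.
	const float g[3] = { 3, 5, 7 };
	float h[2][3];
	int r, c;
	for (r = 0; r < 2; r++)
		for (c = 0; c < 3; c++)
			h[r][c] = g[c];
	for (r = 0; r < 2; r++)
		printf("%g %g %g\n", h[r][0], h[r][1], h[r][2]);
	return 0;
}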