Coverage Report

Created: 2024-08-19 11:27

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/reduce/ccv_nnc_reduce_sum_cpu_ref.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
// Shared methods.
14
#include "../_ccv_nnc_cpu_ref.h"
15
16
/**
 * Reference CPU sum-reduction of tensor view `a` into tensor view `b`.
 *
 * `b` is zeroed first, then every element of `a` is added into `b`. On each of
 * the four axes, when `b` has extent 1 the output position is not advanced, so
 * all elements of `a` along that axis accumulate into the same `b` element;
 * otherwise `b` is indexed in lock-step with `a`.
 *
 * Both views are accessed through `data.f32`; no datatype check is performed
 * here, so the caller is assumed to pass 32-bit float tensors.
 *
 * @param a Input tensor view (read).
 * @param b Output tensor view (zeroed, then accumulated into).
 */
void _ccv_nnc_reduce_sum_forw_cpu_ref(ccv_nnc_tensor_view_t* const a, ccv_nnc_tensor_view_t* const b)
{
  // The loop nest below is hard-wired to CCV_NNC_MAX_DIM + 2 axes.
  assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
  assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
  int in_dim[CCV_NNC_MAX_DIM_ALLOC];
  int out_dim[CCV_NNC_MAX_DIM_ALLOC];
  ccv_nnc_tensor_view_get_dim(a, in_dim);
  ccv_nnc_tensor_view_get_dim(b, out_dim);
  assert(ccv_nnc_tensor_view_check_broadcast_dim(b, in_dim));
  assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
  int in_stride[CCV_NNC_MAX_DIM_ALLOC];
  int out_stride[CCV_NNC_MAX_DIM_ALLOC];
  ccv_nnc_tensor_view_get_stride(a, in_stride);
  ccv_nnc_tensor_view_get_stride(b, out_stride);
  // Start from zero so the loops below can purely accumulate.
  ccv_nnc_tensor_zero(b);
  float* const in = a->data.f32;
  float* const out = b->data.f32;
  // A step of 0 pins the output pointer on an axis that is being reduced.
  const int out_step0 = (out_dim[0] == 1) ? 0 : out_stride[0];
  const int out_step1 = (out_dim[1] == 1) ? 0 : out_stride[1];
  const int out_step2 = (out_dim[2] == 1) ? 0 : out_stride[2];
  const int reduce_last = (out_dim[3] == 1);
  int i0, i1, i2, k;
  // Non-optimal case, need to do skip if needed.
  for (i0 = 0; i0 < in_dim[0]; i0++)
  {
    float* const in0 = in + i0 * in_stride[0];
    float* const out0 = out + i0 * out_step0;
    for (i1 = 0; i1 < in_dim[1]; i1++)
    {
      // The input row pointer is walked incrementally along axis 2.
      float* in_row = in0 + i1 * in_stride[1];
      float* const out1 = out0 + i1 * out_step1;
      for (i2 = 0; i2 < in_dim[2]; i2++)
      {
        float* const out_row = out1 + i2 * out_step2;
        if (reduce_last)
        {
          // Innermost axis is reduced: fold the whole row into one element.
          for (k = 0; k < in_dim[3]; k++)
            out_row[0] += in_row[k];
        } else {
          // Innermost axis is kept: element-wise accumulate.
          for (k = 0; k < in_dim[3]; k++)
            out_row[k] += in_row[k];
        }
        in_row += in_stride[2];
      }
    }
  }
}
59
60
/**
 * Command entry point for the forward reduce-sum on the CPU reference backend.
 *
 * Expects exactly one input. Treats inputs[0] and outputs[0] as tensor views
 * and hands them to _ccv_nnc_reduce_sum_forw_cpu_ref. The cmd, hint, flags,
 * output_size and stream_context arguments are not used here.
 *
 * @return CCV_NNC_EXEC_SUCCESS always.
 */
static int _ccv_nnc_reduce_sum_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  assert(input_size == 1);
  _ccv_nnc_reduce_sum_forw_cpu_ref((ccv_nnc_tensor_view_t*)inputs[0], (ccv_nnc_tensor_view_t*)outputs[0]);
  return CCV_NNC_EXEC_SUCCESS;
}
68
69
/**
 * Command entry point for the backward reduce-sum on the CPU reference backend.
 *
 * If inputs[0] is null, outputs[0] is passed to _ccv_nnc_tensor_set_cpu_ref_f32
 * with the value 1 (presumably filling it with ones -- confirm against that
 * helper) and the function returns.
 *
 * Otherwise every element of outputs[0] is overwritten with a value read from
 * inputs[0]: on each of the four axes, when inputs[0] has extent 1 the same
 * source position is reused for every output index along that axis; otherwise
 * source and destination are indexed in lock-step.
 *
 * Both tensors are accessed through `data.f32`; no datatype check is performed
 * here. The cmd, hint, flags, input_size, output_size and stream_context
 * arguments are not used.
 *
 * @return CCV_NNC_EXEC_SUCCESS always.
 */
static int _ccv_nnc_reduce_sum_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  if (!inputs[0])
  {
    _ccv_nnc_tensor_set_cpu_ref_f32((ccv_nnc_tensor_view_t*)outputs[0], 1);
    return CCV_NNC_EXEC_SUCCESS;
  }
  ccv_nnc_tensor_view_t* const dst = (ccv_nnc_tensor_view_t*)outputs[0];
  ccv_nnc_tensor_view_t* const src = (ccv_nnc_tensor_view_t*)inputs[0];
  // The loop nest below is hard-wired to CCV_NNC_MAX_DIM + 2 axes.
  assert(ccv_nnc_tensor_nd(dst->info.dim) <= CCV_NNC_MAX_DIM + 2);
  assert(ccv_nnc_tensor_nd(src->info.dim) <= CCV_NNC_MAX_DIM + 2);
  int dst_dim[CCV_NNC_MAX_DIM_ALLOC];
  int src_dim[CCV_NNC_MAX_DIM_ALLOC];
  ccv_nnc_tensor_view_get_dim(dst, dst_dim);
  ccv_nnc_tensor_view_get_dim(src, src_dim);
  assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
  int dst_stride[CCV_NNC_MAX_DIM_ALLOC];
  int src_stride[CCV_NNC_MAX_DIM_ALLOC];
  ccv_nnc_tensor_view_get_stride(dst, dst_stride);
  ccv_nnc_tensor_view_get_stride(src, src_stride);
  float* const dst_base = dst->data.f32;
  float* const src_base = src->data.f32;
  // A step of 0 keeps re-reading the same source position on an extent-1 axis.
  const int src_step0 = (src_dim[0] == 1) ? 0 : src_stride[0];
  const int src_step1 = (src_dim[1] == 1) ? 0 : src_stride[1];
  const int src_step2 = (src_dim[2] == 1) ? 0 : src_stride[2];
  const int repeat_last = (src_dim[3] == 1);
  int i0, i1, i2, k;
  // Non-optimal case, need to do skip if needed.
  for (i0 = 0; i0 < dst_dim[0]; i0++)
  {
    float* const dst0 = dst_base + i0 * dst_stride[0];
    float* const src0 = src_base + i0 * src_step0;
    for (i1 = 0; i1 < dst_dim[1]; i1++)
    {
      // The destination row pointer is walked incrementally along axis 2.
      float* dst_row = dst0 + i1 * dst_stride[1];
      float* const src1 = src0 + i1 * src_step1;
      for (i2 = 0; i2 < dst_dim[2]; i2++)
      {
        float* const src_row = src1 + i2 * src_step2;
        if (repeat_last)
        {
          // Source has a single element on the innermost axis: replicate it.
          for (k = 0; k < dst_dim[3]; k++)
            dst_row[k] = src_row[0];
        } else {
          // Same innermost extent: straight element-wise copy.
          for (k = 0; k < dst_dim[3]; k++)
            dst_row[k] = src_row[k];
        }
        dst_row += dst_stride[2];
      }
    }
  }
  return CCV_NNC_EXEC_SUCCESS;
}
118
119
// Registry entry for CCV_NNC_REDUCE_SUM_FORWARD on CCV_NNC_BACKEND_CPU_REF:
// one algorithm, CCV_32F tensors in CPU memory, NHWC / NCHW / CHWN formats,
// executed by _ccv_nnc_reduce_sum_forw.
REGISTER_COMMAND_BACKEND(CCV_NNC_REDUCE_SUM_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  registry->exec = _ccv_nnc_reduce_sum_forw;
  registry->algorithms = 1;
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  registry->tensor_datatypes = CCV_32F;
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
}
127
128
// Registry entry for CCV_NNC_REDUCE_SUM_BACKWARD on CCV_NNC_BACKEND_CPU_REF:
// one algorithm, CCV_32F tensors in CPU memory, NHWC / NCHW / CHWN formats,
// executed by _ccv_nnc_reduce_sum_back.
REGISTER_COMMAND_BACKEND(CCV_NNC_REDUCE_SUM_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  registry->exec = _ccv_nnc_reduce_sum_back;
  registry->algorithms = 1;
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  registry->tensor_datatypes = CCV_32F;
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
}