Coverage Report

Created: 2024-08-19 11:27

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/reduce/ccv_nnc_reduce_max_cpu_ref.c
Line  Count  Source
   1         #include "ccv.h"
   2         #include "ccv_internal.h"
   3         #include "nnc/ccv_nnc.h"
   4         #include "nnc/ccv_nnc_easy.h"
   5         #include "nnc/ccv_nnc_internal.h"
   6         #ifdef USE_OPENMP
   7         #include <omp.h>
   8         #endif
   9         #ifdef USE_DISPATCH
  10         #include <dispatch/dispatch.h>
  11         #endif
  12
  13         // Shared methods.
  14         #include "../_ccv_nnc_cpu_ref.h"
  15
  16         static int _ccv_nnc_reduce_max_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  17      4  {
  18      4    assert(input_size == 1);
  19      4    ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
  20      4    ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0];
  21      4    assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
  22      4    assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
  23           // Assuming this is float 32.
  24      4    int adim[CCV_NNC_MAX_DIM_ALLOC];
  25      4    int bdim[CCV_NNC_MAX_DIM_ALLOC];
  26      4    ccv_nnc_tensor_view_get_dim(a, adim);
  27      4    ccv_nnc_tensor_view_get_dim(b, bdim);
  28      4    assert(ccv_nnc_tensor_view_check_broadcast_dim(b, adim));
  29      4    int astride[CCV_NNC_MAX_DIM_ALLOC];
  30      4    int bstride[CCV_NNC_MAX_DIM_ALLOC];
  31      4    assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
  32      4    ccv_nnc_tensor_view_get_stride(a, astride);
  33      4    ccv_nnc_tensor_view_get_stride(b, bstride);
  34      4    int i[CCV_NNC_MAX_DIM + 2];
  35      4    int x;
  36      4    _ccv_nnc_tensor_set_cpu_ref_f32(b, -FLT_MAX);
  37      4    float* const ap = a->data.f32;
  38      4    float* const bp = b->data.f32;
  39           // Non-optimal case, need to do skip if needed.
  40      8    for (i[0] = 0; i[0] < adim[0]; i[0]++)
  41      4    {
  42      4      float* const ap0 = ap + i[0] * astride[0];
  43      4      float* const bp0 = bdim[0] == 1 ? bp : bp + i[0] * bstride[0];
  44      8      for (i[1] = 0; i[1] < adim[1]; i[1]++)
  45      4      {
  46      4        float* ap1 = ap0 + i[1] * astride[1];
  47      4        float* const bp1 = bdim[1] == 1 ? bp0 : bp0 + i[1] * bstride[1];
  48     11        for (i[2] = 0; i[2] < adim[2]; i[2]++)
  49      7        {
  50      7          float* const bp2 = bdim[2] == 1 ? bp1 : bp1 + i[2] * bstride[2];
  51      7          if (bdim[3] == 1)
  52      3          {
  53    109            for (x = 0; x < adim[3]; x++)
  54    106              if (ap1[x] > bp2[0])
  55     11                bp2[0] = ap1[x];
  56      4          } else {
  57     16            for (x = 0; x < adim[3]; x++)
  58     12              if (ap1[x] > bp2[x])
  59     12                bp2[x] = ap1[x];
  60      4          }
  61      7          ap1 += astride[2];
  62      7        }
  63      4      }
  64      4    }
  65      4    return CCV_NNC_EXEC_SUCCESS;
  66      4  }
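
The forward kernel above walks the input and writes the running maximum into the output at the broadcast position, so any dimension whose output extent is 1 is the one being reduced. A minimal sketch of how this backend is typically exercised through the public command API follows; it assumes the helper macros from nnc/ccv_nnc_easy.h (CMD_REDUCE_MAX_FORWARD, TENSOR_LIST, CPU_TENSOR_NHWC) and the axis argument shown here, none of which appear in this file.

// Sketch only: reduce a 2x4 float tensor along axis 1 with the CPU reference backend.
#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>

static void reduce_max_forward_example(void)
{
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 4), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 1), 0);
	int i;
	for (i = 0; i < 2 * 4; i++)
		a->data.f32[i] = i; // row 0 is 0..3, row 1 is 4..7
	// Dispatches to _ccv_nnc_reduce_max_forw registered below (axis semantics assumed).
	ccv_nnc_cmd_exec(CMD_REDUCE_MAX_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
	// Under these assumptions b->data.f32 now holds {3, 7}.
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
}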
  67
  68         static int _ccv_nnc_reduce_max_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  69      1  {
  70      1    if (inputs[0] == 0)
  71      0    {
  72      0      ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0];
  73      0      ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
  74      0      ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
  75      0      assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2);
  76      0      assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
  77      0      assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
  78             // Assuming this is float 32.
  79      0      int hdim[CCV_NNC_MAX_DIM_ALLOC];
  80      0      int bdim[CCV_NNC_MAX_DIM_ALLOC];
  81      0      ccv_nnc_tensor_view_get_dim(h, hdim);
  82      0      ccv_nnc_tensor_view_get_dim(b, bdim);
  83      0      assert(ccv_nnc_tensor_view_check_broadcast_dim(b, hdim));
  84      0      assert(ccv_nnc_tensor_view_check_dim(a, hdim));
  85      0      int hstride[CCV_NNC_MAX_DIM_ALLOC];
  86      0      int astride[CCV_NNC_MAX_DIM_ALLOC];
  87      0      int bstride[CCV_NNC_MAX_DIM_ALLOC];
  88      0      assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
  89      0      ccv_nnc_tensor_view_get_stride(h, hstride);
  90      0      ccv_nnc_tensor_view_get_stride(a, astride);
  91      0      ccv_nnc_tensor_view_get_stride(b, bstride);
  92      0      int i[CCV_NNC_MAX_DIM + 2];
  93      0      int x;
  94      0      float* const hp = h->data.f32;
  95      0      float* const ap = a->data.f32;
  96      0      float* const bp = b->data.f32;
  97      0      ccv_nnc_tensor_zero(h);
  98             // Non-optimal case, need to do skip if needed.
  99      0      for (i[0] = 0; i[0] < hdim[0]; i[0]++)
 100      0      {
 101      0        float* const ap0 = ap + i[0] * astride[0];
 102      0        float* const hp0 = hp + i[0] * hstride[0];
 103      0        float* const bp0 = bdim[0] == 1 ? bp : bp + i[0] * bstride[0];
 104      0        for (i[1] = 0; i[1] < hdim[1]; i[1]++)
 105      0        {
 106      0          float* ap1 = ap0 + i[1] * astride[1];
 107      0          float* hp1 = hp0 + i[1] * hstride[1];
 108      0          float* const bp1 = bdim[1] == 1 ? bp0 : bp0 + i[1] * bstride[1];
 109      0          for (i[2] = 0; i[2] < hdim[2]; i[2]++)
 110      0          {
 111      0            float* const bp2 = bdim[2] == 1 ? bp1 : bp1 + i[2] * bstride[2];
 112      0            if (bdim[3] == 1)
 113      0            {
 114      0              for (x = 0; x < hdim[3]; x++)
 115      0                if (ap1[x] == bp2[0])
 116      0                  hp1[x] = 1;
 117      0            } else {
 118      0              for (x = 0; x < hdim[3]; x++)
 119      0                if (ap1[x] == bp2[x])
 120      0                  hp1[x] = 1;
 121      0            }
 122      0            hp1 += hstride[2];
 123      0            ap1 += astride[2];
 124      0          }
 125      0        }
 126      0      }
 127      0      return CCV_NNC_EXEC_SUCCESS;
 128      0    }
 129      1    ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0];
 130      1    ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
 131      1    ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
 132      1    ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
 133      1    assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2);
 134      1    assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
 135      1    assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
 136      1    assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
 137           // Assuming this is float 32.
 138      1    int hdim[CCV_NNC_MAX_DIM_ALLOC];
 139      1    int gdim[CCV_NNC_MAX_DIM_ALLOC];
 140      1    ccv_nnc_tensor_view_get_dim(h, hdim);
 141      1    ccv_nnc_tensor_view_get_dim(g, gdim);
 142      1    assert(ccv_nnc_tensor_view_check_broadcast_dim(g, hdim));
 143      1    assert(ccv_nnc_tensor_view_check_dim(a, hdim));
 144      1    assert(ccv_nnc_tensor_view_check_dim(b, gdim));
 145      1    int hstride[CCV_NNC_MAX_DIM_ALLOC];
 146      1    int gstride[CCV_NNC_MAX_DIM_ALLOC];
 147      1    int astride[CCV_NNC_MAX_DIM_ALLOC];
 148      1    int bstride[CCV_NNC_MAX_DIM_ALLOC];
 149      1    assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
 150      1    ccv_nnc_tensor_view_get_stride(h, hstride);
 151      1    ccv_nnc_tensor_view_get_stride(g, gstride);
 152      1    ccv_nnc_tensor_view_get_stride(a, astride);
 153      1    ccv_nnc_tensor_view_get_stride(b, bstride);
 154      1    int i[CCV_NNC_MAX_DIM + 2];
 155      1    int x;
 156      1    float* const hp = h->data.f32;
 157      1    float* const gp = g->data.f32;
 158      1    float* const ap = a->data.f32;
 159      1    float* const bp = b->data.f32;
 160      1    ccv_nnc_tensor_zero(h);
 161           // Non-optimal case, need to do skip if needed.
 162      2    for (i[0] = 0; i[0] < hdim[0]; i[0]++)
 163      1    {
 164      1      float* const ap0 = ap + i[0] * astride[0];
 165      1      float* const hp0 = hp + i[0] * hstride[0];
 166      1      float* const gp0 = gdim[0] == 1 ? gp : gp + i[0] * gstride[0];
 167      1      float* const bp0 = gdim[0] == 1 ? bp : bp + i[0] * bstride[0];
 168      2      for (i[1] = 0; i[1] < hdim[1]; i[1]++)
 169      1      {
 170      1        float* ap1 = ap0 + i[1] * astride[1];
 171      1        float* hp1 = hp0 + i[1] * hstride[1];
 172      1        float* const gp1 = gdim[1] == 1 ? gp0 : gp0 + i[1] * gstride[1];
 173      1        float* const bp1 = gdim[1] == 1 ? bp0 : bp0 + i[1] * bstride[1];
 174      2        for (i[2] = 0; i[2] < hdim[2]; i[2]++)
 175      1        {
 176      1          float* const gp2 = gdim[2] == 1 ? gp1 : gp1 + i[2] * gstride[2];
 177      1          float* const bp2 = gdim[2] == 1 ? bp1 : bp1 + i[2] * bstride[2];
 178      1          if (gdim[3] == 1)
 179      1          {
 180    101            for (x = 0; x < hdim[3]; x++)
 181    100              if (ap1[x] == bp2[0])
 182      1                hp1[x] = gp2[0];
 183      1          } else {
 184      0            for (x = 0; x < hdim[3]; x++)
 185      0              if (ap1[x] == bp2[x])
 186      0                hp1[x] = gp2[x];
 187      0          }
 188      1          hp1 += hstride[2];
 189      1          ap1 += astride[2];
 190      1        }
 191      1      }
 192      1    }
 193      1    return CCV_NNC_EXEC_SUCCESS;
 194      1  }
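
The counts show that the inputs[0] == 0 branch of the backward kernel (lines 71-128) never executes in this run: that branch writes a 0/1 mask into h when no incoming gradient is supplied, while the covered path propagates gp2 to the positions where a equals the reduced maximum. A hedged sketch of a call that should drive the uncovered branch, again assuming the ccv_nnc_easy.h helpers and the axis argument used in the sketch above:

// Sketch only: pass a null gradient so _ccv_nnc_reduce_max_back takes the mask-only path.
static void reduce_max_backward_mask_example(ccv_nnc_tensor_t* const a, ccv_nnc_tensor_t* const b)
{
	// a is the forward input, b the forward reduce-max output; h matches a's shape.
	ccv_nnc_tensor_t* const h = ccv_nnc_tensor_new(0, a->info, 0);
	// inputs[0] (the gradient g) is 0, which is exactly what the uncovered branch checks for.
	ccv_nnc_cmd_exec(CMD_REDUCE_MAX_BACKWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(0, a, b), TENSOR_LIST(h), 0);
	// h->data.f32 should now be 1 wherever a matched the reduced maximum and 0 elsewhere.
	ccv_nnc_tensor_free(h);
}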
 195
 196         REGISTER_COMMAND_BACKEND(CCV_NNC_REDUCE_MAX_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 197      1  {
 198      1    registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
 199      1    registry->tensor_datatypes = CCV_32F;
 200      1    registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 201      1    registry->algorithms = 1;
 202      1    registry->exec = _ccv_nnc_reduce_max_forw;
 203      1  }
 204
 205         REGISTER_COMMAND_BACKEND(CCV_NNC_REDUCE_MAX_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 206      1  {
 207      1    registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
 208      1    registry->tensor_datatypes = CCV_32F;
 209      1    registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 210      1    registry->algorithms = 1;
 211      1    registry->exec = _ccv_nnc_reduce_max_back;
 212      1  }