Coverage Report

Created: 2024-08-19 11:27

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/reduce/ccv_nnc_reduce_min_cpu_ref.c
Line | Count | Source
   1 |       | #include "ccv.h"
   2 |       | #include "ccv_internal.h"
   3 |       | #include "nnc/ccv_nnc.h"
   4 |       | #include "nnc/ccv_nnc_easy.h"
   5 |       | #include "nnc/ccv_nnc_internal.h"
   6 |       | #ifdef USE_OPENMP
   7 |       | #include <omp.h>
   8 |       | #endif
   9 |       | #ifdef USE_DISPATCH
  10 |       | #include <dispatch/dispatch.h>
  11 |       | #endif
  12 |       |
  13 |       | // Shared methods.
  14 |       | #include "../_ccv_nnc_cpu_ref.h"
  15 |       |
  16 |       | static int _ccv_nnc_reduce_min_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  17 |     3 | {
  18 |     3 |   assert(input_size == 1);
  19 |     3 |   ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
  20 |     3 |   ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0];
  21 |     3 |   assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
  22 |     3 |   assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
  23 |       |   // Assuming this is float 32.
  24 |     3 |   int adim[CCV_NNC_MAX_DIM_ALLOC];
  25 |     3 |   int bdim[CCV_NNC_MAX_DIM_ALLOC];
  26 |     3 |   ccv_nnc_tensor_view_get_dim(a, adim);
  27 |     3 |   ccv_nnc_tensor_view_get_dim(b, bdim);
  28 |     3 |   assert(ccv_nnc_tensor_view_check_broadcast_dim(b, adim));
  29 |     3 |   int astride[CCV_NNC_MAX_DIM_ALLOC];
  30 |     3 |   int bstride[CCV_NNC_MAX_DIM_ALLOC];
  31 |     3 |   assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
  32 |     3 |   ccv_nnc_tensor_view_get_stride(a, astride);
  33 |     3 |   ccv_nnc_tensor_view_get_stride(b, bstride);
  34 |     3 |   int i[CCV_NNC_MAX_DIM + 2];
  35 |     3 |   int x;
  36 |     3 |   _ccv_nnc_tensor_set_cpu_ref_f32(b, FLT_MAX);
  37 |     3 |   float* const ap = a->data.f32;
  38 |     3 |   float* const bp = b->data.f32;
  39 |       |   // Non-optimal case, need to do skip if needed.
  40 |     6 |   for (i[0] = 0; i[0] < adim[0]; i[0]++)
  41 |     3 |   {
  42 |     3 |     float* const ap0 = ap + i[0] * astride[0];
  43 |     3 |     float* const bp0 = bdim[0] == 1 ? bp : bp + i[0] * bstride[0];
  44 |     6 |     for (i[1] = 0; i[1] < adim[1]; i[1]++)
  45 |     3 |     {
  46 |     3 |       float* ap1 = ap0 + i[1] * astride[1];
  47 |     3 |       float* const bp1 = bdim[1] == 1 ? bp0 : bp0 + i[1] * bstride[1];
  48 |     9 |       for (i[2] = 0; i[2] < adim[2]; i[2]++)
  49 |     6 |       {
  50 |     6 |         float* const bp2 = bdim[2] == 1 ? bp1 : bp1 + i[2] * bstride[2];
  51 |     6 |         if (bdim[3] == 1)
  52 |     2 |         {
  53 |     8 |           for (x = 0; x < adim[3]; x++)
  54 |     6 |             if (ap1[x] < bp2[0])
  55 |     2 |               bp2[0] = ap1[x];
  56 |     4 |         } else {
  57 |    16 |           for (x = 0; x < adim[3]; x++)
  58 |    12 |             if (ap1[x] < bp2[x])
  59 |     6 |               bp2[x] = ap1[x];
  60 |     4 |         }
  61 |     6 |         ap1 += astride[2];
  62 |     6 |       }
  63 |     3 |     }
  64 |     3 |   }
  65 |     3 |   return CCV_NNC_EXEC_SUCCESS;
  66 |     3 | }
  67 |       |
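The forward kernel above ran 3 times in this report: it pre-fills the output with FLT_MAX and then keeps the running minimum in the innermost loops, either into a single accumulator (bdim[3] == 1) or element-wise. Below is a minimal sketch of a call that exercises this path. It assumes the usual ccv_nnc easy macros (CMD_REDUCE_MIN_FORWARD with a reduce-axis argument, CPU_TENSOR_NHWC, TENSOR_LIST) behave like their REDUCE_MAX counterparts in the test suite; the shapes and values are illustrative, not taken from this run.

// Illustrative only: reduce a 2x4 float tensor to its per-row minimum with the
// CPU reference backend covered by this report.
ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 4), 0);
ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 1), 0);
int k;
for (k = 0; k < 8; k++)
    a->data.f32[k] = (float)(8 - k); // Descending values, so each row's minimum sits in the last column.
// Reduce along axis 1 (the assumed axis argument of the easy macro).
ccv_nnc_cmd_exec(CMD_REDUCE_MIN_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
// b->data.f32 should now hold {5, 1}: the minimum of {8,7,6,5} and of {4,3,2,1}.
ccv_nnc_tensor_free(a);
ccv_nnc_tensor_free(b);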
  68 |       | static int _ccv_nnc_reduce_min_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  69 |     0 | {
  70 |     0 |   if (inputs[0] == 0)
  71 |     0 |   {
  72 |     0 |     ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0];
  73 |     0 |     ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
  74 |     0 |     ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
  75 |     0 |     assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2);
  76 |     0 |     assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
  77 |     0 |     assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
  78 |       |     // Assuming this is float 32.
  79 |     0 |     int hdim[CCV_NNC_MAX_DIM_ALLOC];
  80 |     0 |     int bdim[CCV_NNC_MAX_DIM_ALLOC];
  81 |     0 |     ccv_nnc_tensor_view_get_dim(h, hdim);
  82 |     0 |     ccv_nnc_tensor_view_get_dim(b, bdim);
  83 |     0 |     assert(ccv_nnc_tensor_view_check_broadcast_dim(b, hdim));
  84 |     0 |     assert(ccv_nnc_tensor_view_check_dim(a, hdim));
  85 |     0 |     int hstride[CCV_NNC_MAX_DIM_ALLOC];
  86 |     0 |     int astride[CCV_NNC_MAX_DIM_ALLOC];
  87 |     0 |     int bstride[CCV_NNC_MAX_DIM_ALLOC];
  88 |     0 |     assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
  89 |     0 |     ccv_nnc_tensor_view_get_stride(h, hstride);
  90 |     0 |     ccv_nnc_tensor_view_get_stride(a, astride);
  91 |     0 |     ccv_nnc_tensor_view_get_stride(b, bstride);
  92 |     0 |     int i[CCV_NNC_MAX_DIM + 2];
  93 |     0 |     int x;
  94 |     0 |     float* const hp = h->data.f32;
  95 |     0 |     float* const ap = a->data.f32;
  96 |     0 |     float* const bp = b->data.f32;
  97 |     0 |     ccv_nnc_tensor_zero(h);
  98 |       |     // Non-optimal case, need to do skip if needed.
  99 |     0 |     for (i[0] = 0; i[0] < hdim[0]; i[0]++)
 100 |     0 |     {
 101 |     0 |       float* const ap0 = ap + i[0] * astride[0];
 102 |     0 |       float* const hp0 = hp + i[0] * hstride[0];
 103 |     0 |       float* const bp0 = bdim[0] == 1 ? bp : bp + i[0] * bstride[0];
 104 |     0 |       for (i[1] = 0; i[1] < hdim[1]; i[1]++)
 105 |     0 |       {
 106 |     0 |         float* ap1 = ap0 + i[1] * astride[1];
 107 |     0 |         float* hp1 = hp0 + i[1] * hstride[1];
 108 |     0 |         float* const bp1 = bdim[1] == 1 ? bp0 : bp0 + i[1] * bstride[1];
 109 |     0 |         for (i[2] = 0; i[2] < hdim[2]; i[2]++)
 110 |     0 |         {
 111 |     0 |           float* const bp2 = bdim[2] == 1 ? bp1 : bp1 + i[2] * bstride[2];
 112 |     0 |           if (bdim[3] == 1)
 113 |     0 |           {
 114 |     0 |             for (x = 0; x < hdim[3]; x++)
 115 |     0 |               if (ap1[x] == bp2[0])
 116 |     0 |                 hp1[x] = 1;
 117 |     0 |           } else {
 118 |     0 |             for (x = 0; x < hdim[3]; x++)
 119 |     0 |               if (ap1[x] == bp2[x])
 120 |     0 |                 hp1[x] = 1;
 121 |     0 |           }
 122 |     0 |           hp1 += hstride[2];
 123 |     0 |           ap1 += astride[2];
 124 |     0 |         }
 125 |     0 |       }
 126 |     0 |     }
 127 |     0 |     return CCV_NNC_EXEC_SUCCESS;
 128 |     0 |   }
 129 |     0 |   ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0];
 130 |     0 |   ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
 131 |     0 |   ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
 132 |     0 |   ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
 133 |     0 |   assert(ccv_nnc_tensor_nd(h->info.dim) <= CCV_NNC_MAX_DIM + 2);
 134 |     0 |   assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
 135 |     0 |   assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
 136 |     0 |   assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
 137 |       |   // Assuming this is float 32.
 138 |     0 |   int hdim[CCV_NNC_MAX_DIM_ALLOC];
 139 |     0 |   int gdim[CCV_NNC_MAX_DIM_ALLOC];
 140 |     0 |   ccv_nnc_tensor_view_get_dim(h, hdim);
 141 |     0 |   ccv_nnc_tensor_view_get_dim(g, gdim);
 142 |     0 |   assert(ccv_nnc_tensor_view_check_broadcast_dim(g, hdim));
 143 |     0 |   assert(ccv_nnc_tensor_view_check_dim(a, hdim));
 144 |     0 |   assert(ccv_nnc_tensor_view_check_dim(b, gdim));
 145 |     0 |   int hstride[CCV_NNC_MAX_DIM_ALLOC];
 146 |     0 |   int gstride[CCV_NNC_MAX_DIM_ALLOC];
 147 |     0 |   int astride[CCV_NNC_MAX_DIM_ALLOC];
 148 |     0 |   int bstride[CCV_NNC_MAX_DIM_ALLOC];
 149 |     0 |   assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
 150 |     0 |   ccv_nnc_tensor_view_get_stride(h, hstride);
 151 |     0 |   ccv_nnc_tensor_view_get_stride(g, gstride);
 152 |     0 |   ccv_nnc_tensor_view_get_stride(a, astride);
 153 |     0 |   ccv_nnc_tensor_view_get_stride(b, bstride);
 154 |     0 |   int i[CCV_NNC_MAX_DIM + 2];
 155 |     0 |   int x;
 156 |     0 |   float* const hp = h->data.f32;
 157 |     0 |   float* const gp = g->data.f32;
 158 |     0 |   float* const ap = a->data.f32;
 159 |     0 |   float* const bp = b->data.f32;
 160 |     0 |   ccv_nnc_tensor_zero(h);
 161 |       |   // Non-optimal case, need to do skip if needed.
 162 |     0 |   for (i[0] = 0; i[0] < hdim[0]; i[0]++)
 163 |     0 |   {
 164 |     0 |     float* const ap0 = ap + i[0] * astride[0];
 165 |     0 |     float* const hp0 = hp + i[0] * hstride[0];
 166 |     0 |     float* const gp0 = gdim[0] == 1 ? gp : gp + i[0] * gstride[0];
 167 |     0 |     float* const bp0 = gdim[0] == 1 ? bp : bp + i[0] * bstride[0];
 168 |     0 |     for (i[1] = 0; i[1] < hdim[1]; i[1]++)
 169 |     0 |     {
 170 |     0 |       float* ap1 = ap0 + i[1] * astride[1];
 171 |     0 |       float* hp1 = hp0 + i[1] * hstride[1];
 172 |     0 |       float* const gp1 = gdim[1] == 1 ? gp0 : gp0 + i[1] * gstride[1];
 173 |     0 |       float* const bp1 = gdim[1] == 1 ? bp0 : bp0 + i[1] * bstride[1];
 174 |     0 |       for (i[2] = 0; i[2] < hdim[2]; i[2]++)
 175 |     0 |       {
 176 |     0 |         float* const gp2 = gdim[2] == 1 ? gp1 : gp1 + i[2] * gstride[2];
 177 |     0 |         float* const bp2 = gdim[2] == 1 ? bp1 : bp1 + i[2] * bstride[2];
 178 |     0 |         if (gdim[3] == 1)
 179 |     0 |         {
 180 |     0 |           for (x = 0; x < hdim[3]; x++)
 181 |     0 |             if (ap1[x] == bp2[0])
 182 |     0 |               hp1[x] = gp2[0];
 183 |     0 |         } else {
 184 |     0 |           for (x = 0; x < hdim[3]; x++)
 185 |     0 |             if (ap1[x] == bp2[x])
 186 |     0 |               hp1[x] = gp2[x];
 187 |     0 |         }
 188 |     0 |         hp1 += hstride[2];
 189 |     0 |         ap1 += astride[2];
 190 |     0 |       }
 191 |     0 |     }
 192 |     0 |   }
 193 |     0 |   return CCV_NNC_EXEC_SUCCESS;
 194 |     0 |   }
 195 |       |
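Every line of the backward kernel above has a count of 0: neither the gradient path nor the inputs[0] == 0 mask-only path is reached by the current tests. Below is a hedged sketch of a test that would drive the gradient path, under the same assumptions about the easy macros as the forward sketch (axis argument, tensor constructors); the shapes and values are again illustrative.

// Illustrative only: run the forward pass to get the per-row minimum, then push
// a gradient of ones back; h should end up 1 where a equals its row minimum and
// 0 elsewhere, since the kernel zeroes h before scattering the gradient.
ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 4), 0);
ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 1), 0);
ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 1), 0);
ccv_nnc_tensor_t* const h = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 4), 0);
int k;
for (k = 0; k < 8; k++)
    a->data.f32[k] = (float)k;
g->data.f32[0] = g->data.f32[1] = 1;
ccv_nnc_cmd_exec(CMD_REDUCE_MIN_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
// Inputs follow the kernel's expectation: g = inputs[0], a = inputs[1], b = inputs[2].
ccv_nnc_cmd_exec(CMD_REDUCE_MIN_BACKWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, b), TENSOR_LIST(h), 0);
ccv_nnc_tensor_free(a);
ccv_nnc_tensor_free(b);
ccv_nnc_tensor_free(g);
ccv_nnc_tensor_free(h);

Passing a null in place of g in the input list (so inputs[0] == 0) would instead exercise the mask-only branch at source lines 70 through 128, which writes 1 where the input matches the reduced minimum.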
 196 |       | REGISTER_COMMAND_BACKEND(CCV_NNC_REDUCE_MIN_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 197 |     1 | {
 198 |     1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
 199 |     1 |   registry->tensor_datatypes = CCV_32F;
 200 |     1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 201 |     1 |   registry->algorithms = 1;
 202 |     1 |   registry->exec = _ccv_nnc_reduce_min_forw;
 203 |     1 | }
 204 |       |
 205 |       | REGISTER_COMMAND_BACKEND(CCV_NNC_REDUCE_MIN_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 206 |     1 | {
 207 |     1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
 208 |     1 |   registry->tensor_datatypes = CCV_32F;
 209 |     1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 210 |     1 |   registry->algorithms = 1;
 211 |     1 |   registry->exec = _ccv_nnc_reduce_min_back;
 212 |     1 | }