Coverage Report

Created: 2024-08-18 16:21

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/compare/ccv_nnc_min_cpu_ref.c
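
Summary: both kernels were exercised (_ccv_nnc_min_forw ran 4 times, _ccv_nnc_min_back 5 times). The zero-count regions below are the strided "skip copy" fallbacks (source lines 67-87, 177-209 and 267-297, plus the trailing return at line 298 that only they reach) and a few untaken a < b branches inside otherwise covered loops (lines 160-161, 218-219 and 251-252).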
Line  Count  Source
   1         #include "ccv.h"
   2         #include "ccv_internal.h"
   3         #include "nnc/ccv_nnc.h"
   4         #include "nnc/ccv_nnc_easy.h"
   5         #include "nnc/ccv_nnc_internal.h"
   6         #ifdef USE_OPENMP
   7         #include <omp.h>
   8         #endif
   9         #ifdef USE_DISPATCH
  10         #include <dispatch/dispatch.h>
  11         #endif
  12
  13         static int _ccv_nnc_min_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  14      4  {
  15      4    ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
  16      4    ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[1];
  17      4    ccv_nnc_tensor_view_t* const c = (ccv_nnc_tensor_view_t*)outputs[0];
  18           // Assuming this is float 32.
  19      4    int dim[CCV_NNC_MAX_DIM_ALLOC];
  20      4    int astride[CCV_NNC_MAX_DIM_ALLOC];
  21      4    int bstride[CCV_NNC_MAX_DIM_ALLOC];
  22      4    int cstride[CCV_NNC_MAX_DIM_ALLOC];
  23      4    assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
  24      4    assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
  25      4    assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
  26      4    ccv_nnc_tensor_view_get_dim(a, dim);
  27      4    assert(ccv_nnc_tensor_view_check_dim(b, dim));
  28      4    assert(ccv_nnc_tensor_view_check_dim(c, dim));
  29      4    int x;
  30      4    if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c))
  31      3    {
  32             // Super optimal case, just do one for-loop for min.
  33      3      const int tensor_count = ccv_nnc_tensor_count(a->info);
  34  1.02k      for (x = 0; x < tensor_count; x++)
  35  1.02k        c->data.f32[x] = ccv_min(a->data.f32[x], b->data.f32[x]);
  36      3      return CCV_NNC_EXEC_SUCCESS;
  37      3    }
  38      1    assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
  39      1    ccv_nnc_tensor_view_get_stride(a, astride);
  40      1    ccv_nnc_tensor_view_get_stride(b, bstride);
  41      1    ccv_nnc_tensor_view_get_stride(c, cstride);
  42      1    int i[CCV_NNC_MAX_DIM + 2];
  43      1    float* const ap = a->data.f32;
  44      1    float* const bp = b->data.f32;
  45      1    float* const cp = c->data.f32;
  46      1    const int count = dim[2] * dim[3];
  47      1    if (astride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3])
  48      1    {
  49             // Special casing if astride[2] is the same as dim[3]
  50      2      for (i[0] = 0; i[0] < dim[0]; i[0]++)
  51      1      {
  52      1        float* ap0 = ap + i[0] * astride[0];
  53      1        float* bp0 = bp + i[0] * bstride[0];
  54      1        float* cp0 = cp + i[0] * cstride[0];
  55      3        for (i[1] = 0; i[1] < dim[1]; i[1]++)
  56      2        {
  57     14          for (x = 0; x < count; x++)
  58     12            cp0[x] = ccv_min(ap0[x], bp0[x]);
  59      2          ap0 += astride[1];
  60      2          bp0 += bstride[1];
  61      2          cp0 += cstride[1];
  62      2        }
  63      1      }
  64      1      return CCV_NNC_EXEC_SUCCESS;
  65      1    }
  66           // Non-optimal case, need to do skip copy.
  67      0    for (i[0] = 0; i[0] < dim[0]; i[0]++)
  68      0    {
  69      0      float* const ap0 = ap + i[0] * astride[0];
  70      0      float* const bp0 = bp + i[0] * bstride[0];
  71      0      float* const cp0 = cp + i[0] * cstride[0];
  72      0      for (i[1] = 0; i[1] < dim[1]; i[1]++)
  73      0      {
  74      0        float* ap1 = ap0 + i[1] * astride[1];
  75      0        float* bp1 = bp0 + i[1] * bstride[1];
  76      0        float* cp1 = cp0 + i[1] * cstride[1];
  77      0        for (i[2] = 0; i[2] < dim[2]; i[2]++)
  78      0        {
  79      0          for (x = 0; x < dim[3]; x++)
  80      0            cp1[x] = ccv_min(ap1[x], bp1[x]);
  81      0          ap1 += astride[2];
  82      0          bp1 += bstride[2];
  83      0          cp1 += cstride[2];
  84      0        }
  85      0      }
  86      0    }
  87      0    return CCV_NNC_EXEC_SUCCESS;
  88      1  }
  89
  90         static int _ccv_nnc_min_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  91      5  {
  92      5    ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
  93      5    ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
  94      5    ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
  95      5    ccv_nnc_tensor_view_t* const ha = (ccv_nnc_tensor_view_t*)outputs[0];
  96      5    ccv_nnc_tensor_view_t* const hb = (ccv_nnc_tensor_view_t*)outputs[1];
  97           // Assuming this is float 32.
  98      5    int dim[CCV_NNC_MAX_DIM_ALLOC];
  99      5    int gstride[CCV_NNC_MAX_DIM_ALLOC];
 100      5    int astride[CCV_NNC_MAX_DIM_ALLOC];
 101      5    int bstride[CCV_NNC_MAX_DIM_ALLOC];
 102      5    int hastride[CCV_NNC_MAX_DIM_ALLOC];
 103      5    int hbstride[CCV_NNC_MAX_DIM_ALLOC];
 104      5    assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
 105      5    assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
 106      5    assert(ccv_nnc_tensor_nd(ha->info.dim) <= CCV_NNC_MAX_DIM + 2);
 107      5    assert(ccv_nnc_tensor_nd(hb->info.dim) <= CCV_NNC_MAX_DIM + 2);
 108      5    ccv_nnc_tensor_view_get_dim(a, dim);
 109      5    assert(ccv_nnc_tensor_view_check_dim(b, dim));
 110      5    assert(ccv_nnc_tensor_view_check_dim(ha, dim));
 111      5    assert(ccv_nnc_tensor_view_check_dim(hb, dim));
 112      5    if (g)
 113      3    {
 114      3      assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
 115      3      assert(ccv_nnc_tensor_view_check_dim(g, dim));
 116      3      int x;
 117      3      if (!CCV_IS_TENSOR_VIEW(g) && !CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(ha) && !CCV_IS_TENSOR_VIEW(hb))
 118      2      {
 119               // Super optimal case, just do one for-loop for min.
 120      2        const int tensor_count = ccv_nnc_tensor_count(a->info);
 121  1.01k        for (x = 0; x < tensor_count; x++)
 122  1.01k          if (a->data.f32[x] < b->data.f32[x])
 123    497          {
 124    497            ha->data.f32[x] = g->data.f32[x];
 125    497            hb->data.f32[x] = 0;
 126    515          } else if (a->data.f32[x] > b->data.f32[x]) {
 127    512            hb->data.f32[x] = g->data.f32[x];
 128    512            ha->data.f32[x] = 0;
 129    512          } else
 130      3            ha->data.f32[x] = hb->data.f32[x] = g->data.f32[x];
 131      2        return CCV_NNC_EXEC_SUCCESS;
 132      2      }
 133      1      assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
 134      1      ccv_nnc_tensor_view_get_stride(g, gstride);
 135      1      ccv_nnc_tensor_view_get_stride(a, astride);
 136      1      ccv_nnc_tensor_view_get_stride(b, bstride);
 137      1      ccv_nnc_tensor_view_get_stride(ha, hastride);
 138      1      ccv_nnc_tensor_view_get_stride(hb, hbstride);
 139      1      int i[CCV_NNC_MAX_DIM + 2];
 140      1      float* const gp = g->data.f32;
 141      1      float* const ap = a->data.f32;
 142      1      float* const bp = b->data.f32;
 143      1      float* const hap = ha->data.f32;
 144      1      float* const hbp = hb->data.f32;
 145      1      const int count = dim[2] * dim[3];
 146      1      if (astride[2] == dim[3] && bstride[2] == dim[3] && hastride[2] == dim[3] && hbstride[2] == dim[3])
 147      1      {
 148                 // Special casing if astride[2] is the same as dim[3]
 149      2        for (i[0] = 0; i[0] < dim[0]; i[0]++)
 150      1        {
 151      1          float* gp0 = gp + i[0] * gstride[0];
 152      1          float* ap0 = ap + i[0] * astride[0];
 153      1          float* bp0 = bp + i[0] * bstride[0];
 154      1          float* hap0 = hap + i[0] * hastride[0];
 155      1          float* hbp0 = hbp + i[0] * hbstride[0];
 156      3          for (i[1] = 0; i[1] < dim[1]; i[1]++)
 157      2          {
 158     14            for (x = 0; x < count; x++)
 159     12              if (ap0[x] < bp0[x]) {
 160      0                hap0[x] = gp0[x];
 161      0                hbp0[x] = 0;
 162     12              } else if (ap0[x] > bp0[x]) {
 163      9                hbp0[x] = gp0[x];
 164      9                hap0[x] = 0;
 165      9              } else
 166      3                hap0[x] = hbp0[x] = gp0[x];
 167      2            gp0 += gstride[1];
 168      2            ap0 += astride[1];
 169      2            bp0 += bstride[1];
 170      2            hap0 += hastride[1];
 171      2            hbp0 += hbstride[1];
 172      2          }
 173      1        }
 174      1        return CCV_NNC_EXEC_SUCCESS;
 175      1      }
 176             // Non-optimal case, need to do skip copy.
 177      0      for (i[0] = 0; i[0] < dim[0]; i[0]++)
 178      0      {
 179      0        float* const gp0 = gp + i[0] * gstride[0];
 180      0        float* const ap0 = ap + i[0] * astride[0];
 181      0        float* const bp0 = bp + i[0] * bstride[0];
 182      0        float* const hap0 = hap + i[0] * hastride[0];
 183      0        float* const hbp0 = hbp + i[0] * hbstride[0];
 184      0        for (i[1] = 0; i[1] < dim[1]; i[1]++)
 185      0        {
 186      0          float* gp1 = gp0 + i[1] * gstride[1];
 187      0          float* ap1 = ap0 + i[1] * astride[1];
 188      0          float* bp1 = bp0 + i[1] * bstride[1];
 189      0          float* hap1 = hap0 + i[1] * hastride[1];
 190      0          float* hbp1 = hbp0 + i[1] * hbstride[1];
 191      0          for (i[2] = 0; i[2] < dim[2]; i[2]++)
 192      0          {
 193      0            for (x = 0; x < dim[3]; x++)
 194      0              if (ap1[x] < bp1[x]) {
 195      0                hap1[x] = gp1[x];
 196      0                hbp1[x] = 0;
 197      0              } else if (ap1[x] > bp1[x]) {
 198      0                hbp1[x] = gp1[x];
 199      0                hap1[x] = 0;
 200      0              } else
 201      0                hap1[x] = hbp1[x] = gp1[x];
 202      0            gp1 += gstride[2];
 203      0            ap1 += astride[2];
 204      0            bp1 += bstride[2];
 205      0            hap1 += hastride[2];
 206      0            hbp1 += hbstride[2];
 207      0          }
 208      0        }
 209      0      }
 210      2    } else {
 211      2      int x;
 212      2      if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(ha) && !CCV_IS_TENSOR_VIEW(hb))
 213      1      {
 214               // Super optimal case, just do one for-loop for min.
 215      1        const int tensor_count = ccv_nnc_tensor_count(a->info);
 216     13        for (x = 0; x < tensor_count; x++)
 217     12          if (a->data.f32[x] < b->data.f32[x]) {
 218      0            ha->data.f32[x] = 1;
 219      0            hb->data.f32[x] = 0;
 220     12          } else if (a->data.f32[x] > b->data.f32[x]) {
 221      9            ha->data.f32[x] = 0;
 222      9            hb->data.f32[x] = 1;
 223      9          } else
 224      3            ha->data.f32[x] = hb->data.f32[x] = 1;
 225      1        return CCV_NNC_EXEC_SUCCESS;
 226      1      }
 227      1      assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
 228      1      ccv_nnc_tensor_view_get_stride(a, astride);
 229      1      ccv_nnc_tensor_view_get_stride(b, bstride);
 230      1      ccv_nnc_tensor_view_get_stride(ha, hastride);
 231      1      ccv_nnc_tensor_view_get_stride(hb, hbstride);
 232      1      int i[CCV_NNC_MAX_DIM + 2];
 233      1      float* const ap = a->data.f32;
 234      1      float* const bp = b->data.f32;
 235      1      float* const hap = ha->data.f32;
 236      1      float* const hbp = hb->data.f32;
 237      1      const int count = dim[2] * dim[3];
 238      1      if (astride[2] == dim[3] && bstride[2] == dim[3] && hastride[2] == dim[3] && hbstride[2] == dim[3])
 239      1      {
 240                 // Special casing if astride[2] is the same as dim[3]
 241      2        for (i[0] = 0; i[0] < dim[0]; i[0]++)
 242      1        {
 243      1          float* ap0 = ap + i[0] * astride[0];
 244      1          float* bp0 = bp + i[0] * bstride[0];
 245      1          float* hap0 = hap + i[0] * hastride[0];
 246      1          float* hbp0 = hbp + i[0] * hbstride[0];
 247      3          for (i[1] = 0; i[1] < dim[1]; i[1]++)
 248      2          {
 249     14            for (x = 0; x < count; x++)
 250     12              if (ap0[x] < bp0[x]) {
 251      0                hap0[x] = 1;
 252      0                hbp0[x] = 0;
 253     12              } else if (ap0[x] > bp0[x]) {
 254      9                hap0[x] = 0;
 255      9                hbp0[x] = 1;
 256      9              } else
 257      3                hap0[x] = hbp0[x] = 1;
 258      2            ap0 += astride[1];
 259      2            bp0 += bstride[1];
 260      2            hap0 += hastride[1];
 261      2            hbp0 += hbstride[1];
 262      2          }
 263      1        }
 264      1        return CCV_NNC_EXEC_SUCCESS;
 265      1      }
 266             // Non-optimal case, need to do skip copy.
 267      0      for (i[0] = 0; i[0] < dim[0]; i[0]++)
 268      0      {
 269      0        float* const ap0 = ap + i[0] * astride[0];
 270      0        float* const bp0 = bp + i[0] * bstride[0];
 271      0        float* const hap0 = hap + i[0] * hastride[0];
 272      0        float* const hbp0 = hbp + i[0] * hbstride[0];
 273      0        for (i[1] = 0; i[1] < dim[1]; i[1]++)
 274      0        {
 275      0          float* ap1 = ap0 + i[1] * astride[1];
 276      0          float* bp1 = bp0 + i[1] * bstride[1];
 277      0          float* hap1 = hap0 + i[1] * hastride[1];
 278      0          float* hbp1 = hbp0 + i[1] * hbstride[1];
 279      0          for (i[2] = 0; i[2] < dim[2]; i[2]++)
 280      0          {
 281      0            for (x = 0; x < dim[3]; x++)
 282      0              if (ap1[x] < bp1[x]) {
 283      0                hap1[x] = 1;
 284      0                hbp1[x] = 0;
 285      0              } else if (ap1[x] > bp1[x]) {
 286      0                hap1[x] = 0;
 287      0                hbp1[x] = 1;
 288      0              } else
 289      0                hap1[x] = hbp1[x] = 1;
 290      0            ap1 += astride[2];
 291      0            bp1 += bstride[2];
 292      0            hap1 += hastride[2];
 293      0            hbp1 += hbstride[2];
 294      0          }
 295      0        }
 296      0      }
 297      0    }
 298      0    return CCV_NNC_EXEC_SUCCESS;
 299      5  }
 300
 301         REGISTER_COMMAND_BACKEND(CCV_NNC_MIN_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 302      1  {
 303      1    registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
 304      1    registry->tensor_datatypes = CCV_32F;
 305      1    registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 306      1    registry->algorithms = 1;
 307      1    registry->exec = _ccv_nnc_min_forw;
 308      1  }
 309
 310         REGISTER_COMMAND_BACKEND(CCV_NNC_MIN_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 311      1  {
 312      1    registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
 313      1    registry->tensor_datatypes = CCV_32F;
 314      1    registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 315      1    registry->algorithms = 1;
 316      1    registry->exec = _ccv_nnc_min_back;
 317      1  }
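
Note on the uncovered regions: the three "Non-optimal case" loops only run when at least one operand is a tensor view whose stride[2] differs from dim[3], i.e. when the two innermost dimensions are not contiguous in memory. A test along the following lines should drive the forward kernel through lines 67-87. This is a minimal sketch, not a verified test: the helper name is hypothetical, and the ccv_nnc_tensor_view_new(tensor, params, ofs, stride) argument order, the DIM_ALLOC/TENSOR_LIST/CMD_MIN_FORWARD macros and the NHWC stride layout are assumptions to be checked against the nnc headers.

#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>

/* Sketch: route CCV_NNC_MIN_FORWARD through the strided fallback at
 * source line 67 by viewing a 1x2x3x4 window inside a 1x2x3x8 buffer,
 * so that stride[2] (8) != dim[3] (4) and both fast paths are skipped. */
static void exercise_min_forward_strided(void)
{
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2, 3, 8), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2, 3, 8), 0);
	ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2, 3, 8), 0);
	int i;
	for (i = 0; i < 48; i++)
		a->data.f32[i] = i, b->data.f32[i] = 47 - i;
	/* Assumed signature: (tensor, params, ofs, stride). The strides
	 * (48, 24, 8, 1) are those of the full buffer, not of the 4-wide view. */
	ccv_nnc_tensor_view_t* const av = ccv_nnc_tensor_view_new(a, CPU_TENSOR_NHWC(32F, 1, 2, 3, 4), DIM_ALLOC(0, 0, 0, 0), DIM_ALLOC(48, 24, 8, 1));
	ccv_nnc_tensor_view_t* const bv = ccv_nnc_tensor_view_new(b, CPU_TENSOR_NHWC(32F, 1, 2, 3, 4), DIM_ALLOC(0, 0, 0, 0), DIM_ALLOC(48, 24, 8, 1));
	ccv_nnc_tensor_view_t* const cv = ccv_nnc_tensor_view_new(c, CPU_TENSOR_NHWC(32F, 1, 2, 3, 4), DIM_ALLOC(0, 0, 0, 0), DIM_ALLOC(48, 24, 8, 1));
	ccv_nnc_cmd_exec(CMD_MIN_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)av, (ccv_nnc_tensor_t*)bv), TENSOR_LIST((ccv_nnc_tensor_t*)cv), 0);
	ccv_nnc_tensor_view_free(av);
	ccv_nnc_tensor_view_free(bv);
	ccv_nnc_tensor_view_free(cv);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(c);
}

The same shapes, run through CMD_MIN_BACKWARD() with views for g, ha and hb as well, would presumably reach the corresponding fallbacks at lines 177 and 267.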
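
Note on the tie branches (lines 130, 166, 201 and 224): when a equals b, the full incoming gradient is written to both ha and hb rather than split between them, and in the no-gradient case both masks are set to 1. Below is a self-contained restatement of that routing rule, independent of the library, for illustration only.

#include <stdio.h>

/* Standalone restatement of the rule in _ccv_nnc_min_back (source lines
 * 122-130): the smaller input receives the gradient, the other receives
 * zero, and a tie duplicates the gradient into both outputs. */
static void min_grad(const float a, const float b, const float g, float* const ha, float* const hb)
{
	if (a < b) {
		*ha = g;
		*hb = 0;
	} else if (a > b) {
		*hb = g;
		*ha = 0;
	} else /* tie: both receive g; it is not halved */
		*ha = *hb = g;
}

int main(void)
{
	float ha, hb;
	min_grad(1, 2, 0.5f, &ha, &hb);
	printf("a wins: ha=%g hb=%g\n", ha, hb); /* ha=0.5 hb=0 */
	min_grad(2, 2, 0.5f, &ha, &hb);
	printf("tie:    ha=%g hb=%g\n", ha, hb); /* ha=0.5 hb=0.5 */
	return 0;
}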