Coverage Report

Created: 2025-04-03 22:59

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/compare/ccv_nnc_max_cpu_ref.c
Count|Source
     |#include "ccv.h"
     |#include "ccv_internal.h"
     |#include "nnc/ccv_nnc.h"
     |#include "nnc/ccv_nnc_easy.h"
     |#include "nnc/ccv_nnc_internal.h"
     |#ifdef USE_OPENMP
     |#include <omp.h>
     |#endif
     |#ifdef USE_DISPATCH
     |#include <dispatch/dispatch.h>
     |#endif
     |
     |static int _ccv_nnc_max_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
    4|{
    4|  ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
    4|  ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[1];
    4|  ccv_nnc_tensor_view_t* const c = (ccv_nnc_tensor_view_t*)outputs[0];
     |  // Assuming this is float 32.
    4|  int dim[CCV_NNC_MAX_DIM_ALLOC];
    4|  int astride[CCV_NNC_MAX_DIM_ALLOC];
    4|  int bstride[CCV_NNC_MAX_DIM_ALLOC];
    4|  int cstride[CCV_NNC_MAX_DIM_ALLOC];
    4|  assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
    4|  assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
    4|  assert(ccv_nnc_tensor_nd(c->info.dim) <= CCV_NNC_MAX_DIM + 2);
    4|  ccv_nnc_tensor_view_get_dim(a, dim);
    4|  assert(ccv_nnc_tensor_view_check_dim(b, dim));
    4|  assert(ccv_nnc_tensor_view_check_dim(c, dim));
    4|  int x;
    4|  if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(c))
    3|  {
     |    // Super optimal case, just do one for-loop for the elementwise max.
    3|    const int tensor_count = ccv_nnc_tensor_count(a->info);
1.02k|    for (x = 0; x < tensor_count; x++)
1.02k|      c->data.f32[x] = ccv_max(a->data.f32[x], b->data.f32[x]);
    3|    return CCV_NNC_EXEC_SUCCESS;
    3|  }
    4|  assert(CCV_NNC_MAX_DIM == 2); // This logic needs to change for other values of CCV_NNC_MAX_DIM.
    1|  ccv_nnc_tensor_view_get_stride(a, astride);
    1|  ccv_nnc_tensor_view_get_stride(b, bstride);
    1|  ccv_nnc_tensor_view_get_stride(c, cstride);
    1|  int i[CCV_NNC_MAX_DIM + 2];
    1|  float* const ap = a->data.f32;
    1|  float* const bp = b->data.f32;
    1|  float* const cp = c->data.f32;
    1|  const int count = dim[2] * dim[3];
    1|  if (astride[2] == dim[3] && bstride[2] == dim[3] && cstride[2] == dim[3])
    1|  {
     |    // Special casing when stride[2] matches dim[3] for all tensors (contiguous rows).
    2|    for (i[0] = 0; i[0] < dim[0]; i[0]++)
    1|    {
    1|      float* ap0 = ap + i[0] * astride[0];
    1|      float* bp0 = bp + i[0] * bstride[0];
    1|      float* cp0 = cp + i[0] * cstride[0];
    3|      for (i[1] = 0; i[1] < dim[1]; i[1]++)
    2|      {
   14|        for (x = 0; x < count; x++)
   12|          cp0[x] = ccv_max(ap0[x], bp0[x]);
    2|        ap0 += astride[1];
    2|        bp0 += bstride[1];
    2|        cp0 += cstride[1];
    2|      }
    1|    }
    1|    return CCV_NNC_EXEC_SUCCESS;
    1|  }
     |  // Non-optimal case, need to do skip copy.
    0|  for (i[0] = 0; i[0] < dim[0]; i[0]++)
    0|  {
    0|    float* const ap0 = ap + i[0] * astride[0];
    0|    float* const bp0 = bp + i[0] * bstride[0];
    0|    float* const cp0 = cp + i[0] * cstride[0];
    0|    for (i[1] = 0; i[1] < dim[1]; i[1]++)
    0|    {
    0|      float* ap1 = ap0 + i[1] * astride[1];
    0|      float* bp1 = bp0 + i[1] * bstride[1];
    0|      float* cp1 = cp0 + i[1] * cstride[1];
    0|      for (i[2] = 0; i[2] < dim[2]; i[2]++)
    0|      {
    0|        for (x = 0; x < dim[3]; x++)
    0|          cp1[x] = ccv_max(ap1[x], bp1[x]);
    0|        ap1 += astride[2];
    0|        bp1 += bstride[2];
    0|        cp1 += cstride[2];
    0|      }
    0|    }
    0|  }
    0|  return CCV_NNC_EXEC_SUCCESS;
    1|}
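
Note on the uncovered region above: the zero-count skip-copy fallback at the bottom of _ccv_nnc_max_forw only runs when the operands are non-contiguous tensor views, i.e. when stride[2] != dim[3]. A test along the following lines should reach it. This is a minimal sketch that assumes the usual ccv_nnc test helpers (CPU_TENSOR_NHWC, DIM_ALLOC, TENSOR_LIST, the generated CMD_MAX_FORWARD macro) and the stride-based ccv_nnc_tensor_view_new signature, so treat it as illustrative rather than a ready-made test case:

static void exercise_max_forw_skip_copy(void)
{
  // Back each operand with a 1x2x2x4 buffer but view only a 1x2x2x2
  // sub-tensor of it, so astride[2] == 4 while dim[3] == 2 and neither
  // fast path in _ccv_nnc_max_forw applies.
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2, 2, 4), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2, 2, 4), 0);
  ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2, 2, 4), 0);
  int i;
  for (i = 0; i < 16; i++)
    a->data.f32[i] = i, b->data.f32[i] = 15 - i, c->data.f32[i] = 0;
  ccv_nnc_tensor_view_t* const av = ccv_nnc_tensor_view_new(a, CPU_TENSOR_NHWC(32F, 1, 2, 2, 2), DIM_ALLOC(0, 0, 0, 0), DIM_ALLOC(16, 8, 4, 1));
  ccv_nnc_tensor_view_t* const bv = ccv_nnc_tensor_view_new(b, CPU_TENSOR_NHWC(32F, 1, 2, 2, 2), DIM_ALLOC(0, 0, 0, 0), DIM_ALLOC(16, 8, 4, 1));
  ccv_nnc_tensor_view_t* const cv = ccv_nnc_tensor_view_new(c, CPU_TENSOR_NHWC(32F, 1, 2, 2, 2), DIM_ALLOC(0, 0, 0, 0), DIM_ALLOC(16, 8, 4, 1));
  ccv_nnc_cmd_exec(CMD_MAX_FORWARD(), ccv_nnc_no_hint, 0,
    TENSOR_LIST((ccv_nnc_tensor_t*)av, (ccv_nnc_tensor_t*)bv),
    TENSOR_LIST((ccv_nnc_tensor_t*)cv), 0);
  ccv_nnc_tensor_view_free(av);
  ccv_nnc_tensor_view_free(bv);
  ccv_nnc_tensor_view_free(cv);
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(c);
}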
     |
     |static int _ccv_nnc_max_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
    5|{
    5|  ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
    5|  ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
    5|  ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
    5|  ccv_nnc_tensor_view_t* const ha = (ccv_nnc_tensor_view_t*)outputs[0];
    5|  ccv_nnc_tensor_view_t* const hb = (ccv_nnc_tensor_view_t*)outputs[1];
     |  // Assuming this is float 32.
    5|  int dim[CCV_NNC_MAX_DIM_ALLOC];
    5|  int gstride[CCV_NNC_MAX_DIM_ALLOC];
    5|  int astride[CCV_NNC_MAX_DIM_ALLOC];
    5|  int bstride[CCV_NNC_MAX_DIM_ALLOC];
    5|  int hastride[CCV_NNC_MAX_DIM_ALLOC];
    5|  int hbstride[CCV_NNC_MAX_DIM_ALLOC];
    5|  assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
    5|  assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
    5|  assert(ccv_nnc_tensor_nd(ha->info.dim) <= CCV_NNC_MAX_DIM + 2);
    5|  assert(ccv_nnc_tensor_nd(hb->info.dim) <= CCV_NNC_MAX_DIM + 2);
    5|  ccv_nnc_tensor_view_get_dim(a, dim);
    5|  assert(ccv_nnc_tensor_view_check_dim(b, dim));
    5|  assert(ccv_nnc_tensor_view_check_dim(ha, dim));
    5|  assert(ccv_nnc_tensor_view_check_dim(hb, dim));
    5|  if (g)
    3|  {
    3|    assert(ccv_nnc_tensor_nd(g->info.dim) <= CCV_NNC_MAX_DIM + 2);
    3|    assert(ccv_nnc_tensor_view_check_dim(g, dim));
    3|    int x;
    3|    if (!CCV_IS_TENSOR_VIEW(g) && !CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(ha) && !CCV_IS_TENSOR_VIEW(hb))
    2|    {
     |      // Super optimal case, just do one for-loop for the elementwise max.
    2|      const int tensor_count = ccv_nnc_tensor_count(a->info);
1.01k|      for (x = 0; x < tensor_count; x++)
1.01k|        if (a->data.f32[x] > b->data.f32[x]) {
  503|          ha->data.f32[x] = g->data.f32[x];
  503|          hb->data.f32[x] = 0;
  509|        } else if (a->data.f32[x] < b->data.f32[x]) {
  506|          hb->data.f32[x] = g->data.f32[x];
  506|          ha->data.f32[x] = 0;
  506|        } else
    3|          ha->data.f32[x] = hb->data.f32[x] = g->data.f32[x];
    2|      return CCV_NNC_EXEC_SUCCESS;
    2|    }
    3|    assert(CCV_NNC_MAX_DIM == 2); // This logic needs to change for other values of CCV_NNC_MAX_DIM.
    1|    ccv_nnc_tensor_view_get_stride(g, gstride);
    1|    ccv_nnc_tensor_view_get_stride(a, astride);
    1|    ccv_nnc_tensor_view_get_stride(b, bstride);
    1|    ccv_nnc_tensor_view_get_stride(ha, hastride);
    1|    ccv_nnc_tensor_view_get_stride(hb, hbstride);
    1|    int i[CCV_NNC_MAX_DIM + 2];
    1|    float* const gp = g->data.f32;
    1|    float* const ap = a->data.f32;
    1|    float* const bp = b->data.f32;
    1|    float* const hap = ha->data.f32;
    1|    float* const hbp = hb->data.f32;
    1|    const int count = dim[2] * dim[3];
    1|    if (astride[2] == dim[3] && bstride[2] == dim[3] && hastride[2] == dim[3] && hbstride[2] == dim[3])
    1|    {
     |      // Special casing when stride[2] matches dim[3] for all tensors (contiguous rows).
    2|      for (i[0] = 0; i[0] < dim[0]; i[0]++)
    1|      {
    1|        float* gp0 = gp + i[0] * gstride[0];
    1|        float* ap0 = ap + i[0] * astride[0];
    1|        float* bp0 = bp + i[0] * bstride[0];
    1|        float* hap0 = hap + i[0] * hastride[0];
    1|        float* hbp0 = hbp + i[0] * hbstride[0];
    3|        for (i[1] = 0; i[1] < dim[1]; i[1]++)
    2|        {
   14|          for (x = 0; x < count; x++)
   12|            if (ap0[x] > bp0[x]) {
    0|              hap0[x] = gp0[x];
    0|              hbp0[x] = 0;
   12|            } else if (ap0[x] < bp0[x]) {
    9|              hbp0[x] = gp0[x];
    9|              hap0[x] = 0;
    9|            } else
    3|              hap0[x] = hbp0[x] = gp0[x];
    2|          gp0 += gstride[1];
    2|          ap0 += astride[1];
    2|          bp0 += bstride[1];
    2|          hap0 += hastride[1];
    2|          hbp0 += hbstride[1];
    2|        }
    1|      }
    1|      return CCV_NNC_EXEC_SUCCESS;
    1|    }
     |    // Non-optimal case, need to do skip copy.
    0|    for (i[0] = 0; i[0] < dim[0]; i[0]++)
    0|    {
    0|      float* const gp0 = gp + i[0] * gstride[0];
    0|      float* const ap0 = ap + i[0] * astride[0];
    0|      float* const bp0 = bp + i[0] * bstride[0];
    0|      float* const hap0 = hap + i[0] * hastride[0];
    0|      float* const hbp0 = hbp + i[0] * hbstride[0];
    0|      for (i[1] = 0; i[1] < dim[1]; i[1]++)
    0|      {
    0|        float* gp1 = gp0 + i[1] * gstride[1];
    0|        float* ap1 = ap0 + i[1] * astride[1];
    0|        float* bp1 = bp0 + i[1] * bstride[1];
    0|        float* hap1 = hap0 + i[1] * hastride[1];
    0|        float* hbp1 = hbp0 + i[1] * hbstride[1];
    0|        for (i[2] = 0; i[2] < dim[2]; i[2]++)
    0|        {
    0|          for (x = 0; x < dim[3]; x++)
    0|            if (ap1[x] > bp1[x]) {
    0|              hap1[x] = gp1[x];
    0|              hbp1[x] = 0;
    0|            } else if (ap1[x] < bp1[x]) {
    0|              hbp1[x] = gp1[x];
    0|              hap1[x] = 0;
    0|            } else
    0|              hap1[x] = hbp1[x] = gp1[x];
    0|          gp1 += gstride[2];
    0|          ap1 += astride[2];
    0|          bp1 += bstride[2];
    0|          hap1 += hastride[2];
    0|          hbp1 += hbstride[2];
    0|        }
    0|      }
    0|    }
    2|  } else {
    2|    int x;
    2|    if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(ha) && !CCV_IS_TENSOR_VIEW(hb))
    1|    {
     |      // Super optimal case, just do one for-loop for the elementwise max.
    1|      const int tensor_count = ccv_nnc_tensor_count(a->info);
   13|      for (x = 0; x < tensor_count; x++)
   12|        if (a->data.f32[x] > b->data.f32[x]) {
    0|          ha->data.f32[x] = 1;
    0|          hb->data.f32[x] = 0;
   12|        } else if (a->data.f32[x] < b->data.f32[x]) {
    9|          ha->data.f32[x] = 0;
    9|          hb->data.f32[x] = 1;
    9|        } else
    3|          ha->data.f32[x] = hb->data.f32[x] = 1;
    1|      return CCV_NNC_EXEC_SUCCESS;
    1|    }
    2|    assert(CCV_NNC_MAX_DIM == 2); // This logic needs to change for other values of CCV_NNC_MAX_DIM.
    1|    ccv_nnc_tensor_view_get_stride(a, astride);
    1|    ccv_nnc_tensor_view_get_stride(b, bstride);
    1|    ccv_nnc_tensor_view_get_stride(ha, hastride);
    1|    ccv_nnc_tensor_view_get_stride(hb, hbstride);
    1|    int i[CCV_NNC_MAX_DIM + 2];
    1|    float* const ap = a->data.f32;
    1|    float* const bp = b->data.f32;
    1|    float* const hap = ha->data.f32;
    1|    float* const hbp = hb->data.f32;
    1|    const int count = dim[2] * dim[3];
    1|    if (astride[2] == dim[3] && bstride[2] == dim[3] && hastride[2] == dim[3] && hbstride[2] == dim[3])
    1|    {
     |      // Special casing when stride[2] matches dim[3] for all tensors (contiguous rows).
    2|      for (i[0] = 0; i[0] < dim[0]; i[0]++)
    1|      {
    1|        float* ap0 = ap + i[0] * astride[0];
    1|        float* bp0 = bp + i[0] * bstride[0];
    1|        float* hap0 = hap + i[0] * hastride[0];
    1|        float* hbp0 = hbp + i[0] * hbstride[0];
    3|        for (i[1] = 0; i[1] < dim[1]; i[1]++)
    2|        {
   14|          for (x = 0; x < count; x++)
   12|            if (ap0[x] > bp0[x]) {
    0|              hap0[x] = 1;
    0|              hbp0[x] = 0;
   12|            } else if (ap0[x] < bp0[x]) {
    9|              hap0[x] = 0;
    9|              hbp0[x] = 1;
    9|            } else
    3|              hap0[x] = hbp0[x] = 1;
    2|          ap0 += astride[1];
    2|          bp0 += bstride[1];
    2|          hap0 += hastride[1];
    2|          hbp0 += hbstride[1];
    2|        }
    1|      }
    1|      return CCV_NNC_EXEC_SUCCESS;
    1|    }
     |    // Non-optimal case, need to do skip copy.
    0|    for (i[0] = 0; i[0] < dim[0]; i[0]++)
    0|    {
    0|      float* const ap0 = ap + i[0] * astride[0];
    0|      float* const bp0 = bp + i[0] * bstride[0];
    0|      float* const hap0 = hap + i[0] * hastride[0];
    0|      float* const hbp0 = hbp + i[0] * hbstride[0];
    0|      for (i[1] = 0; i[1] < dim[1]; i[1]++)
    0|      {
    0|        float* ap1 = ap0 + i[1] * astride[1];
    0|        float* bp1 = bp0 + i[1] * bstride[1];
    0|        float* hap1 = hap0 + i[1] * hastride[1];
    0|        float* hbp1 = hbp0 + i[1] * hbstride[1];
    0|        for (i[2] = 0; i[2] < dim[2]; i[2]++)
    0|        {
    0|          for (x = 0; x < dim[3]; x++)
    0|            if (ap1[x] > bp1[x]) {
    0|              hap1[x] = 1;
    0|              hbp1[x] = 0;
    0|            } else if (ap1[x] < bp1[x]) {
    0|              hap1[x] = 0;
    0|              hbp1[x] = 1;
    0|            } else
    0|              hap1[x] = hbp1[x] = 1;
    0|          ap1 += astride[2];
    0|          bp1 += bstride[2];
    0|          hap1 += hastride[2];
    0|          hbp1 += hbstride[2];
    0|        }
    0|      }
    0|    }
    0|  }
    0|  return CCV_NNC_EXEC_SUCCESS;
    5|}
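
One behavioral detail worth calling out: when no incoming gradient g is supplied, _ccv_nnc_max_back writes indicator values (1 for the winning input, 0 for the losing one) rather than scaled gradients, and an exact tie routes the full gradient to both inputs instead of splitting it. A hedged usage sketch of the g == 0 branch, again assuming the generated CMD_MAX_BACKWARD macro and the standard ccv_nnc test helpers:

// Inputs to CCV_NNC_MAX_BACKWARD are (g, a, b); passing 0 for g takes the
// else-branch above, producing 0/1 masks in ha and hb.
ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
a->data.f32[0] = 2, a->data.f32[1] = 1, a->data.f32[2] = 3, a->data.f32[3] = 3;
b->data.f32[0] = 1, b->data.f32[1] = 2, b->data.f32[2] = 3, b->data.f32[3] = 0;
ccv_nnc_cmd_exec(CMD_MAX_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(0, a, b), TENSOR_LIST(ha, hb), 0);
// Expected: ha = {1, 0, 1, 1}, hb = {0, 1, 1, 0} -- the tie at index 2
// sets both masks to 1.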
     |
     |REGISTER_COMMAND_BACKEND(CCV_NNC_MAX_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
    1|{
    1|  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
    1|  registry->tensor_datatypes = CCV_32F;
    1|  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
    1|  registry->algorithms = 1;
    1|  registry->exec = _ccv_nnc_max_forw;
    1|}
     |
     |REGISTER_COMMAND_BACKEND(CCV_NNC_MAX_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
    1|{
    1|  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
    1|  registry->tensor_datatypes = CCV_32F;
    1|  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
    1|  registry->algorithms = 1;
    1|  registry->exec = _ccv_nnc_max_back;
    1|}
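
The two REGISTER_COMMAND_BACKEND blocks are what make these kernels reachable through the generic dispatcher: they declare the supported tensor formats (NHWC, NCHW, CHWN), the single supported datatype (CCV_32F), CPU memory, one algorithm, and point exec at the static functions above. A caller normally goes through ccv_nnc_cmd_exec and lets backend auto-selection pick CPU_REF for CPU tensors; the sketch below, which assumes the backend field on ccv_nnc_cmd_t can be assigned directly as in ccv's own tests, pins this backend explicitly.

// Hypothetical: force the CPU reference backend instead of relying on
// auto-selection (a, b, c are CPU float32 tensors as in the sketches above).
ccv_nnc_cmd_t cmd = CMD_MAX_FORWARD();
cmd.backend = CCV_NNC_BACKEND_CPU_REF;
ccv_nnc_cmd_exec(cmd, ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);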