Coverage Report

Created: 2025-05-09 19:15

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/isnan/ccv_nnc_reduce_isnan_cpu_ref.c
 Line| Count| Source
    1|      | #include "ccv.h"
    2|      | #include "ccv_internal.h"
    3|      | #include "nnc/ccv_nnc.h"
    4|      | #include "nnc/ccv_nnc_easy.h"
    5|      | #include "nnc/ccv_nnc_internal.h"
    6|      | #ifdef USE_OPENMP
    7|      | #include <omp.h>
    8|      | #endif
    9|      | #ifdef USE_DISPATCH
   10|      | #include <dispatch/dispatch.h>
   11|      | #endif
   12|      |
   13|      | // Shared methods.
   14|      | #include "../_ccv_nnc_cpu_ref.h"
   15|      |
   16|      | static int is_fp16_nan(unsigned short fp16) // From Claude.
   17|     6| {
   18|      |   // Extract the exponent and fraction bits
   19|     6|   unsigned short exponent = (fp16 >> 10) & 0x1F;  // Bits 10-14 (5 bits)
   20|     6|   unsigned short fraction = fp16 & 0x3FF;         // Bits 0-9 (10 bits)
   21|      |
   22|      |   // For FP16, a value is NaN if:
   23|      |   // 1. The exponent is all 1s (0x1F)
   24|      |   // 2. The fraction is non-zero
   25|     6|   return (exponent == 0x1F) && (fraction != 0);
   26|     6| }
   27|      |
   28|      | static int _ccv_nnc_reduce_isnan_forw_f16(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
   29|     1| {
   30|     1|   assert(input_size == 1);
   31|     1|   ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
   32|     1|   ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0];
   33|     1|   assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
   34|     1|   assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
   35|      |   // Assuming this is float 32.
   36|     1|   int adim[CCV_NNC_MAX_DIM_ALLOC];
   37|     1|   int bdim[CCV_NNC_MAX_DIM_ALLOC];
   38|     1|   ccv_nnc_tensor_view_get_dim(a, adim);
   39|     1|   ccv_nnc_tensor_view_get_dim(b, bdim);
   40|     1|   assert(ccv_nnc_tensor_view_check_broadcast_dim(b, adim));
   41|     1|   int astride[CCV_NNC_MAX_DIM_ALLOC];
   42|     1|   int bstride[CCV_NNC_MAX_DIM_ALLOC];
   43|     1|   assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
   44|     1|   ccv_nnc_tensor_view_get_stride(a, astride);
   45|     1|   ccv_nnc_tensor_view_get_stride(b, bstride);
   46|     1|   int i[CCV_NNC_MAX_DIM + 2];
   47|     1|   int x;
   48|     1|   _ccv_nnc_tensor_set_cpu_ref_i32(b, 0);
   49|     1|   unsigned short* const ap = (unsigned short*)a->data.f16;
   50|     1|   int* const bp = b->data.i32;
   51|      |   // Non-optimal case, need to do skip if needed.
   52|     2|   for (i[0] = 0; i[0] < adim[0]; i[0]++)
   53|     1|   {
   54|     1|     unsigned short* const ap0 = ap + i[0] * astride[0];
   55|     1|     int* const bp0 = bdim[0] == 1 ? bp : bp + i[0] * bstride[0];
   56|     2|     for (i[1] = 0; i[1] < adim[1]; i[1]++)
   57|     1|     {
   58|     1|       unsigned short* ap1 = ap0 + i[1] * astride[1];
   59|     1|       int* const bp1 = bdim[1] == 1 ? bp0 : bp0 + i[1] * bstride[1];
   60|     3|       for (i[2] = 0; i[2] < adim[2]; i[2]++)
   61|     2|       {
   62|     2|         int* const bp2 = bdim[2] == 1 ? bp1 : bp1 + i[2] * bstride[2];
   63|     2|         if (bdim[3] == 1)
   64|     2|         {
   65|     8|           for (x = 0; x < adim[3]; x++)
   66|     6|             if (is_fp16_nan(ap1[x]))
   67|     1|               bp2[0] = 1;
   68|     2|         } else {
   69|     0|           for (x = 0; x < adim[3]; x++)
   70|     0|             if (is_fp16_nan(ap1[x]))
   71|     0|               bp2[x] = 1;
   72|     0|         }
   73|     2|         ap1 += astride[2];
   74|     2|       }
   75|     1|     }
   76|     1|   }
   77|     1|   return CCV_NNC_EXEC_SUCCESS;
   78|     1| }
   79|      |
   80|      | static int _ccv_nnc_reduce_isnan_forw_f32(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
   81|     2| {
   82|     2|   assert(input_size == 1);
   83|     2|   ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
   84|     2|   ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0];
   85|     2|   assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
   86|     2|   assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
   87|      |   // Assuming this is float 32.
   88|     2|   int adim[CCV_NNC_MAX_DIM_ALLOC];
   89|     2|   int bdim[CCV_NNC_MAX_DIM_ALLOC];
   90|     2|   ccv_nnc_tensor_view_get_dim(a, adim);
   91|     2|   ccv_nnc_tensor_view_get_dim(b, bdim);
   92|     2|   assert(ccv_nnc_tensor_view_check_broadcast_dim(b, adim));
   93|     2|   int astride[CCV_NNC_MAX_DIM_ALLOC];
   94|     2|   int bstride[CCV_NNC_MAX_DIM_ALLOC];
   95|     2|   assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
   96|     2|   ccv_nnc_tensor_view_get_stride(a, astride);
   97|     2|   ccv_nnc_tensor_view_get_stride(b, bstride);
   98|     2|   int i[CCV_NNC_MAX_DIM + 2];
   99|     2|   int x;
  100|     2|   _ccv_nnc_tensor_set_cpu_ref_i32(b, 0);
  101|     2|   float* const ap = a->data.f32;
  102|     2|   int* const bp = b->data.i32;
  103|      |   // Non-optimal case, need to do skip if needed.
  104|     4|   for (i[0] = 0; i[0] < adim[0]; i[0]++)
  105|     2|   {
  106|     2|     float* const ap0 = ap + i[0] * astride[0];
  107|     2|     int* const bp0 = bdim[0] == 1 ? bp : bp + i[0] * bstride[0];
  108|     4|     for (i[1] = 0; i[1] < adim[1]; i[1]++)
  109|     2|     {
  110|     2|       float* ap1 = ap0 + i[1] * astride[1];
  111|     2|       int* const bp1 = bdim[1] == 1 ? bp0 : bp0 + i[1] * bstride[1];
  112|     6|       for (i[2] = 0; i[2] < adim[2]; i[2]++)
  113|     4|       {
  114|     4|         int* const bp2 = bdim[2] == 1 ? bp1 : bp1 + i[2] * bstride[2];
  115|     4|         if (bdim[3] == 1)
  116|     2|         {
  117|     8|           for (x = 0; x < adim[3]; x++)
  118|     6|             if (isnan(ap1[x]))
  119|     1|               bp2[0] = 1;
  120|     2|         } else {
  121|     8|           for (x = 0; x < adim[3]; x++)
  122|     6|             if (isnan(ap1[x]))
  123|     1|               bp2[x] = 1;
  124|     2|         }
  125|     4|         ap1 += astride[2];
  126|     4|       }
  127|     2|     }
  128|     2|   }
  129|     2|   return CCV_NNC_EXEC_SUCCESS;
  130|     2| }
  131|      |
  132|      | static int _ccv_nnc_reduce_isnan_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  133|     3| {
  134|     3|   assert(input_size == 1);
  135|     3|   ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
  136|     3|   if (a->info.datatype == CCV_32F)
  137|     2|     return _ccv_nnc_reduce_isnan_forw_f32(cmd, hint, flags, inputs, input_size, outputs, output_size, stream_context);
  138|     1|   else if (a->info.datatype == CCV_16F)
  139|     1|     return _ccv_nnc_reduce_isnan_forw_f16(cmd, hint, flags, inputs, input_size, outputs, output_size, stream_context);
  140|     0|   return CCV_NNC_EXEC_INVALID;
  141|     3| }
  142|      |
  143|      | static int _ccv_nnc_reduce_isnan_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  144|     0| {
  145|     0|   return CCV_NNC_EXEC_INVALID;
  146|     0| }
  147|      |
  148|      | REGISTER_COMMAND_BACKEND(CCV_NNC_REDUCE_ISNAN_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
  149|     1| {
  150|     1|   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
  151|     1|   registry->tensor_datatypes = CCV_32F | CCV_32S | CCV_16F;
  152|     1|   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  153|     1|   registry->algorithms = 1;
  154|     1|   registry->exec = _ccv_nnc_reduce_isnan_forw;
  155|     1| }
  156|      |
  157|      | REGISTER_COMMAND_BACKEND(CCV_NNC_REDUCE_ISNAN_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
  158|     1| {
  159|     1|   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
  160|     1|   registry->tensor_datatypes = CCV_32F | CCV_32S | CCV_16F;
  161|     1|   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  162|     1|   registry->algorithms = 1;
  163|     1|   registry->exec = _ccv_nnc_reduce_isnan_back;
  164|     1| }
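
Note on the covered is_fp16_nan() check above: IEEE 754 half precision stores 1 sign bit, 5 exponent bits, and 10 fraction bits, and a value is NaN exactly when all exponent bits are 1 and the fraction is non-zero. A minimal standalone sketch of the same bit test follows; the specific bit patterns are illustrative examples, not values taken from the ccv test suite.

#include <stdio.h>

/* Same exponent/fraction test as is_fp16_nan() in the file above. */
static int check_fp16_nan(unsigned short fp16)
{
  unsigned short exponent = (fp16 >> 10) & 0x1F; /* bits 10-14 */
  unsigned short fraction = fp16 & 0x3FF;        /* bits 0-9 */
  return (exponent == 0x1F) && (fraction != 0);
}

int main(void)
{
  printf("%d\n", check_fp16_nan(0x7E00)); /* quiet NaN -> 1 */
  printf("%d\n", check_fp16_nan(0x7C00)); /* +infinity -> 0 (all-ones exponent, zero fraction) */
  printf("%d\n", check_fp16_nan(0x3C00)); /* 1.0       -> 0 */
  return 0;
}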