/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/isnan/ccv_nnc_reduce_isnan_cpu_ref.c
Line | Count | Source |
1 | | #include "ccv.h" |
2 | | #include "ccv_internal.h" |
3 | | #include "nnc/ccv_nnc.h" |
4 | | #include "nnc/ccv_nnc_easy.h" |
5 | | #include "nnc/ccv_nnc_internal.h" |
6 | | #ifdef USE_OPENMP |
7 | | #include <omp.h> |
8 | | #endif |
9 | | #ifdef USE_DISPATCH |
10 | | #include <dispatch/dispatch.h> |
11 | | #endif |
12 | | |
13 | | // Shared methods. |
14 | | #include "../_ccv_nnc_cpu_ref.h" |
15 | | |
16 | | static int is_fp16_nan(unsigned short fp16) // From Claude. |
17 | 6 | { |
18 | | // Extract the exponent and fraction bits |
19 | 6 | unsigned short exponent = (fp16 >> 10) & 0x1F; // Bits 10-14 (5 bits) |
20 | 6 | unsigned short fraction = fp16 & 0x3FF; // Bits 0-9 (10 bits) |
21 | | |
22 | | // For FP16, a value is NaN if: |
23 | | // 1. The exponent is all 1s (0x1F) |
24 | | // 2. The fraction is non-zero |
25 | 6 |   return (exponent == 0x1F) && (fraction != 0); |
26 | 6 | } |
27 | | |
28 | | static int _ccv_nnc_reduce_isnan_forw_f16(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
29 | 1 | { |
30 | 1 | assert(input_size == 1); |
31 | 1 | ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0]; |
32 | 1 | ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0]; |
33 | 1 | assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2); |
34 | 1 | assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2); |
35 |  |   // Assuming this is float 16. |
36 | 1 | int adim[CCV_NNC_MAX_DIM_ALLOC]; |
37 | 1 | int bdim[CCV_NNC_MAX_DIM_ALLOC]; |
38 | 1 | ccv_nnc_tensor_view_get_dim(a, adim); |
39 | 1 | ccv_nnc_tensor_view_get_dim(b, bdim); |
40 | 1 | assert(ccv_nnc_tensor_view_check_broadcast_dim(b, adim)); |
41 | 1 | int astride[CCV_NNC_MAX_DIM_ALLOC]; |
42 | 1 | int bstride[CCV_NNC_MAX_DIM_ALLOC]; |
43 | 1 | assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number. |
44 | 1 | ccv_nnc_tensor_view_get_stride(a, astride); |
45 | 1 | ccv_nnc_tensor_view_get_stride(b, bstride); |
46 | 1 | int i[CCV_NNC_MAX_DIM + 2]; |
47 | 1 | int x; |
48 | 1 | _ccv_nnc_tensor_set_cpu_ref_i32(b, 0); |
49 | 1 | unsigned short* const ap = (unsigned short*)a->data.f16; |
50 | 1 | int* const bp = b->data.i32; |
51 | | // Non-optimal case, need to do skip if needed. |
52 | 2 |   for (i[0] = 0; i[0] < adim[0]; i[0]++) |
53 | 1 | { |
54 | 1 | unsigned short* const ap0 = ap + i[0] * astride[0]; |
55 | 1 |     int* const bp0 = bdim[0] == 1 ? bp : bp + i[0] * bstride[0]; |
56 | 2 |     for (i[1] = 0; i[1] < adim[1]; i[1]++) |
57 | 1 | { |
58 | 1 | unsigned short* ap1 = ap0 + i[1] * astride[1]; |
59 | 1 |       int* const bp1 = bdim[1] == 1 ? bp0 : bp0 + i[1] * bstride[1]; |
60 | 3 |       for (i[2] = 0; i[2] < adim[2]; i[2]++) |
61 | 2 | { |
62 | 2 |         int* const bp2 = bdim[2] == 1 ? bp1 : bp1 + i[2] * bstride[2]; |
63 | 2 | if (bdim[3] == 1) |
64 | 2 | { |
65 | 8 |           for (x = 0; x < adim[3]; x++) |
66 | 6 | if (is_fp16_nan(ap1[x])) |
67 | 1 | bp2[0] = 1; |
68 | 2 | } else { |
69 | 0 | for (x = 0; x < adim[3]; x++) |
70 | 0 | if (is_fp16_nan(ap1[x])) |
71 | 0 | bp2[x] = 1; |
72 | 0 | } |
73 | 2 | ap1 += astride[2]; |
74 | 2 | } |
75 | 1 | } |
76 | 1 | } |
77 | 1 | return CCV_NNC_EXEC_SUCCESS; |
78 | 1 | } |
79 | | |
80 | | static int _ccv_nnc_reduce_isnan_forw_f32(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
81 | 2 | { |
82 | 2 | assert(input_size == 1); |
83 | 2 | ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0]; |
84 | 2 | ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0]; |
85 | 2 | assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2); |
86 | 2 | assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2); |
87 | | // Assuming this is float 32. |
88 | 2 | int adim[CCV_NNC_MAX_DIM_ALLOC]; |
89 | 2 | int bdim[CCV_NNC_MAX_DIM_ALLOC]; |
90 | 2 | ccv_nnc_tensor_view_get_dim(a, adim); |
91 | 2 | ccv_nnc_tensor_view_get_dim(b, bdim); |
92 | 2 | assert(ccv_nnc_tensor_view_check_broadcast_dim(b, adim)); |
93 | 2 | int astride[CCV_NNC_MAX_DIM_ALLOC]; |
94 | 2 | int bstride[CCV_NNC_MAX_DIM_ALLOC]; |
95 | 2 | assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number. |
96 | 2 | ccv_nnc_tensor_view_get_stride(a, astride); |
97 | 2 | ccv_nnc_tensor_view_get_stride(b, bstride); |
98 | 2 | int i[CCV_NNC_MAX_DIM + 2]; |
99 | 2 | int x; |
100 | 2 | _ccv_nnc_tensor_set_cpu_ref_i32(b, 0); |
101 | 2 | float* const ap = a->data.f32; |
102 | 2 | int* const bp = b->data.i32; |
103 | | // Non-optimal case, need to do skip if needed. |
104 | 4 |   for (i[0] = 0; i[0] < adim[0]; i[0]++) |
105 | 2 | { |
106 | 2 | float* const ap0 = ap + i[0] * astride[0]; |
107 | 2 |     int* const bp0 = bdim[0] == 1 ? bp : bp + i[0] * bstride[0]; |
108 | 4 |     for (i[1] = 0; i[1] < adim[1]; i[1]++) |
109 | 2 | { |
110 | 2 | float* ap1 = ap0 + i[1] * astride[1]; |
111 | 2 |       int* const bp1 = bdim[1] == 1 ? bp0 : bp0 + i[1] * bstride[1]; |
112 | 6 |       for (i[2] = 0; i[2] < adim[2]; i[2]++) |
113 | 4 | { |
114 | 4 |         int* const bp2 = bdim[2] == 1 ? bp1 : bp1 + i[2] * bstride[2]; |
115 | 4 | if (bdim[3] == 1) |
116 | 2 | { |
117 | 8 |           for (x = 0; x < adim[3]; x++) |
118 | 6 | if (isnan(ap1[x])) |
119 | 1 | bp2[0] = 1; |
120 | 2 | } else { |
121 | 8 |           for (x = 0; x < adim[3]; x++) |
122 | 6 | if (isnan(ap1[x])) |
123 | 1 | bp2[x] = 1; |
124 | 2 | } |
125 | 4 | ap1 += astride[2]; |
126 | 4 | } |
127 | 2 | } |
128 | 2 | } |
129 | 2 | return CCV_NNC_EXEC_SUCCESS; |
130 | 2 | } |
131 | | |
132 | | static int _ccv_nnc_reduce_isnan_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
133 | 3 | { |
134 | 3 | assert(input_size == 1); |
135 | 3 | ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0]; |
136 | 3 | if (a->info.datatype == CCV_32F) |
137 | 2 | return _ccv_nnc_reduce_isnan_forw_f32(cmd, hint, flags, inputs, input_size, outputs, output_size, stream_context); |
138 | 1 | else if (a->info.datatype == CCV_16F) |
139 | 1 | return _ccv_nnc_reduce_isnan_forw_f16(cmd, hint, flags, inputs, input_size, outputs, output_size, stream_context); |
140 | 0 | return CCV_NNC_EXEC_INVALID; |
141 | 3 | } |
142 | | |
143 | | static int _ccv_nnc_reduce_isnan_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
144 | 0 | { |
145 | 0 | return CCV_NNC_EXEC_INVALID; |
146 | 0 | } |
147 | | |
148 | | REGISTER_COMMAND_BACKEND(CCV_NNC_REDUCE_ISNAN_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
149 | 1 | { |
150 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN; |
151 | 1 | registry->tensor_datatypes = CCV_32F | CCV_32S | CCV_16F; |
152 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
153 | 1 | registry->algorithms = 1; |
154 | 1 | registry->exec = _ccv_nnc_reduce_isnan_forw; |
155 | 1 | } |
156 | | |
157 | | REGISTER_COMMAND_BACKEND(CCV_NNC_REDUCE_ISNAN_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
158 | 1 | { |
159 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN; |
160 | 1 | registry->tensor_datatypes = CCV_32F | CCV_32S | CCV_16F; |
161 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
162 | 1 | registry->algorithms = 1; |
163 | 1 | registry->exec = _ccv_nnc_reduce_isnan_back; |
164 | 1 | } |
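
Note: as a quick sanity check of the fp16 NaN bit rule used by is_fp16_nan above, here is a minimal standalone sketch (not part of the covered file; the function name and test values are illustrative only):

#include <assert.h>

/* Same rule as is_fp16_nan: NaN iff the 5 exponent bits are all 1s and the 10 fraction bits are non-zero. */
static int fp16_bits_is_nan(unsigned short fp16)
{
  unsigned short exponent = (fp16 >> 10) & 0x1F;
  unsigned short fraction = fp16 & 0x3FF;
  return (exponent == 0x1F) && (fraction != 0);
}

int main(void)
{
  assert(fp16_bits_is_nan(0x7e00));  /* canonical quiet NaN */
  assert(!fp16_bits_is_nan(0x7c00)); /* +Inf: exponent all 1s but fraction 0, so not NaN */
  assert(!fp16_bits_is_nan(0x3c00)); /* 1.0 */
  return 0;
}

This also illustrates why the kernel leaves infinities unflagged: only an all-ones exponent combined with a non-zero fraction sets the output to 1.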