lib/nnc/cmd/pad/ccv_nnc_pad_cpu_ref.c
#include "ccv.h"
#include "ccv_internal.h"
#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_easy.h"
#include "nnc/ccv_nnc_internal.h"
#ifdef USE_OPENMP
#include <omp.h>
#endif
#ifdef USE_DISPATCH
#include <dispatch/dispatch.h>
#endif

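/* Reference CPU kernel for CCV_NNC_PAD_FORWARD. It pads input a into output b,
 * either with zeros (CCV_NNC_PAD_ZERO) or by replicating the border values
 * (CCV_NNC_PAD_REPLICATE). cmd.info.size.dim carries the amount of padding
 * inserted before each dimension; cmd.info.pad.end (only sanity-checked here)
 * carries the amount appended after, which is implied by b's larger shape. */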
static int _ccv_nnc_pad_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	assert(input_size == 1);
	ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
	ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0];
	assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
	assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
	// Assuming this is float 32.
	int adim[CCV_NNC_MAX_DIM_ALLOC];
	int bdim[CCV_NNC_MAX_DIM_ALLOC];
	ccv_nnc_tensor_view_get_dim(a, adim);
	ccv_nnc_tensor_view_get_dim(b, bdim);
	int astride[CCV_NNC_MAX_DIM_ALLOC];
	int bstride[CCV_NNC_MAX_DIM_ALLOC];
	assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
	ccv_nnc_tensor_view_get_stride(a, astride);
	ccv_nnc_tensor_view_get_stride(b, bstride);
	int i[CCV_NNC_MAX_DIM + 2];
	int x;
	float* const ap = a->data.f32;
	float* const bp = b->data.f32;
	const int nd = ccv_nnc_tensor_nd(a->info.dim);
	const int offset = CCV_NNC_MAX_DIM + 2 - nd;
	assert(offset >= 0);
	for (x = 0; x < nd; x++) // We don't support negative pad.
		{ assert(cmd.info.size.dim[x] >= 0 && cmd.info.pad.end[x] >= 0); }
	int begin[CCV_NNC_MAX_DIM_ALLOC];
	for (x = 0; x < nd; x++)
		begin[x + offset] = cmd.info.size.dim[x];
	for (x = 0; x < offset; x++)
		begin[x] = 0;
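	// begin[] now holds the leading pad for each dimension, right-aligned to the
	// fixed CCV_NNC_MAX_DIM + 2 layout; dimensions the input doesn't have get 0.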
	// Non-optimal case, need to do skip if needed.
	if (cmd.info.pad.type == CCV_NNC_PAD_ZERO)
	{
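		// A null ap0 / ap1 / ap2 marks a coordinate that falls outside the input
		// along that dimension; it propagates inward and forces zeros below.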
		for (i[0] = 0; i[0] < bdim[0]; i[0]++)
		{
			float* const ap0 = (i[0] >= begin[0] && i[0] < adim[0] + begin[0]) ? ap + (i[0] - begin[0]) * astride[0] : 0;
			float* const bp0 = bp + i[0] * bstride[0];
			for (i[1] = 0; i[1] < bdim[1]; i[1]++)
			{
				float* const ap1 = (ap0 && i[1] >= begin[1] && i[1] < adim[1] + begin[1]) ? ap0 + (i[1] - begin[1]) * astride[1] : 0;
				float* bp1 = bp0 + i[1] * bstride[1];
				for (i[2] = 0; i[2] < bdim[2]; i[2]++)
				{
					float* const ap2 = (ap1 && i[2] >= begin[2] && i[2] < adim[2] + begin[2]) ? ap1 + (i[2] - begin[2]) * astride[2] : 0;
					for (x = 0; x < bdim[3]; x++)
						bp1[x] = (ap2 && x >= begin[3] && x < adim[3] + begin[3]) ? ap2[x - begin[3]] : 0;
					bp1 += bstride[2];
				}
			}
		}
	} else {
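		// Replicate padding: every output index is clamped into the valid input
		// range, so the border values of a are repeated into the padded region.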
		assert(cmd.info.pad.type == CCV_NNC_PAD_REPLICATE);
		for (i[0] = 0; i[0] < bdim[0]; i[0]++)
		{
			float* const ap0 = ap + ccv_min(adim[0] - 1, ccv_max(0, i[0] - begin[0])) * astride[0];
			float* const bp0 = bp + i[0] * bstride[0];
			for (i[1] = 0; i[1] < bdim[1]; i[1]++)
			{
				float* const ap1 = ap0 + ccv_min(adim[1] - 1, ccv_max(0, i[1] - begin[1])) * astride[1];
				float* bp1 = bp0 + i[1] * bstride[1];
				for (i[2] = 0; i[2] < bdim[2]; i[2]++)
				{
					float* const ap2 = ap1 + ccv_min(adim[2] - 1, ccv_max(0, i[2] - begin[2])) * astride[2];
					for (x = 0; x < bdim[3]; x++)
						bp1[x] = ap2[ccv_min(adim[3] - 1, ccv_max(0, x - begin[3]))];
					bp1 += bstride[2];
				}
			}
		}
	}
	return CCV_NNC_EXEC_SUCCESS;
}

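/* Reference CPU kernel for CCV_NNC_PAD_BACKWARD. The incoming gradient a has the
 * padded (larger) shape; the kernel copies the interior region, offset by begin
 * in every dimension, into b, i.e. it slices the gradient back down to the
 * original unpadded shape. */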
static int _ccv_nnc_pad_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	assert(input_size == 1);
	ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
	ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0];
	assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
	assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
	// Assuming this is float 32.
	int adim[CCV_NNC_MAX_DIM_ALLOC];
	int bdim[CCV_NNC_MAX_DIM_ALLOC];
	ccv_nnc_tensor_view_get_dim(a, adim);
	ccv_nnc_tensor_view_get_dim(b, bdim);
	int astride[CCV_NNC_MAX_DIM_ALLOC];
	int bstride[CCV_NNC_MAX_DIM_ALLOC];
	assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
	ccv_nnc_tensor_view_get_stride(a, astride);
	ccv_nnc_tensor_view_get_stride(b, bstride);
	int i[CCV_NNC_MAX_DIM + 2];
	int x;
	float* const ap = a->data.f32;
	float* const bp = b->data.f32;
	const int nd = ccv_nnc_tensor_nd(a->info.dim);
	const int offset = CCV_NNC_MAX_DIM + 2 - nd;
	assert(offset >= 0);
	for (x = 0; x < nd; x++) // We don't support negative pad.
		{ assert(cmd.info.size.dim[x] >= 0 && cmd.info.pad.end[x] >= 0); }
	int begin[CCV_NNC_MAX_DIM_ALLOC];
	for (x = 0; x < nd; x++)
		begin[x + offset] = cmd.info.size.dim[x];
	for (x = 0; x < offset; x++)
		begin[x] = 0;
	// Non-optimal case, need to do skip if needed.
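	// Walk the smaller output b and read a at the same index shifted by begin.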
	for (i[0] = 0; i[0] < bdim[0]; i[0]++)
	{
		float* const ap0 = ap + (i[0] + begin[0]) * astride[0];
		float* const bp0 = bp + i[0] * bstride[0];
		for (i[1] = 0; i[1] < bdim[1]; i[1]++)
		{
			float* const ap1 = ap0 + (i[1] + begin[1]) * astride[1];
			float* bp1 = bp0 + i[1] * bstride[1];
			for (i[2] = 0; i[2] < bdim[2]; i[2]++)
			{
				float* const ap2 = ap1 + (i[2] + begin[2]) * astride[2];
				for (x = 0; x < bdim[3]; x++)
					bp1[x] = ap2[x + begin[3]];
				bp1 += bstride[2];
			}
		}
	}
	return CCV_NNC_EXEC_SUCCESS;
}

REGISTER_COMMAND_BACKEND(CCV_NNC_PAD_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->algorithms = 1;
	registry->exec = _ccv_nnc_pad_forw;
}

REGISTER_COMMAND_BACKEND(CCV_NNC_PAD_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->algorithms = 1;
	registry->exec = _ccv_nnc_pad_back;
}
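
/* A minimal usage sketch of this backend (not part of this file). It pads a 2x2
 * float tensor by one element on every side with zeros, giving a 4x4 output.
 * The designated-initializer layout of ccv_nnc_cmd_param_t below is an
 * assumption inferred from how this kernel reads cmd.info.size.dim and
 * cmd.info.pad; check ccv_nnc.h (or a CMD_PAD_FORWARD convenience macro, if
 * available) for the canonical way to construct the command.
 *
 *   ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
 *   ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 4), 0);
 *   const ccv_nnc_cmd_param_t params = {
 *       .size = {.dim = {1, 1}}, // pad inserted before each dimension
 *       .pad = {.type = CCV_NNC_PAD_ZERO, .end = {1, 1}}, // pad appended after each dimension
 *   };
 *   ccv_nnc_cmd_exec(ccv_nnc_cmd(CCV_NNC_PAD_FORWARD, 0, params, 0), ccv_nnc_no_hint, 0,
 *       TENSOR_LIST(a), TENSOR_LIST(b), 0);
 */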