/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/blas/ccv_nnc_rotate_half_cpu_ref.c
Line | Count | Source |
1 | | #include "ccv.h" |
2 | | #include "ccv_internal.h" |
3 | | #include "nnc/ccv_nnc.h" |
4 | | #include "nnc/ccv_nnc_easy.h" |
5 | | #include "nnc/ccv_nnc_internal.h" |
6 | | #ifdef USE_OPENMP |
7 | | #include <omp.h> |
8 | | #endif |
9 | | #ifdef USE_DISPATCH |
10 | | #include <dispatch/dispatch.h> |
11 | | #endif |
12 | | |
13 | | static int _ccv_nnc_rotate_half_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
14 | 0 | { |
15 | 0 | assert(input_size == 1); |
16 | 0 | assert(output_size == 1); |
17 | 0 | ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0]; |
18 | 0 | ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0]; |
19 | 0 | assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2); |
20 | 0 | assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2); |
21 | 0 | int adim[CCV_NNC_MAX_DIM_ALLOC]; |
22 | 0 | ccv_nnc_tensor_view_get_dim(a, adim); |
23 | 0 | assert(ccv_nnc_tensor_view_check_dim(b, adim)); |
24 | 0 | const int half = adim[CCV_NNC_MAX_DIM + 1] / 2; |
25 | 0 | assert(half > 0); |
26 | 0 | assert(adim[CCV_NNC_MAX_DIM + 1] == half * 2); |
27 | 0 | int x; |
28 | 0 | if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b)) |
29 | 0 | { |
30 | 0 | const int count = ccv_nnc_tensor_count(a->info); |
31 | 0 | assert(count % (half * 2) == 0); |
32 | 0 | const int row_count = count / (half * 2); |
33 | 0 | float* const ap = a->data.f32; |
34 | 0 | float* const bp = b->data.f32; |
35 | 0 | int i; |
36 | 0 | if (ap == bp) |
37 | 0 | { |
38 | 0 | for (i = 0; i < row_count; i++) |
39 | 0 | { |
40 | 0 | float* const row = bp + i * half * 2; |
41 | 0 | for (x = 0; x < half; x++) |
42 | 0 | { |
43 | 0 | float t; |
44 | 0 | CCV_SWAP(row[x], row[x + half], t); |
45 | 0 | } |
46 | 0 | } |
47 | 0 | } else { |
48 | 0 | for (i = 0; i < row_count; i++) |
49 | 0 | { |
50 | 0 | const float* const ap0 = ap + i * half * 2; |
51 | 0 | float* const bp0 = bp + i * half * 2; |
52 | 0 | memcpy(bp0, ap0 + half, sizeof(float) * half); |
53 | 0 | memcpy(bp0 + half, ap0, sizeof(float) * half); |
54 | 0 | } |
55 | 0 | } |
56 | 0 | return CCV_NNC_EXEC_SUCCESS; |
57 | 0 | } |
58 | 0 | assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number. |
59 | 0 | int astride[CCV_NNC_MAX_DIM_ALLOC]; |
60 | 0 | int bstride[CCV_NNC_MAX_DIM_ALLOC]; |
61 | 0 | ccv_nnc_tensor_view_get_stride(a, astride); |
62 | 0 | ccv_nnc_tensor_view_get_stride(b, bstride); |
63 | 0 | int i[CCV_NNC_MAX_DIM + 2]; |
64 | 0 | float* const ap = a->data.f32; |
65 | 0 | float* const bp = b->data.f32; |
66 | 0 | for (i[0] = 0; i[0] < adim[0]; i[0]++) |
67 | 0 | { |
68 | 0 | float* const ap0 = ap + i[0] * astride[0]; |
69 | 0 | float* const bp0 = bp + i[0] * bstride[0]; |
70 | 0 | for (i[1] = 0; i[1] < adim[1]; i[1]++) |
71 | 0 | { |
72 | 0 | float* const ap1 = ap0 + i[1] * astride[1]; |
73 | 0 | float* const bp1 = bp0 + i[1] * bstride[1]; |
74 | 0 | for (i[2] = 0; i[2] < adim[2]; i[2]++) |
75 | 0 | { |
76 | 0 | float* const ap2 = ap1 + i[2] * astride[2]; |
77 | 0 | float* const bp2 = bp1 + i[2] * bstride[2]; |
78 | 0 | if (ap2 == bp2 && astride[CCV_NNC_MAX_DIM + 1] == bstride[CCV_NNC_MAX_DIM + 1]) |
79 | 0 | { |
80 | 0 | for (x = 0; x < half; x++) |
81 | 0 | { |
82 | 0 | float t; |
83 | 0 | CCV_SWAP(bp2[x * bstride[CCV_NNC_MAX_DIM + 1]], bp2[(x + half) * bstride[CCV_NNC_MAX_DIM + 1]], t); |
84 | 0 | } |
85 | 0 | } else { |
86 | 0 | for (x = 0; x < half; x++) |
87 | 0 | { |
88 | 0 | bp2[x * bstride[CCV_NNC_MAX_DIM + 1]] = ap2[(x + half) * astride[CCV_NNC_MAX_DIM + 1]]; |
89 | 0 | bp2[(x + half) * bstride[CCV_NNC_MAX_DIM + 1]] = ap2[x * astride[CCV_NNC_MAX_DIM + 1]]; |
90 | 0 | } |
91 | 0 | } |
92 | 0 | } |
93 | 0 | } |
94 | 0 | } |
95 | 0 | return CCV_NNC_EXEC_SUCCESS; |
96 | 0 | } |
97 | | |
98 | | static int _ccv_nnc_rotate_half_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
99 | 0 | { |
100 | 0 | assert(input_size >= 1); |
101 | 0 | assert(output_size == 1); |
102 | 0 | return _ccv_nnc_rotate_half_forw(cmd, hint, flags, inputs, 1, outputs, output_size, stream_context); |
103 | 0 | } |
104 | | |
105 | | REGISTER_COMMAND_BACKEND(CCV_NNC_ROTATE_HALF_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
106 | 1 | { |
107 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN; |
108 | 1 | registry->tensor_datatypes = CCV_32F; |
109 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
110 | 1 | registry->algorithms = 1; |
111 | 1 | registry->exec = _ccv_nnc_rotate_half_forw; |
112 | 1 | } |
113 | | |
114 | | REGISTER_COMMAND_BACKEND(CCV_NNC_ROTATE_HALF_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
115 | 1 | { |
116 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN; |
117 | 1 | registry->tensor_datatypes = CCV_32F; |
118 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
119 | 1 | registry->algorithms = 1; |
120 | 1 | registry->exec = _ccv_nnc_rotate_half_back; |
121 | 1 | } |