/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/index/ccv_nnc_index_select_cpu_ref.c
Line | Count | Source |
1 | | #include "ccv.h" |
2 | | #include "ccv_internal.h" |
3 | | #include "nnc/ccv_nnc.h" |
4 | | #include "nnc/ccv_nnc_easy.h" |
5 | | #include "nnc/ccv_nnc_internal.h" |
6 | | #ifdef USE_OPENMP |
7 | | #include <omp.h> |
8 | | #endif |
9 | | #ifdef USE_DISPATCH |
10 | | #include <dispatch/dispatch.h> |
11 | | #endif |
12 | | |
13 | | static int _ccv_nnc_index_select_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
14 | 7 | { |
15 | 7 | assert(input_size == 2); |
16 | 7 | assert(output_size == 1); |
17 | 7 | const ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0]; |
18 | 7 | const int a_nd = ccv_nnc_tensor_nd(a->info.dim); |
19 | 7 | assert(a_nd <= 2); |
20 | 7 | const ccv_nnc_tensor_view_t* const indices = (ccv_nnc_tensor_view_t*)inputs[1]; |
21 | 7 | assert(ccv_nnc_tensor_nd(indices->info.dim) == 1); |
22 | 7 | const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0]; |
23 | 7 | const int b_nd = ccv_nnc_tensor_nd(b->info.dim); |
24 | 7 | assert(b_nd <= 2); |
25 | 7 | const int a_cols = a_nd < 2 ? 1 : a->info.dim[1]; |
26 | 7 | const int a_cols_inc = CCV_IS_TENSOR_VIEW(a) ? (a_nd < 2 ? 1 : a->stride[0]) : a_cols; |
27 | 7 | const int a_rows = a->info.dim[0]; |
28 | 7 | const int b_cols = b_nd < 2 ? 1 : b->info.dim[1]; |
29 | 7 | const int b_cols_inc = CCV_IS_TENSOR_VIEW(b) ? (b_nd < 2 ? 1 : b->stride[0]) : b_cols; |
30 | 7 | const int b_rows = b->info.dim[0]; |
31 | 7 | assert(b_rows == indices->info.dim[0]); |
32 | 7 | assert(a_cols == b_cols); |
33 | 7 | assert(a->info.datatype == b->info.datatype); |
34 | 7 | const size_t data_size = CCV_GET_DATA_TYPE_SIZE(a->info.datatype); |
35 | 7 | if (indices->info.datatype == CCV_32S) |
36 | 6 | { |
37 | 6 | assert(a->info.datatype == CCV_32F || a->info.datatype == CCV_16F); |
38 | 23 | parallel_for(i, b_rows) { |
39 | 23 | const int index = indices->data.i32[i]; |
40 | 23 | assert(index < a_rows); |
41 | 23 | uint8_t* const bp = b->data.u8 + data_size * b_cols_inc * i; |
42 | 23 | uint8_t* const ap = a->data.u8 + data_size * a_cols_inc * index; |
43 | 23 | memcpy(bp, ap, data_size * a_cols); |
44 | 23 | } parallel_endfor |
45 | 6 | } else { |
46 | 1 | assert(indices->info.datatype == CCV_32F); |
47 | 1 | assert(a->info.datatype == CCV_32F); |
48 | 2 | parallel_for(i, b_rows) { |
49 | 2 | const int j0 = (int)indices->data.f32[i]; |
50 | 2 | const int j1 = j0 + 1; |
51 | 2 | const float w1 = indices->data.f32[i] - j0; |
52 | 2 | const float w0 = 1 - w1; |
53 | 2 | assert(j0 >= 0); |
54 | 2 | assert(j0 < a_rows); |
55 | 2 | float* const bp = b->data.f32 + b_cols_inc * i; |
56 | 2 | float* const ap0 = a->data.f32 + a_cols_inc * j0; |
57 | 2 | float* const ap1 = a->data.f32 + a_cols_inc * ccv_min(j1, a_rows - 1); |
58 | 2 | int j; |
59 | 6 | for (j = 0; j < a_cols; j++) |
60 | 4 | bp[j] = ap0[j] * w0 + ap1[j] * w1; |
61 | 2 | } parallel_endfor |
62 | 1 | } |
63 | 7 | return CCV_NNC_EXEC_SUCCESS; |
64 | 7 | } |
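The forward pass above is a row gather: with CCV_32S indices it memcpy's each selected row of a into b, and with CCV_32F indices it linearly interpolates between the two neighboring rows using the fractional part as the weights w0/w1. A minimal usage sketch through the public API, assuming the CMD_INDEX_SELECT_FORWARD() and TENSOR_LIST() convenience macros from nnc/ccv_nnc_easy.h:

    // Gather rows 1 and 3 of a 4x2 float matrix into a 2x2 output.
    ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 2), 0);
    ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 2), 0);
    ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
    int i;
    for (i = 0; i < 8; i++)
        a->data.f32[i] = i; // rows are {0,1}, {2,3}, {4,5}, {6,7}
    indices->data.i32[0] = 1;
    indices->data.i32[1] = 3;
    ccv_nnc_cmd_exec(CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, indices), TENSOR_LIST(b), 0);
    // b now holds rows 1 and 3 of a: {2,3}, {6,7}
    ccv_nnc_tensor_free(a);
    ccv_nnc_tensor_free(indices);
    ccv_nnc_tensor_free(b);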
65 | | |
66 | | static int _ccv_nnc_index_select_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
67 | 4 | { |
68 | 4 | assert(input_size >= 3); |
69 | 4 | assert(output_size <= 2); |
70 | 4 | const ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0]; |
71 | 4 | const int g_nd = ccv_nnc_tensor_nd(g->info.dim); |
72 | 4 | assert(g_nd <= 2); |
73 | 4 | const ccv_nnc_tensor_view_t* const indices = (ccv_nnc_tensor_view_t*)inputs[2]; |
74 | 4 | assert(ccv_nnc_tensor_nd(indices->info.dim) == 1); |
75 | 4 | const ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0]; |
76 | 4 | const int h_nd = ccv_nnc_tensor_nd(h->info.dim); |
77 | 4 | assert(h_nd <= 2); |
78 | 4 | ccv_nnc_tensor_zero((ccv_nnc_tensor_t*)h); |
79 | 4 | if (output_size >= 2 && outputs[1]) |
80 | 0 | ccv_nnc_tensor_zero(outputs[1]); |
81 | 4 | const int g_cols = g_nd < 2 ? 1 : g->info.dim[1]; |
82 | 4 | const int g_cols_inc = CCV_IS_TENSOR_VIEW(g) ? (g_nd < 2 ? 1 : g->stride[0]) : g_cols; |
83 | 4 | const int g_rows = g->info.dim[0]; |
84 | 4 | const int h_cols = h_nd < 2 ? 1 : h->info.dim[1]; |
85 | 4 | const int h_cols_inc = CCV_IS_TENSOR_VIEW(h) ? (h_nd < 2 ? 1 : h->stride[0]) : h_cols; |
86 | 4 | const int h_rows = h->info.dim[0]; |
87 | 4 | assert(g_rows == indices->info.dim[0]); |
88 | 4 | assert(g_cols == h_cols); |
89 | 4 | assert(indices->info.datatype == CCV_32S); |
90 | 4 | assert(g->info.datatype == h->info.datatype); |
91 | 4 | assert(g->info.datatype == CCV_32F || g->info.datatype == CCV_16F); |
92 | 4 | int i; |
93 | 4 | if (g->info.datatype == CCV_32F) |
94 | 3 | { |
95 | 10 | for (i = 0; i < g_rows; i++) |
96 | 7 | { |
97 | 7 | const int index = indices->data.i32[i]; |
98 | 7 | assert(index < h_rows); |
99 | 7 | float* const hp = h->data.f32 + h_cols_inc * index; |
100 | 7 | float* const gp = g->data.f32 + g_cols_inc * i; |
101 | 11 | parallel_for(j, g_cols) { |
102 | 11 | hp[j] += gp[j]; |
103 | 11 | } parallel_endfor |
104 | 7 | } |
105 | 3 | } else { |
106 | 11 | for (i = 0; i < g_rows; i++) |
107 | 10 | { |
108 | 10 | const int index = indices->data.i32[i]; |
109 | 10 | assert(index < h_rows); |
110 | 10 | ccv_float16_t* const hp = h->data.f16 + h_cols_inc * index; |
111 | 10 | ccv_float16_t* const gp = g->data.f16 + g_cols_inc * i; |
112 | 100 | parallel_for(j, g_cols) { |
113 | 100 | float t, v; |
114 | 100 | ccv_half_precision_to_float((uint16_t*)gp + j, &t, 1); |
115 | 100 | ccv_half_precision_to_float((uint16_t*)hp + j, &v, 1); |
116 | 100 | v += t; |
117 | 100 | ccv_float_to_half_precision(&v, (uint16_t*)hp + j, 1); |
118 | 100 | } parallel_endfor |
119 | 10 | } |
120 | 1 | } |
121 | 4 | return CCV_NNC_EXEC_SUCCESS; |
122 | 4 | } |
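The backward pass is the matching scatter-add: h is zeroed first, then each gradient row g[i] is accumulated into h[indices[i]], so duplicate indices sum their contributions (the CCV_16F branch round-trips through float to do the addition). A standalone sketch of the accumulation rule it implements, using hypothetical names that are not part of the library:

    #include <string.h>

    // Reference scatter-add: h[indices[i]] += g[i] for each gradient row i.
    static void index_select_grad(const float* g, const int* indices, int g_rows,
        int cols, float* h, int h_rows)
    {
        memset(h, 0, sizeof(float) * h_rows * cols); // accumulate from zero
        int i, j;
        for (i = 0; i < g_rows; i++)
        {
            const int index = indices[i]; // destination row in h
            for (j = 0; j < cols; j++)
                h[index * cols + j] += g[i * cols + j]; // duplicates sum up
        }
    }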
123 | | |
124 | | REGISTER_COMMAND_BACKEND(CCV_NNC_INDEX_SELECT_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
125 | 1 | { |
126 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW; |
127 | 1 | registry->tensor_datatypes = CCV_32F | CCV_16F | CCV_32S; |
128 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
129 | 1 | registry->algorithms = 1; |
130 | 1 | registry->exec = _ccv_nnc_index_select_forw; |
131 | 1 | } |
132 | | |
133 | | REGISTER_COMMAND_BACKEND(CCV_NNC_INDEX_SELECT_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
134 | 1 | { |
135 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW; |
136 | 1 | registry->tensor_datatypes = CCV_32F | CCV_16F | CCV_32S; |
137 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
138 | 1 | registry->algorithms = 1; |
139 | 1 | registry->exec = _ccv_nnc_index_select_back; |
140 | 1 | } |