/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/grid_sample/ccv_nnc_grid_sample_cpu_ref.c
Line | Count | Source |
1 | | #include "ccv.h" |
2 | | #include "ccv_internal.h" |
3 | | #include "nnc/ccv_nnc.h" |
4 | | #include "nnc/ccv_nnc_easy.h" |
5 | | #include "nnc/ccv_nnc_internal.h" |
6 | | #include <math.h> |
7 | | |
8 | | static int _ccv_nnc_grid_sample_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
9 | 0 | { |
10 | 0 | assert(input_size == 2); |
11 | 0 | assert(output_size == 1); |
12 | 0 | const ccv_nnc_tensor_view_t* const a = (const ccv_nnc_tensor_view_t*)inputs[0]; |
13 | 0 | const ccv_nnc_tensor_view_t* const grid = (const ccv_nnc_tensor_view_t*)inputs[1]; |
14 | 0 | ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0]; |
15 | 0 | assert(a->info.datatype == CCV_32F); |
16 | 0 | assert(grid->info.datatype == CCV_32F); |
17 | 0 | assert(b->info.datatype == CCV_32F); |
18 | 0 | assert(a->info.format == b->info.format); |
19 | 0 | assert(a->info.format == CCV_TENSOR_FORMAT_NCHW || a->info.format == CCV_TENSOR_FORMAT_NHWC); |
20 | 0 | assert(grid->info.format == CCV_TENSOR_FORMAT_NHWC); |
21 | 0 | int adim[CCV_NNC_MAX_DIM_ALLOC]; |
22 | 0 | int bdim[CCV_NNC_MAX_DIM_ALLOC]; |
23 | 0 | int griddim[CCV_NNC_MAX_DIM_ALLOC]; |
24 | 0 | int astride[CCV_NNC_MAX_DIM_ALLOC]; |
25 | 0 | int bstride[CCV_NNC_MAX_DIM_ALLOC]; |
26 | 0 | int gridstride[CCV_NNC_MAX_DIM_ALLOC]; |
27 | 0 | ccv_nnc_tensor_view_get_dim(a, adim); |
28 | 0 | ccv_nnc_tensor_view_get_dim(b, bdim); |
29 | 0 | ccv_nnc_tensor_view_get_dim(grid, griddim); |
30 | 0 | ccv_nnc_tensor_view_get_stride(a, astride); |
31 | 0 | ccv_nnc_tensor_view_get_stride(b, bstride); |
32 | 0 | ccv_nnc_tensor_view_get_stride(grid, gridstride); |
33 | |
|
34 | 0 | const int a_nd = ccv_nnc_tensor_nd(a->info.dim); |
35 | 0 | const int b_nd = ccv_nnc_tensor_nd(b->info.dim); |
36 | 0 | const int grid_nd = ccv_nnc_tensor_nd(grid->info.dim); |
37 | 0 | assert(a_nd == 3 || a_nd == 4); |
38 | 0 | assert(b_nd == 3 || b_nd == 4); |
39 | 0 | assert(grid_nd == 3 || grid_nd == 4); |
40 | | |
41 | 0 | const int ahw = ccv_nnc_tensor_hw(a->info, a_nd, CCV_NNC_MAX_DIM); |
42 | 0 | const int bhw = ccv_nnc_tensor_hw(b->info, b_nd, CCV_NNC_MAX_DIM); |
43 | 0 | const int ghw = ccv_nnc_tensor_hw(grid->info, grid_nd, CCV_NNC_MAX_DIM); |
44 | 0 | assert(ahw >= 0); |
45 | 0 | assert(bhw >= 0); |
46 | 0 | assert(ghw >= 0); |
47 | | |
48 | 0 | const int N = ccv_nnc_tensor_get_n(a->info); |
49 | 0 | const int C = ccv_nnc_tensor_get_c(a->info); |
50 | 0 | const int H_in = adim[ahw]; |
51 | 0 | const int W_in = adim[ahw + 1]; |
52 | 0 | const int N_out = ccv_nnc_tensor_get_n(b->info); |
53 | 0 | const int C_out = ccv_nnc_tensor_get_c(b->info); |
54 | 0 | const int H_out = bdim[bhw]; |
55 | 0 | const int W_out = bdim[bhw + 1]; |
56 | 0 | assert(N_out == N); |
57 | 0 | assert(C_out == C); |
58 | 0 | const int N_grid = ccv_nnc_tensor_get_n(grid->info); |
59 | 0 | assert(griddim[ghw + 2] == 2); |
60 | 0 | assert(N_grid == N); |
61 | 0 | assert(griddim[ghw] == H_out); |
62 | 0 | assert(griddim[ghw + 1] == W_out); |
63 | | |
64 | 0 | const int align_corners = cmd.info.grid_sample.align_corners; |
65 | 0 | const float* const ap = a->data.f32; |
66 | 0 | const float* const gridp = grid->data.f32; |
67 | 0 | float* const bp = b->data.f32; |
68 | 0 | const int anstride = (a_nd == CCV_NNC_MAX_DIM + 2) ? astride[0] : 0; |
69 | 0 | const int ahstride = astride[ahw]; |
70 | 0 | const int awstride = astride[ahw + 1]; |
71 | 0 | const int acstride = (a->info.format == CCV_TENSOR_FORMAT_NCHW) ? astride[ahw - 1] : astride[ahw + 2]; |
72 | 0 | const int bnstride = (b_nd == CCV_NNC_MAX_DIM + 2) ? bstride[0] : 0; |
73 | 0 | const int bhstride = bstride[bhw]; |
74 | 0 | const int bwstride = bstride[bhw + 1]; |
75 | 0 | const int bcstride = (b->info.format == CCV_TENSOR_FORMAT_NCHW) ? bstride[bhw - 1] : bstride[bhw + 2]; |
76 | 0 | const int gnstride = (grid_nd == CCV_NNC_MAX_DIM + 2) ? gridstride[0] : 0; |
77 | 0 | const int ghstride = gridstride[ghw]; |
78 | 0 | const int gwstride = gridstride[ghw + 1]; |
79 | 0 | const int gcstride = gridstride[ghw + 2]; |
80 | |
|
81 | 0 | for (int n = 0; n < N; n++) |
82 | 0 | for (int y = 0; y < H_out; y++) |
83 | 0 | for (int x = 0; x < W_out; x++) |
84 | 0 | { |
85 | 0 | const int grid_offset = n * gnstride + y * ghstride + x * gwstride; |
86 | 0 | const float gx = gridp[grid_offset]; |
87 | 0 | const float gy = gridp[grid_offset + gcstride]; |
88 | 0 | const float ix = align_corners ? (gx + 1) * (W_in - 1) * 0.5f : ((gx + 1) * W_in - 1) * 0.5f; |
89 | 0 | const float iy = align_corners ? (gy + 1) * (H_in - 1) * 0.5f : ((gy + 1) * H_in - 1) * 0.5f; |
90 | 0 | const int x0 = (int)floorf(ix); |
91 | 0 | const int y0 = (int)floorf(iy); |
92 | 0 | const int x1 = x0 + 1; |
93 | 0 | const int y1 = y0 + 1; |
94 | 0 | const float wx1 = ix - x0; |
95 | 0 | const float wy1 = iy - y0; |
96 | 0 | const float wx0 = 1.0f - wx1; |
97 | 0 | const float wy0 = 1.0f - wy1; |
98 | 0 | const int a_offset_nc = n * anstride; |
99 | 0 | const int b_offset_nc = n * bnstride; |
100 | 0 | for (int c = 0; c < C; c++) |
101 | 0 | { |
102 | 0 | float v00 = 0, v01 = 0, v10 = 0, v11 = 0; |
103 | 0 | const int a_offset_ncc = a_offset_nc + c * acstride; |
104 | 0 | if (y0 >= 0 && y0 < H_in && x0 >= 0 && x0 < W_in) |
105 | 0 | v00 = ap[a_offset_ncc + y0 * ahstride + x0 * awstride]; |
106 | 0 | if (y0 >= 0 && y0 < H_in && x1 >= 0 && x1 < W_in) |
107 | 0 | v01 = ap[a_offset_ncc + y0 * ahstride + x1 * awstride]; |
108 | 0 | if (y1 >= 0 && y1 < H_in && x0 >= 0 && x0 < W_in) |
109 | 0 | v10 = ap[a_offset_ncc + y1 * ahstride + x0 * awstride]; |
110 | 0 | if (y1 >= 0 && y1 < H_in && x1 >= 0 && x1 < W_in) |
111 | 0 | v11 = ap[a_offset_ncc + y1 * ahstride + x1 * awstride]; |
112 | 0 | const float v = v00 * wy0 * wx0 + v01 * wy0 * wx1 + v10 * wy1 * wx0 + v11 * wy1 * wx1; |
113 | 0 | bp[b_offset_nc + c * bcstride + y * bhstride + x * bwstride] = v; |
114 | 0 | } |
115 | 0 | } |
116 | 0 | return CCV_NNC_EXEC_SUCCESS; |
117 | 0 | } |
118 | | |
119 | | REGISTER_COMMAND_BACKEND(CCV_NNC_GRID_SAMPLE_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
120 | 1 | { |
121 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC; |
122 | 1 | registry->tensor_datatypes = CCV_32F; |
123 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
124 | 1 | registry->algorithms = 1; |
125 | 1 | registry->exec = _ccv_nnc_grid_sample_forw; |
126 | 1 | } |