/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/pool/ccv_nnc_max_pool_cpu_ref.c
Line | Count | Source |
1 | | #include "ccv.h" |
2 | | #include "ccv_internal.h" |
3 | | #include "nnc/ccv_nnc.h" |
4 | | #include "nnc/ccv_nnc_easy.h" |
5 | | #include "nnc/ccv_nnc_internal.h" |
6 | | #ifdef USE_OPENMP |
7 | | #include <omp.h> |
8 | | #endif |
9 | | #ifdef USE_DISPATCH |
10 | | #include <dispatch/dispatch.h> |
11 | | #endif |
12 | | |
13 | | static int _ccv_nnc_max_pool_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
14 | 326 | { |
15 | 326 | assert(input_size == 1); |
16 | 326 | const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0]; |
17 | 326 | assert(output_size == 1); |
18 | 326 | ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0]; |
19 | 326 | const int *dim = cmd.info.size.dim; |
20 | 326 | int i[CCV_NNC_MAX_DIM]; |
21 | 326 | int n[CCV_NNC_MAX_DIM]; |
22 | 326 | int m[CCV_NNC_MAX_DIM]; |
23 | 326 | int j[CCV_NNC_MAX_DIM]; |
24 | 326 | int c; |
25 | 326 | const int a_nd = ccv_nnc_tensor_nd(a->info.dim); |
26 | 326 | assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2); |
27 | 326 | const int* adim = (a_nd == CCV_NNC_MAX_DIM + 1) ? a->info.dim19 : a->info.dim + 1307 ; |
28 | 326 | const int b_nd = ccv_nnc_tensor_nd(b->info.dim); |
29 | 326 | assert(b_nd == CCV_NNC_MAX_DIM + 1 || b_nd == CCV_NNC_MAX_DIM + 2); |
30 | 326 | const int* bdim = (b_nd == CCV_NNC_MAX_DIM + 1) ? b->info.dim19 : b->info.dim + 1307 ; |
31 | 326 | float* ap = a->data.f32; |
32 | 326 | int astride[CCV_NNC_MAX_DIM_ALLOC]; |
33 | 326 | ccv_nnc_tensor_view_get_stride(a, astride); |
34 | 326 | float* bp = b->data.f32; |
35 | 326 | int bstride[CCV_NNC_MAX_DIM_ALLOC]; |
36 | 326 | ccv_nnc_tensor_view_get_stride(b, bstride); |
37 | 5.44k | for (i[0] = 0; i[0] < bdim[0]; i[0]++5.12k ) |
38 | 5.12k | { |
39 | 5.12k | SET_BORDER_OFFSET_SIZE_FOR(0, i, hint, dim, adim, n, m); |
40 | 108k | for (i[1] = 0; i[1] < bdim[1]; i[1]++103k ) |
41 | 103k | { |
42 | 103k | SET_BORDER_OFFSET_SIZE_FOR(1, i, hint, dim, adim, n, m); |
43 | 5.25M | for (c = 0; c < bdim[2]; c++5.14M ) |
44 | 5.14M | { |
45 | 5.14M | float* apz = ap + ccv_max(i[1] * hint.stride.dim[1] - hint.border.begin[1], 0) * astride[CCV_NNC_MAX_DIM] + c; |
46 | 5.14M | float v = apz[0]; |
47 | 20.5M | for (j[0] = 0; j[0] < m[0]; j[0]++15.4M ) |
48 | 15.4M | { |
49 | 61.7M | for (j[1] = 0; j[1] < m[1]; j[1]++46.3M ) |
50 | 46.3M | if (apz[j[1] * astride[CCV_NNC_MAX_DIM]] > v) |
51 | 7.41M | v = apz[j[1] * astride[CCV_NNC_MAX_DIM]]; |
52 | 15.4M | apz += astride[CCV_NNC_MAX_DIM - 1]; |
53 | 15.4M | } |
54 | 5.14M | bp[i[1] * bstride[CCV_NNC_MAX_DIM] + c] = v; |
55 | 5.14M | } |
56 | 103k | } |
57 | 5.12k | bp += bstride[CCV_NNC_MAX_DIM - 1]; |
58 | 5.12k | ap += astride[CCV_NNC_MAX_DIM - 1] * (ccv_max((i[0] + 1) * hint.stride.dim[0] - hint.border.begin[0], 0) - ccv_max(i[0] * hint.stride.dim[0] - hint.border.begin[0], 0)); |
59 | 5.12k | } |
60 | 326 | return CCV_NNC_EXEC_SUCCESS; |
61 | 326 | } |
62 | | |
63 | | static int _ccv_nnc_max_pool_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
64 | 369 | { |
65 | 369 | assert(input_size == 3); |
66 | 369 | const ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0]; // gradients |
67 | 369 | const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[1]; |
68 | 369 | const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[2]; |
69 | 369 | assert(output_size == 1); |
70 | 369 | ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[0]; |
71 | 369 | const int *dim = cmd.info.size.dim; |
72 | 369 | int i[CCV_NNC_MAX_DIM]; |
73 | 369 | int n[CCV_NNC_MAX_DIM]; |
74 | 369 | int m[CCV_NNC_MAX_DIM]; |
75 | 369 | int j[CCV_NNC_MAX_DIM]; |
76 | 369 | int c; |
77 | 369 | const int a_nd = ccv_nnc_tensor_nd(a->info.dim); |
78 | 369 | assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2); |
79 | 369 | const int* adim = (a_nd == CCV_NNC_MAX_DIM + 1) ? a->info.dim2 : a->info.dim + 1367 ; |
80 | 369 | const int b_nd = ccv_nnc_tensor_nd(b->info.dim); |
81 | 369 | assert(b_nd == CCV_NNC_MAX_DIM + 1 || b_nd == CCV_NNC_MAX_DIM + 2); |
82 | 369 | const int* bdim = (b_nd == CCV_NNC_MAX_DIM + 1) ? b->info.dim2 : b->info.dim + 1367 ; |
83 | 369 | const int g_nd = ccv_nnc_tensor_nd(g->info.dim); |
84 | 369 | assert(g_nd == CCV_NNC_MAX_DIM + 1 || g_nd == CCV_NNC_MAX_DIM + 2); |
85 | 369 | const int h_nd = ccv_nnc_tensor_nd(h->info.dim); |
86 | 369 | assert(h_nd == CCV_NNC_MAX_DIM + 1 || h_nd == CCV_NNC_MAX_DIM + 2); |
87 | 369 | float* ap = a->data.f32; |
88 | 369 | int astride[CCV_NNC_MAX_DIM_ALLOC]; |
89 | 369 | ccv_nnc_tensor_view_get_stride(a, astride); |
90 | 369 | float* bp = b->data.f32; |
91 | 369 | int bstride[CCV_NNC_MAX_DIM_ALLOC]; |
92 | 369 | ccv_nnc_tensor_view_get_stride(b, bstride); |
93 | 369 | float* gp = g->data.f32; |
94 | 369 | int gstride[CCV_NNC_MAX_DIM_ALLOC]; |
95 | 369 | ccv_nnc_tensor_view_get_stride(g, gstride); |
96 | 369 | float* hp = h->data.f32; |
97 | 369 | int hstride[CCV_NNC_MAX_DIM_ALLOC]; |
98 | 369 | ccv_nnc_tensor_view_get_stride(h, hstride); |
99 | 1.84k | for (c = 0; c < CCV_NNC_MAX_DIM_ALLOC; c++1.47k ) |
100 | 1.84k | { |
101 | 1.84k | assert(a->info.dim[c] == h->info.dim[c]); |
102 | 1.84k | if (a->info.dim[c] == 0 || h->info.dim[c] == 01.47k ) |
103 | 369 | break; |
104 | 1.84k | } |
105 | 1.84k | for (c = 0; 369 c < CCV_NNC_MAX_DIM_ALLOC; c++1.47k ) |
106 | 1.84k | { |
107 | 1.84k | assert(b->info.dim[c] == g->info.dim[c]); |
108 | 1.84k | if (b->info.dim[c] == 0 || g->info.dim[c] == 01.47k ) |
109 | 369 | break; |
110 | 1.84k | } |
111 | 369 | ccv_nnc_tensor_zero(h); |
112 | | // Using b->info.dim and a->info.dim directly because they equal to g->info.dim and h->info.dim |
113 | 5.88k | for (i[0] = 0; i[0] < bdim[0]; i[0]++5.51k ) |
114 | 5.51k | { |
115 | 5.51k | SET_BORDER_OFFSET_SIZE_FOR(0, i, hint, dim, adim, n, m); |
116 | 88.1k | for (i[1] = 0; i[1] < bdim[1]; i[1]++82.5k ) |
117 | 82.5k | { |
118 | 82.5k | SET_BORDER_OFFSET_SIZE_FOR(1, i, hint, dim, adim, n, m); |
119 | 2.72M | for (c = 0; c < bdim[CCV_NNC_MAX_DIM]; c++2.64M ) |
120 | 2.64M | { |
121 | 2.64M | float* apz = ap + ccv_max(i[1] * hint.stride.dim[1] - hint.border.begin[1], 0) * astride[CCV_NNC_MAX_DIM] + c; |
122 | 2.64M | float* hpz = hp + ccv_max(i[1] * hint.stride.dim[1] - hint.border.begin[1], 0) * hstride[CCV_NNC_MAX_DIM] + c; |
123 | 2.64M | float v = bp[i[1] * bstride[CCV_NNC_MAX_DIM] + c]; |
124 | 2.64M | float u = gp[i[1] * gstride[CCV_NNC_MAX_DIM] + c]; |
125 | 10.5M | for (j[0] = 0; j[0] < m[0]; j[0]++7.92M ) |
126 | 7.92M | { |
127 | 31.7M | for (j[1] = 0; j[1] < m[1]; j[1]++23.7M ) |
128 | 23.7M | if (apz[j[1] * astride[CCV_NNC_MAX_DIM]] == v) |
129 | 2.67M | hpz[j[1] * hstride[CCV_NNC_MAX_DIM]] += u; |
130 | 7.92M | apz += astride[CCV_NNC_MAX_DIM - 1]; |
131 | 7.92M | hpz += hstride[CCV_NNC_MAX_DIM - 1]; |
132 | 7.92M | } |
133 | 2.64M | } |
134 | 82.5k | } |
135 | 5.51k | gp += gstride[CCV_NNC_MAX_DIM - 1]; |
136 | 5.51k | bp += bstride[CCV_NNC_MAX_DIM - 1]; |
137 | 5.51k | ap += astride[CCV_NNC_MAX_DIM - 1] * (ccv_max((i[0] + 1) * hint.stride.dim[0] - hint.border.begin[0], 0) - ccv_max(i[0] * hint.stride.dim[0] - hint.border.begin[0], 0)); |
138 | 5.51k | hp += hstride[CCV_NNC_MAX_DIM - 1] * (ccv_max((i[0] + 1) * hint.stride.dim[0] - hint.border.begin[0], 0) - ccv_max(i[0] * hint.stride.dim[0] - hint.border.begin[0], 0)); |
139 | 5.51k | } |
140 | 369 | return CCV_NNC_EXEC_SUCCESS; |
141 | 369 | } |
142 | | |
143 | | REGISTER_COMMAND_BACKEND(CCV_NNC_MAX_POOL_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
144 | 1 | { |
145 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC; |
146 | 1 | registry->tensor_datatypes = CCV_32F; |
147 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
148 | 1 | registry->algorithms = 1; |
149 | 1 | registry->exec = _ccv_nnc_max_pool_forw; |
150 | 1 | } |
151 | | |
152 | | REGISTER_COMMAND_BACKEND(CCV_NNC_MAX_POOL_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
153 | 1 | { |
154 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC; |
155 | 1 | registry->tensor_datatypes = CCV_32F; |
156 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
157 | 1 | registry->algorithms = 1; |
158 | 1 | registry->exec = _ccv_nnc_max_pool_back; |
159 | 1 | } |