/* lib/nnc/cmd/pool/ccv_nnc_avg_pool_cpu_ref.c
 * NOTE(review): this file was captured as an llvm-cov coverage listing
 * ("Line | Count | Source" header, per-line hit counts); the listing
 * prefixes and inline region counts have been stripped below to restore
 * compilable source. */
1 | | #include "ccv.h" |
2 | | #include "ccv_internal.h" |
3 | | #include "nnc/ccv_nnc.h" |
4 | | #include "nnc/ccv_nnc_easy.h" |
5 | | #include "nnc/ccv_nnc_internal.h" |
6 | | #ifdef USE_OPENMP |
7 | | #include <omp.h> |
8 | | #endif |
9 | | #ifdef USE_DISPATCH |
10 | | #include <dispatch/dispatch.h> |
11 | | #endif |
12 | | |
13 | | static int _ccv_nnc_avg_pool_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
14 | 631 | { |
15 | 631 | assert(input_size == 1); |
16 | 631 | const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0]; |
17 | 631 | assert(output_size == 1); |
18 | 631 | ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0]; |
19 | 631 | const int *dim = cmd.info.size.dim; |
20 | 631 | int i[CCV_NNC_MAX_DIM]; |
21 | 631 | int n[CCV_NNC_MAX_DIM]; |
22 | 631 | int m[CCV_NNC_MAX_DIM]; |
23 | 631 | int j[CCV_NNC_MAX_DIM]; |
24 | 631 | int c; |
25 | 631 | const int a_nd = ccv_nnc_tensor_nd(a->info.dim); |
26 | 631 | assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2); |
27 | 631 | const int* adim = (a_nd == CCV_NNC_MAX_DIM + 1) ? a->info.dim16 : a->info.dim + 1615 ; |
28 | 631 | const int b_nd = ccv_nnc_tensor_nd(b->info.dim); |
29 | 631 | assert(b_nd == CCV_NNC_MAX_DIM + 1 || b_nd == CCV_NNC_MAX_DIM + 2); |
30 | 631 | const int* bdim = (b_nd == CCV_NNC_MAX_DIM + 1) ? b->info.dim17 : b->info.dim + 1614 ; |
31 | 631 | float* ap = a->data.f32; |
32 | 631 | int astride[CCV_NNC_MAX_DIM_ALLOC]; |
33 | 631 | ccv_nnc_tensor_view_get_stride(a, astride); |
34 | 631 | float* bp = b->data.f32; |
35 | 631 | int bstride[CCV_NNC_MAX_DIM_ALLOC]; |
36 | 631 | ccv_nnc_tensor_view_get_stride(b, bstride); |
37 | 3.83k | for (i[0] = 0; i[0] < bdim[0]; i[0]++3.20k ) |
38 | 3.20k | { |
39 | 3.20k | SET_BORDER_OFFSET_SIZE_FOR(0, i, hint, dim, adim, n, m); |
40 | 23.1k | for (i[1] = 0; i[1] < bdim[1]; i[1]++19.9k ) |
41 | 19.9k | { |
42 | 19.9k | SET_BORDER_OFFSET_SIZE_FOR(1, i, hint, dim, adim, n, m); |
43 | 681k | for (c = 0; c < bdim[CCV_NNC_MAX_DIM]; c++661k ) |
44 | 661k | { |
45 | 661k | float* apz = ap + ccv_max(i[1] * hint.stride.dim[1] - hint.border.begin[1], 0) * astride[CCV_NNC_MAX_DIM] + c; |
46 | 661k | float v = 0; |
47 | 2.64M | for (j[0] = 0; j[0] < m[0]; j[0]++1.98M ) |
48 | 1.98M | { |
49 | 7.93M | for (j[1] = 0; j[1] < m[1]; j[1]++5.94M ) |
50 | 5.94M | v += apz[j[1] * astride[CCV_NNC_MAX_DIM]]; |
51 | 1.98M | apz += astride[CCV_NNC_MAX_DIM - 1]; |
52 | 1.98M | } |
53 | 661k | bp[i[1] * bstride[CCV_NNC_MAX_DIM] + c] = v / (m[0] * m[1]); |
54 | 661k | } |
55 | 19.9k | } |
56 | 3.20k | bp += bstride[CCV_NNC_MAX_DIM - 1]; |
57 | 3.20k | ap += astride[CCV_NNC_MAX_DIM - 1] * (ccv_max((i[0] + 1) * hint.stride.dim[0] - hint.border.begin[0], 0) - ccv_max(i[0] * hint.stride.dim[0] - hint.border.begin[0], 0)); |
58 | 3.20k | } |
59 | 631 | return CCV_NNC_EXEC_SUCCESS; |
60 | 631 | } |
61 | | |
62 | | static int _ccv_nnc_avg_pool_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
63 | 738 | { |
64 | 738 | assert(input_size >= 1); |
65 | 738 | const ccv_nnc_tensor_view_t* g = (ccv_nnc_tensor_view_t*)inputs[0]; |
66 | 738 | assert(output_size == 1); |
67 | 738 | ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[0]; |
68 | 738 | const int *dim = cmd.info.size.dim; |
69 | 738 | int i[CCV_NNC_MAX_DIM]; |
70 | 738 | int n[CCV_NNC_MAX_DIM]; |
71 | 738 | int m[CCV_NNC_MAX_DIM]; |
72 | 738 | int j[CCV_NNC_MAX_DIM]; |
73 | 738 | int c; |
74 | 738 | const int g_nd = ccv_nnc_tensor_nd(g->info.dim); |
75 | 738 | assert(g_nd == CCV_NNC_MAX_DIM + 1 || g_nd == CCV_NNC_MAX_DIM + 2); |
76 | 738 | const int* gdim = (g_nd == CCV_NNC_MAX_DIM + 1) ? g->info.dim4 : g->info.dim + 1734 ; |
77 | 738 | const int h_nd = ccv_nnc_tensor_nd(h->info.dim); |
78 | 738 | assert(h_nd == CCV_NNC_MAX_DIM + 1 || h_nd == CCV_NNC_MAX_DIM + 2); |
79 | 738 | const int* hdim = (h_nd == CCV_NNC_MAX_DIM + 1) ? h->info.dim4 : h->info.dim + 1734 ; |
80 | 738 | float* gp = g->data.f32; |
81 | 738 | int gstride[CCV_NNC_MAX_DIM_ALLOC]; |
82 | 738 | ccv_nnc_tensor_view_get_stride(g, gstride); |
83 | 738 | float* hp = h->data.f32; |
84 | 738 | int hstride[CCV_NNC_MAX_DIM_ALLOC]; |
85 | 738 | ccv_nnc_tensor_view_get_stride(h, hstride); |
86 | 738 | ccv_nnc_tensor_zero(h); |
87 | 4.42k | for (i[0] = 0; i[0] < gdim[0]; i[0]++3.68k ) |
88 | 3.68k | { |
89 | 3.68k | SET_BORDER_OFFSET_SIZE_FOR(0, i, hint, dim, hdim, n, m); |
90 | 25.0k | for (i[1] = 0; i[1] < gdim[1]; i[1]++21.3k ) |
91 | 21.3k | { |
92 | 21.3k | SET_BORDER_OFFSET_SIZE_FOR(1, i, hint, dim, hdim, n, m); |
93 | 808k | for (c = 0; c < gdim[CCV_NNC_MAX_DIM]; c++787k ) |
94 | 787k | { |
95 | 787k | float* hpz = hp + ccv_max(i[1] * hint.stride.dim[1] - hint.border.begin[1], 0) * hstride[CCV_NNC_MAX_DIM] + c; |
96 | 787k | float u = gp[i[1] * gstride[CCV_NNC_MAX_DIM] + c] / (m[0] * m[1]); |
97 | 3.14M | for (j[0] = 0; j[0] < m[0]; j[0]++2.36M ) |
98 | 2.36M | { |
99 | 9.44M | for (j[1] = 0; j[1] < m[1]; j[1]++7.08M ) |
100 | 7.08M | hpz[j[1] * hstride[CCV_NNC_MAX_DIM]] += u; |
101 | 2.36M | hpz += hstride[CCV_NNC_MAX_DIM - 1]; |
102 | 2.36M | } |
103 | 787k | } |
104 | 21.3k | } |
105 | 3.68k | gp += gstride[CCV_NNC_MAX_DIM - 1]; |
106 | 3.68k | hp += hstride[CCV_NNC_MAX_DIM - 1] * (ccv_max((i[0] + 1) * hint.stride.dim[0] - hint.border.begin[0], 0) - ccv_max(i[0] * hint.stride.dim[0] - hint.border.begin[0], 0)); |
107 | 3.68k | } |
108 | 738 | return CCV_NNC_EXEC_SUCCESS; |
109 | 738 | } |
110 | | |
111 | | REGISTER_COMMAND_BACKEND(CCV_NNC_AVERAGE_POOL_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
112 | 1 | { |
113 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC; |
114 | 1 | registry->tensor_datatypes = CCV_32F; |
115 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
116 | 1 | registry->algorithms = 1; |
117 | 1 | registry->exec = _ccv_nnc_avg_pool_forw; |
118 | 1 | } |
119 | | |
120 | | REGISTER_COMMAND_BACKEND(CCV_NNC_AVERAGE_POOL_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
121 | 1 | { |
122 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC; |
123 | 1 | registry->tensor_datatypes = CCV_32F; |
124 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
125 | 1 | registry->algorithms = 1; |
126 | 1 | registry->exec = _ccv_nnc_avg_pool_back; |
127 | 1 | } |