/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/convolution/ccv_nnc_conv_cpu_opt.c
Line | Count | Source |
1 | | #include "ccv.h" |
2 | | #include "ccv_internal.h" |
3 | | #include "nnc/ccv_nnc.h" |
4 | | #include "nnc/ccv_nnc_easy.h" |
5 | | #include "nnc/ccv_nnc_internal.h" |
6 | | |
7 | | #include "_ccv_nnc_conv_cpu_opt.h" |
8 | | |
9 | | FIND_FILE(cpu_opt/_ccv_nnc_conv_cpu_4x4_3x3_winograd.c, cpu_opt/_ccv_nnc_conv_cpu_fft.c, cpu_opt/_ccv_nnc_conv_cpu_gemm.c, cpu_opt/_ccv_nnc_conv_cpu_opt.c) |
10 | | |
11 | | enum { |
12 | | CCV_NNC_CMD_OPT_CONV_ALGO_DC, // Direct convolution |
13 | | CCV_NNC_CMD_OPT_CONV_ALGO_GEMM, // GEMM (for 1x1) |
14 | | CCV_NNC_CMD_OPT_CONV_ALGO_WINOGRAD, // Winograd algorithm |
15 | | CCV_NNC_CMD_OPT_CONV_ALGO_FFT, // Fast Fourier transform |
16 | | CCV_NNC_CMD_OPT_CONV_ALGO_COUNT |
17 | | }; |
18 | | |
19 | | static int _ccv_nnc_conv_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
20 | 1.66k | { |
21 | 1.66k | assert(input_size >= 2); |
22 | 1.66k | const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0]; |
23 | 1.66k | const ccv_nnc_tensor_t* w = inputs[1]; |
24 | 1.66k | assert(CCV_IS_TENSOR_CONTIGUOUS(w)); |
25 | 1.66k | const ccv_nnc_tensor_t* bias = input_size > 2 ? inputs[2] : 0;
26 | 1.66k | assert(!bias || !CCV_IS_TENSOR_VIEW(bias)); |
27 | 1.66k | assert(output_size == 1); |
28 | 1.66k | ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0]; |
29 | 1.66k | const int a_nd = ccv_nnc_tensor_nd(a->info.dim); |
30 | 1.66k | assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2); |
31 | 1.66k | const int* adim = (a_nd == CCV_NNC_MAX_DIM + 1) ? a->info.dim : a->info.dim + 1;
32 | 1.66k | const int b_nd = ccv_nnc_tensor_nd(b->info.dim); |
33 | 1.66k | assert(b_nd == CCV_NNC_MAX_DIM + 1 || b_nd == CCV_NNC_MAX_DIM + 2); |
34 | 1.66k | const int* bdim = (b_nd == CCV_NNC_MAX_DIM + 1) ? b->info.dim : b->info.dim + 1;
35 | 1.66k | assert(w->info.dim[CCV_NNC_MAX_DIM + 1] == adim[CCV_NNC_MAX_DIM]); |
36 | 1.66k | assert(bdim[CCV_NNC_MAX_DIM] == cmd.info.convolution.count); |
37 | 1.66k | if (cmd.info.convolution.groups != 1) |
38 | 0 | return CCV_NNC_EXEC_INVALID; |
39 | 1.66k | if (cmd.info.convolution.dilation[0] > 1 || cmd.info.convolution.dilation[1] > 1) |
40 | 0 | return CCV_NNC_EXEC_INVALID; |
41 | 1.66k | int i; |
42 | | // Make sure the weights dimension matches the network dimension |
43 | 5.44k | for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC; i++)
44 | 5.44k | { |
45 | 5.44k | if (w->info.dim[i] == 0 || cmd.info.size.dim[i - 1] == 0)
46 | 1.66k | break; |
47 | 3.77k | assert(w->info.dim[i] == cmd.info.size.dim[i - 1]); |
48 | 3.77k | } |
49 | 1.66k | switch (cmd.algorithm) |
50 | 1.66k | { |
51 | 1.14k | case CCV_NNC_CMD_OPT_CONV_ALGO_DC: |
52 | 1.14k | return _ccv_nnc_conv_forw_cpu_opt(a, w, bias, hint, b); |
53 | 169 | case CCV_NNC_CMD_OPT_CONV_ALGO_GEMM: |
54 | 169 | if (w->info.dim[1] == 1 && w->info.dim[2] == 1 && hint.stride.dim[0] <= 1 && hint.stride.dim[1] <= 1 &&
55 | 169 | hint.border.begin[0] == 0 && hint.border.begin[1] == 0 && hint.border.end[0] == 0 && hint.border.end[1] == 0 &&
56 | 169 | !CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(w) && (!bias || !CCV_IS_TENSOR_VIEW(bias)))
57 | 4 | return _ccv_nnc_conv_forw_gemm_cpu_opt(a, w, bias, hint, b); |
58 | 165 | return CCV_NNC_EXEC_INVALID; |
59 | 185 | case CCV_NNC_CMD_OPT_CONV_ALGO_WINOGRAD: |
60 | 185 | if (w->info.dim[1] == 3 && w->info.dim[2] == 3 && hint.stride.dim[0] <= 1 && hint.stride.dim[1] <= 1)
61 | 119 | return _ccv_nnc_conv_forw_4x4_3x3_winograd_cpu_opt(a, w, bias, hint, b, stream_context); |
62 | 66 | return CCV_NNC_EXEC_INVALID; |
63 | 168 | case CCV_NNC_CMD_OPT_CONV_ALGO_FFT: |
64 | 168 | return CCV_NNC_EXEC_INVALID; // Placeholder, for fft. |
65 | 0 | case -1: |
66 | | // Pass-through |
67 | 0 | break; |
68 | 1.66k | } |
69 | | // If the size is 3x3, and no stride, choose Winograd kernel |
70 | 0 | if (w->info.dim[1] == 3 && w->info.dim[2] == 3 && hint.stride.dim[0] <= 1 && hint.stride.dim[1] <= 1) |
71 | 0 | return _ccv_nnc_conv_forw_4x4_3x3_winograd_cpu_opt(a, w, bias, hint, b, stream_context); |
72 | | // If the size is 1x1, and no stride, and not a tensor view object, no padding, choose GEMM kernel |
73 | 0 | if (w->info.dim[1] == 1 && w->info.dim[2] == 1 && hint.stride.dim[0] <= 1 && hint.stride.dim[1] <= 1 && |
74 | 0 | hint.border.begin[0] == 0 && hint.border.begin[1] == 0 && hint.border.end[0] == 0 && hint.border.end[1] == 0 && |
75 | 0 | !CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(w) && (!bias || !CCV_IS_TENSOR_VIEW(bias))) |
76 | 0 | return _ccv_nnc_conv_forw_gemm_cpu_opt(a, w, bias, hint, b); |
77 | | // Otherwise, use direct convolution kernel |
78 | 0 | return _ccv_nnc_conv_forw_cpu_opt(a, w, bias, hint, b); |
79 | 0 | } |
80 | | |
81 | | REGISTER_COMMAND_BACKEND(CCV_NNC_CONVOLUTION_FORWARD, CCV_NNC_BACKEND_CPU_OPT)(ccv_nnc_cmd_backend_registry_t* const registry) |
82 | 1 | { |
83 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC; |
84 | 1 | registry->tensor_datatypes = CCV_32F; |
85 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
86 | 1 | registry->algorithms = CCV_NNC_CMD_OPT_CONV_ALGO_COUNT; |
87 | 1 | registry->exec = _ccv_nnc_conv_forw; |
88 | 1 | } |
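
For reference: the switch at line 49 dispatches on cmd.algorithm, so a caller can pin one of the four algorithms explicitly. Below is a minimal sketch of doing so, assuming the CMD_CONVOLUTION_FORWARD, CPU_TENSOR_NHWC, and TENSOR_LIST convenience macros from nnc/ccv_nnc_easy.h behave as in the ccv test suite; the shapes and the main() wrapper are illustrative only.

#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_easy.h"

int main(void)
{
	// A 1x24x24x8 NHWC input convolved with 32 3x3 filters at stride 1, so
	// the Winograd branch at line 60 above is eligible.
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 24, 24, 8), 0);
	ccv_nnc_tensor_t* const w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 32, 3, 3, 8), 0);
	ccv_nnc_tensor_t* const bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 32), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 24, 24, 32), 0);
	ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(1, 32, 3, 3, 8); // groups, filter count, kernel dims
	cmd.backend = CCV_NNC_BACKEND_CPU_OPT; // route to the backend registered in this file
	cmd.algorithm = CCV_NNC_CMD_OPT_CONV_ALGO_WINOGRAD; // pin the Winograd kernel
	const ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
	ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(w);
	ccv_nnc_tensor_free(bias);
	ccv_nnc_tensor_free(b);
	return 0;
}

Leaving cmd.algorithm at -1 takes the pass-through case instead: lines 70-78 then auto-select Winograd for 3x3 stride-1 weights, GEMM for padding-free 1x1 convolutions on contiguous (non-view) tensors, and direct convolution otherwise.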
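
Because the registration reports registry->algorithms = CCV_NNC_CMD_OPT_CONV_ALGO_COUNT, the framework can also benchmark all four algorithms and keep the fastest. Continuing from the sketch above, and assuming the public ccv_nnc_cmd_autotune entry point in nnc/ccv_nnc.h works here as it does for other ccv_nnc backends (an assumption, not verified against this file):

	// Let nnc time each registered algorithm on these tensors; the returned
	// cmd carries the winning algorithm. Passing 0 for max_workspace_size and
	// 0 for flags is assumed to mean "no limit" and "no flags" respectively.
	const ccv_nnc_cmd_t tuned = ccv_nnc_cmd_autotune(cmd, 0, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
	ccv_nnc_cmd_exec(tuned, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);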