/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/convolution/ccv_nnc_convolution.c
Line | Count | Source |
1 | | #include "ccv.h" |
2 | | #include "nnc/ccv_nnc.h" |
3 | | #include "nnc/ccv_nnc_easy.h" |
4 | | #include "nnc/ccv_nnc_internal.h" |
5 | | |
6 | | static int _ccv_nnc_conv_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
7 | 119 | { |
8 | 119 | if (input_size == 3 && (input_bitmasks[0] & 7u) == ((1u << 0) | (1u << 1) | (1u << 2))109 && output_bitmasks[0] == 1u109 ) |
9 | 109 | return 1; |
10 | | // Ignore bias. |
11 | 10 | if (input_size == 2 && (input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1)) && output_bitmasks[0] == 1u) |
12 | 10 | return 1; |
13 | 0 | return 0; |
14 | 10 | } |
15 | | |
16 | | static int _ccv_nnc_conv_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
17 | 451 | { |
18 | | // Output the propagated error, gradient w.r.t. w and bias. |
19 | 451 | if ((input_bitmasks[0] & 7u) == ((1u << 0) | (1u << 1) | (1u << 2) | (0 << 3)) && output_bitmasks[0] == ((1u << 0) | (1u << 1) | (1u << 2))138 ) |
20 | 124 | return 1; |
21 | | // Ignore bias. |
22 | 327 | if ((input_bitmasks[0] & 7u) == ((1u << 0) | (1u << 1) | (1u << 2) | (0 << 3)) && output_bitmasks[0] == ((1u << 0) | (1u << 1) | (0 << 2))14 ) |
23 | 0 | return 1; |
24 | | // Don't propagate error, only gradient w.r.t. w and bias. |
25 | 327 | if ((input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1) | (0 << 2) | (0 << 3)) && output_bitmasks[0] == ((0 << 0) | (1u << 1) | (1u << 2))124 ) |
26 | 48 | return 1; |
27 | | // Ignore bias. |
28 | 279 | if ((input_bitmasks[0] & 3u) == ((1u << 0) | (1u << 1) | (0 << 2) | (0 << 3)) && output_bitmasks[0] == ((0 << 0) | (1u << 1) | (0 << 2))76 ) |
29 | 0 | return 1; |
30 | | // Ignore weight. |
31 | 279 | if ((input_bitmasks[0] & 5u) == ((1u << 0) | (0 << 1) | (1u << 2) | (0 << 3)) && output_bitmasks[0] == ((1u << 0) | (0 << 1) | (1u << 2))91 ) |
32 | 1 | return 1; |
33 | | // Ignore bias and weight. |
34 | 278 | if ((input_bitmasks[0] & 5u) == ((1u << 0) | (0 << 1) | (1u << 2) | (0 << 3)) && output_bitmasks[0] == ((1u << 0) | (0 << 1) | (0 << 2))90 ) |
35 | 4 | return 1; |
36 | 274 | return 0; |
37 | 278 | } |
38 | | |
39 | | static void _ccv_nnc_conv_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* outputs, const int output_size) |
40 | 2.19k | { |
41 | 2.19k | assert(output_size == 1); |
42 | 2.19k | outputs[0].type = inputs[0].type; |
43 | 2.19k | outputs[0].format = inputs[0].format; |
44 | 2.19k | outputs[0].datatype = inputs[0].datatype; |
45 | | // Get the channel output from the weight matrix. |
46 | 2.19k | const int count = ccv_nnc_tensor_get_n(inputs[1]); |
47 | 2.19k | assert(count == cmd.convolution.count); |
48 | 2.19k | ccv_nnc_tensor_set_c(outputs, ccv_nnc_tensor_nd(inputs[0].dim), count); |
49 | 2.19k | ccv_nnc_tensor_set_n(outputs, ccv_nnc_tensor_get_n(inputs[0])); |
50 | 2.19k | ccv_nnc_cmd_param_t modified_cmd = cmd; |
51 | 2.19k | int i = 0; |
52 | 2.19k | const int size_nd = ccv_nnc_tensor_nd(cmd.size.dim) - 1; |
53 | 2.19k | assert(size_nd == 2 || size_nd == 3); // Support 3D convolution. |
54 | 6.57k | for (i = 0; 2.19k i < size_nd; i++4.38k ) |
55 | 4.38k | modified_cmd.size.dim[i] = (modified_cmd.size.dim[i] - 1) * ccv_max(cmd.convolution.dilation[i], 1) + 1; |
56 | 2.19k | ccv_nnc_hint_tensor_forward(modified_cmd, inputs[0], hint, outputs); |
57 | 2.19k | } |
58 | | |
// Register the forward convolution command: wire up its tensor-bitmask
// validator and automatic output-shape inference hook. FIND_BACKEND lists the
// backend implementation files the build system scans for this command.
REGISTER_COMMAND(CCV_NNC_CONVOLUTION_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_conv_cpu_ref.c, ccv_nnc_conv_cpu_opt.c, gpu/ccv_nnc_conv_gpu_cudnn.cu, mps/ccv_nnc_conv_mps.m)
{
	registry->bitmask = _ccv_nnc_conv_forw_bitmask;
	registry->tensor_auto = _ccv_nnc_conv_tensor_auto_forw;
}
65 | | |
// Register the backward convolution command. Gradient tensor shapes mirror
// the corresponding forward inputs, so the generic
// ccv_nnc_hint_tensor_auto_backward_from_inputs hook is used for shape inference.
REGISTER_COMMAND(CCV_NNC_CONVOLUTION_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_conv_cpu_ref.c, ccv_nnc_conv_cpu_opt.c, gpu/ccv_nnc_conv_gpu_cudnn.cu, mps/ccv_nnc_conv_mps.m)
{
	registry->bitmask = _ccv_nnc_conv_back_bitmask;
	registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs;
}
72 | | |
// Convenience constructors: _groups = group count for grouped convolution,
// _count = number of output channels (filters), trailing varargs = kernel
// dimensions (e.g. kernel height, width[, depth] and input channels).
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_CONVOLUTION_FORWARD)
#define CMD_CONVOLUTION_FORWARD(_groups, _count, ...) ccv_nnc_cmd(CCV_NNC_CONVOLUTION_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={__VA_ARGS__}},.convolution={.count=_count,.groups=_groups}}), 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_CONVOLUTION_BACKWARD)
#define CMD_CONVOLUTION_BACKWARD(_groups, _count, ...) ccv_nnc_cmd(CCV_NNC_CONVOLUTION_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={__VA_ARGS__}},.convolution={.count=_count,.groups=_groups}}), 0)
77 | | |
// Infer the output tensor parameters for forward transposed convolution
// (deconvolution). First runs the regular forward shape inference to fill in
// type/format/channel metadata, then overwrites the spatial dims with the
// transposed-convolution formula:
//   out = (in - 1) * stride + kernel_extent - border_begin - border_end + output_padding
static void _ccv_nnc_conv_transpose_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* outputs, const int output_size)
{
	assert(output_size == 1);
	outputs[0].type = inputs[0].type;
	outputs[0].format = inputs[0].format;
	outputs[0].datatype = inputs[0].datatype;
	// Get the channel output from the weight matrix (c, not n — transposed
	// convolution weights store the output channels in the c dimension).
	const int count = ccv_nnc_tensor_get_c(inputs[1]);
	assert(count == cmd.convolution_transpose.count);
	ccv_nnc_tensor_set_c(outputs, ccv_nnc_tensor_nd(inputs[0].dim), count);
	ccv_nnc_tensor_set_n(outputs, ccv_nnc_tensor_get_n(inputs[0]));
	ccv_nnc_cmd_param_t modified_cmd = cmd;
	int i = 0;
	const int size_nd = ccv_nnc_tensor_nd(cmd.size.dim) - 1;
	assert(size_nd == 2 || size_nd == 3); // Support 3D convolution.
	// NOTE(review): this reads cmd.convolution.dilation in a transpose command —
	// presumably convolution and convolution_transpose overlay in the cmd param
	// union so the fields alias; confirm before relying on it.
	for (i = 0; i < size_nd; i++)
		modified_cmd.size.dim[i] = (modified_cmd.size.dim[i] - 1) * ccv_max(cmd.convolution.dilation[i], 1) + 1;
	ccv_nnc_hint_tensor_forward(modified_cmd, inputs[0], hint, outputs);
	assert(inputs[0].format == outputs[0].format);
	const int nd = ccv_nnc_tensor_nd(inputs[0].dim);
	assert(nd == size_nd + 1 || nd == size_nd + 2); // With or without batch dim.
	// Index of the first spatial (height/width) dimension for this format.
	int hw = ccv_nnc_tensor_hw(inputs[0], nd, size_nd);
	assert(hw >= 0);
	for (i = 0; i < size_nd; i++)
	{
		const int stride = ccv_max(1, hint.stride.dim[i]);
		// NOTE(review): modified_cmd.size.dim[i] was already expanded by dilation
		// in the loop above, and dilation is applied again here — if the two
		// dilation fields alias this dilates twice; TODO confirm intended.
		const int size_dim = (modified_cmd.size.dim[i] - 1) * ccv_max(cmd.convolution_transpose.dilation[i], 1) + 1;
		outputs[0].dim[i + hw] = (inputs[0].dim[i + hw] - 1) * stride + size_dim - hint.border.begin[i] - hint.border.end[i] + cmd.convolution_transpose.output_padding;
	}
}
108 | | |
// Register the forward transposed convolution command. It reuses the regular
// convolution bitmask validator (same input/output tensor layout) with its own
// shape-inference hook.
REGISTER_COMMAND(CCV_NNC_CONVOLUTION_TRANSPOSE_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_conv_transpose_cpu_ref.c, gpu/ccv_nnc_conv_transpose_gpu_cudnn.cu, mps/ccv_nnc_conv_transpose_mps.m)
{
	registry->bitmask = _ccv_nnc_conv_forw_bitmask;
	registry->tensor_auto = _ccv_nnc_conv_transpose_tensor_auto_forw;
}
115 | | |
// Register the backward transposed convolution command. Bitmask validation is
// shared with regular convolution backward; gradient shapes follow the forward
// inputs via the generic backward shape-inference hook.
REGISTER_COMMAND(CCV_NNC_CONVOLUTION_TRANSPOSE_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
	FIND_BACKEND(ccv_nnc_conv_transpose_cpu_ref.c, gpu/ccv_nnc_conv_transpose_gpu_cudnn.cu, mps/ccv_nnc_conv_transpose_mps.m)
{
	registry->bitmask = _ccv_nnc_conv_back_bitmask;
	registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs;
}
122 | | |
// Convenience constructors for transposed convolution: _groups = group count,
// _count = number of output channels, _output_padding = extra size added to
// the output spatial dims, trailing varargs = kernel dimensions.
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_CONVOLUTION_TRANSPOSE_FORWARD)
#define CMD_CONVOLUTION_TRANSPOSE_FORWARD(_groups, _count, _output_padding, ...) ccv_nnc_cmd(CCV_NNC_CONVOLUTION_TRANSPOSE_FORWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={__VA_ARGS__}},.convolution_transpose={.count=_count,.groups=_groups,.output_padding=_output_padding}}), 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_CONVOLUTION_TRANSPOSE_BACKWARD)
#define CMD_CONVOLUTION_TRANSPOSE_BACKWARD(_groups, _count, _output_padding, ...) ccv_nnc_cmd(CCV_NNC_CONVOLUTION_TRANSPOSE_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={__VA_ARGS__}},.convolution_transpose={.count=_count,.groups=_groups,.output_padding=_output_padding}}), 0)