/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/compression/ccv_nnc_compression.c
Line | Count | Source |
1 | | #include "ccv.h" |
2 | | #include "nnc/ccv_nnc.h" |
3 | | #include "nnc/ccv_nnc_internal.h" |
4 | | |
5 | | static int _ccv_nnc_lssc_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
6 | 0 | { |
7 | 0 | int i, j; |
8 | 0 | int input_flag = 0; |
9 | 0 | int input_bitcount = 0; |
10 | 0 | for (i = 0; i < input_bitmask_size; i++) |
11 | 0 | { |
12 | 0 | for (j = 0; j < 64; j++) |
13 | 0 | if (input_bitmasks[i] & (uint64_t)1 << j) |
14 | 0 | { |
15 | 0 | if (input_flag) |
16 | 0 | return 0; |
17 | 0 | } else |
18 | 0 | break; |
19 | 0 | input_bitcount += j; |
20 | 0 | if (j < 64) |
21 | 0 | input_flag = 1; |
22 | | // The set bits must form one contiguous run starting at bit 0, e.g. 1111100000, never 1110010101 |
23 | 0 | for (; j < 64; j++) |
24 | 0 | if (input_bitmasks[i] & (uint64_t)1 << j) |
25 | 0 | return 0; |
26 | 0 | } |
27 | 0 | int output_flag = 0; |
28 | 0 | int output_bitcount = 0; |
29 | 0 | for (i = 0; i < output_bitmask_size; i++) |
30 | 0 | { |
31 | 0 | for (j = 0; j < 64; j++) |
32 | 0 | if (output_bitmasks[i] & (uint64_t)1 << j) |
33 | 0 | { |
34 | 0 | if (output_flag) |
35 | 0 | return 0; |
36 | 0 | } else |
37 | 0 | break; |
38 | 0 | output_bitcount += j; |
39 | 0 | if (j < 64) |
40 | 0 | output_flag = 1; |
41 | 0 | for (; j < 64; j++) |
42 | 0 | if (output_bitmasks[i] & (uint64_t)1 << j) |
43 | 0 | return 0; |
44 | 0 | } |
45 | 0 | return output_bitcount == input_bitcount && input_size == output_size && input_size == input_bitcount; |
46 | 0 | } |
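
The validator above accepts an input or output bitmask only when its set bits form a single contiguous run starting at bit 0, and the forward variant additionally requires the input and output bit counts to match input_size exactly. A minimal standalone sketch of that contiguity test follows; it mirrors the loop logic above for illustration and is not part of the ccv API.

#include <stdint.h>
#include <stdio.h>

/* Returns 1 if the set bits of mask form one run starting at bit 0. */
static int is_contiguous_from_bit0(const uint64_t mask)
{
	int j;
	for (j = 0; j < 64; j++)
		if (!(mask & (uint64_t)1 << j))
			break; /* The first zero ends the run of 1s. */
	for (; j < 64; j++)
		if (mask & (uint64_t)1 << j)
			return 0; /* A later 1 means the run is not contiguous. */
	return 1;
}

int main(void)
{
	/* 0x7 (0b111) passes, 0x5 (0b101) fails: prints "1 0". */
	printf("%d %d\n", is_contiguous_from_bit0(0x7), is_contiguous_from_bit0(0x5));
	return 0;
}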
47 | | |
48 | | static int _ccv_nnc_lssc_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
49 | 0 | { |
50 | 0 | int i, j; |
51 | 0 | int input_flag = 0; |
52 | 0 | int input_bitcount = 0; |
53 | 0 | for (i = 0; i < input_bitmask_size; i++) |
54 | 0 | { |
55 | 0 | for (j = 0; j < 64; j++) |
56 | 0 | if (input_bitmasks[i] & (uint64_t)1 << j) |
57 | 0 | { |
58 | 0 | if (input_flag) |
59 | 0 | return 0; |
60 | 0 | } else |
61 | 0 | break; |
62 | 0 | input_bitcount += j; |
63 | 0 | if (j < 64) |
64 | 0 | input_flag = 1; |
65 | | // The set bits must form one contiguous run starting at bit 0, e.g. 1111100000, never 1110010101 |
66 | 0 | for (; j < 64; j++) |
67 | 0 | if (input_bitmasks[i] & (uint64_t)1 << j) |
68 | 0 | return 0; |
69 | 0 | } |
70 | 0 | int output_flag = 0; |
71 | 0 | int output_bitcount = 0; |
72 | 0 | for (i = 0; i < output_bitmask_size; i++) |
73 | 0 | { |
74 | 0 | for (j = 0; j < 64; j++) |
75 | 0 | if (output_bitmasks[i] & (uint64_t)1 << j) |
76 | 0 | { |
77 | 0 | if (output_flag) |
78 | 0 | return 0; |
79 | 0 | } else |
80 | 0 | break; |
81 | 0 | output_bitcount += j; |
82 | 0 | if (j < 64) |
83 | 0 | output_flag = 1; |
84 | 0 | for (; j < 64; j++) |
85 | 0 | if (output_bitmasks[i] & (uint64_t)1 << j) |
86 | 0 | return 0; |
87 | 0 | } |
88 | 0 | return output_bitcount <= input_bitcount && output_bitcount == output_size; |
89 | 0 | } |
90 | | |
91 | | static void _ccv_nnc_lssc_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* outputs, const int output_size) |
92 | 10 | { |
93 | 10 | int i, j; |
94 | 10 | assert(output_size <= input_size); |
95 | 20 | for (i = 0; i < output_size; i++) |
96 | 10 | { |
97 | 10 | assert(inputs[i].datatype == CCV_16F); |
98 | 10 | const int nd = ccv_nnc_tensor_nd(inputs[i].dim); |
99 | 10 | const int hw = ccv_nnc_tensor_hw(inputs[i], nd); |
100 | 10 | outputs[i] = inputs[i]; |
101 | 20 | for (j = 0; j < CCV_NNC_MAX_DIM - 1; j++) |
102 | 10 | outputs[i].dim[j + hw] = (inputs[i].dim[j + hw] + 3) / 4; |
103 | 10 | outputs[i].dim[CCV_NNC_MAX_DIM - 1 + hw] = (inputs[i].dim[CCV_NNC_MAX_DIM - 1 + hw] + 3) / 4 * 4; |
104 | 10 | } |
105 | 10 | } |
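
As the forward shape inference above shows, each leading spatial dimension is divided by 4 (rounded up) and the innermost spatial dimension is rounded up to a multiple of 4, while batch and channel dimensions are left untouched. A small worked example of that rule, assuming a hypothetical NHWC 16F input of 1x30x30x3 with CCV_NNC_MAX_DIM == 2 (illustrative only):

#include <stdio.h>

int main(void)
{
	/* Hypothetical 16F NHWC input of 1x30x30x3. */
	int dim[4] = { 1, 30, 30, 3 };
	dim[1] = (dim[1] + 3) / 4;     /* Leading spatial dim: ceil(30 / 4) = 8. */
	dim[2] = (dim[2] + 3) / 4 * 4; /* Innermost spatial dim rounded up to a multiple of 4: 32. */
	printf("%dx%dx%dx%d\n", dim[0], dim[1], dim[2], dim[3]); /* Prints 1x8x32x3. */
	return 0;
}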
106 | | |
107 | | static void _ccv_nnc_lssc_tensor_auto_back(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* outputs, const int output_size) |
108 | 0 | { |
109 | 0 | int i, j; |
110 | 0 | assert(output_size <= input_size); |
111 | 0 | for (i = 0; i < output_size; i++) |
112 | 0 | { |
113 | 0 | assert(inputs[i].datatype == CCV_16F); |
114 | 0 | const int nd = ccv_nnc_tensor_nd(inputs[i].dim); |
115 | 0 | const int hw = ccv_nnc_tensor_hw(inputs[i], nd); |
116 | 0 | outputs[i] = inputs[i]; |
117 | 0 | for (j = 0; j < CCV_NNC_MAX_DIM - 1; j++) |
118 | 0 | outputs[i].dim[j + hw] = inputs[i].dim[j + hw] * 4; |
119 | 0 | assert(outputs[i].dim[CCV_NNC_MAX_DIM - 1 + hw] % 4 == 0); |
120 | 0 | } |
121 | 0 | } |
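
The backward shape inference reverses only the leading spatial dimensions (multiplying by 4) and asserts that the innermost spatial dimension is already a multiple of 4, leaving it as-is. Continuing the example above (again only a sketch of the arithmetic, not library code):

#include <stdio.h>

int main(void)
{
	/* Compressed dims from the forward example: 1x8x32x3. */
	int dim[4] = { 1, 8, 32, 3 };
	dim[1] = dim[1] * 4; /* Leading spatial dim expands 4x: 32. */
	/* dim[2] stays as-is; the original width (30) is recovered only up to its padding to 32. */
	printf("%dx%dx%dx%d\n", dim[0], dim[1], dim[2], dim[3]); /* Prints 1x32x32x3. */
	return 0;
}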
122 | | |
123 | | REGISTER_COMMAND(CCV_NNC_COMPRESSION_LSSC_FORWARD)(ccv_nnc_cmd_registry_t* const registry) |
124 | | FIND_BACKEND(ccv_nnc_lssc_cpu_ref.c, gpu/ccv_nnc_lssc_gpu_ref.cu) |
125 | 1 | { |
126 | 1 | registry->bitmask = _ccv_nnc_lssc_forw_bitmask; |
127 | 1 | registry->tensor_auto = _ccv_nnc_lssc_tensor_auto_forw; |
128 | 1 | } |
129 | | |
130 | | REGISTER_COMMAND(CCV_NNC_COMPRESSION_LSSC_BACKWARD)(ccv_nnc_cmd_registry_t* const registry) |
131 | | FIND_BACKEND(ccv_nnc_lssc_cpu_ref.c, gpu/ccv_nnc_lssc_gpu_ref.cu) |
132 | 1 | { |
133 | 1 | registry->bitmask = _ccv_nnc_lssc_back_bitmask; |
134 | 1 | registry->tensor_auto = _ccv_nnc_lssc_tensor_auto_back; |
135 | 1 | } |
136 | | |
137 | | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_COMPRESSION_LSSC_FORWARD) |
138 | | #define CMD_COMPRESSION_LSSC_FORWARD() ccv_nnc_cmd(CCV_NNC_COMPRESSION_LSSC_FORWARD, 0, ccv_nnc_cmd_auto, 0) |
139 | | //@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_COMPRESSION_LSSC_BACKWARD) |
140 | | #define CMD_COMPRESSION_LSSC_BACKWARD() ccv_nnc_cmd(CCV_NNC_COMPRESSION_LSSC_BACKWARD, 0, ccv_nnc_cmd_auto, 0) |
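
A hedged usage sketch of the easy command macro above: it lets ccv_nnc_hint_tensor_auto compute the compressed shape via the tensor_auto hook registered earlier, then runs the forward command through ccv_nnc_cmd_exec. The shapes and the overall flow follow the public ccv_nnc API as documented, but treat this as an illustration rather than a canonical example from the repository.

#include "ccv.h"
#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_easy.h"

int main(void)
{
	ccv_nnc_init();
	/* A 16F NHWC tensor of 30x30x3 to be compressed. */
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 30, 30, 3), 0);
	ccv_nnc_tensor_param_t b_params;
	/* Let the registered tensor_auto hook compute the compressed shape (8x32x3). */
	ccv_nnc_hint_tensor_auto(CMD_COMPRESSION_LSSC_FORWARD(), &a->info, 1, ccv_nnc_no_hint, &b_params, 1);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, b_params, 0);
	/* Execute the forward compression; the best available backend is picked automatically. */
	ccv_nnc_cmd_exec(CMD_COMPRESSION_LSSC_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	return 0;
}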