// lib/nnc/cmd/rnn/ccv_nnc_lstm.c
#include "ccv.h"
#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_easy.h"
#include "nnc/ccv_nnc_internal.h"

6 | | static int _ccv_nnc_lstm_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
7 | 4 | { |
8 | | // input: x, [xs], [hx], [cx], w. |
9 | | // output: y, [hy], [cy], r |
10 | 4 | if (input_size == 5 && (input_bitmasks[0] & 19u) == ((1u << 0) | (1u << 1) | (0u << 2) | (0u << 3) | (1u << 4)) && (output_bitmasks[0] & 0x9u) == ((1u << 0) | (0u << 1) | (0u << 2) | (1u << 3))) |
11 | 3 | return 1; |
12 | 1 | return 0; |
13 | 4 | } |
14 | | |
15 | | static int _ccv_nnc_lstm_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
16 | 1 | { |
17 | | // input: dy, [dhy], [dcy], [dr] |
18 | | // output: dx, [dxs], [dhx], [dcx]. |
19 | 1 | if ((input_bitmasks[0] & 4929u) == ((1u << 0) | (0u << 1) | (0u << 2) | (0u << 3) | (0u << 4) | (0u << 5) | (1u << 6) | (0u << 7) | (1u << 8) | (1u << 9) | (0u << 10) | (0u << 11) | (1u << 12)) && (output_bitmasks[0] & 13u) == ((1u << 0) | (0u << 1) | (1u << 2) | (1u << 3) | (0u << 4))0 ) |
20 | 0 | return 1; |
21 | | // Output dx, [dxs], [dhx], [dcx] and dw. |
22 | 1 | if ((input_bitmasks[0] & 4945u) == ((1u << 0) | (0u << 1) | (0u << 2) | (0u << 3) | (1u << 4) | (0u << 5) | (1u << 6) | (0u << 7) | (1u << 8) | (1u << 9) | (0u << 10) | (0u << 11) | (1u << 12)) && (output_bitmasks[0] & 29u) == ((1u << 0) | (0u << 1) | (1u << 2) | (1u << 3) | 1u << 4)0 ) |
23 | 0 | return 1; |
24 | | // Output dw (this needs to be invoked after dx, dhx, dcx computed, thus, functionally the same as above). |
25 | 1 | if ((input_bitmasks[0] & 4945u) == ((1u << 0) | (0u << 1) | (0u << 2) | (0u << 3) | (1u << 4) | (0u << 5) | (1u << 6) | (0u << 7) | (1u << 8) | (1u << 9) | (0u << 10) | (0u << 11) | (1u << 12)) && (output_bitmasks[0] & 16u) == ((0u << 0) | (0u << 1) | (0u << 2) | (0u << 3) | 1u << 4)0 ) |
26 | 0 | return 1; |
27 | 1 | return 0; |
28 | 1 | } |
29 | | |
30 | | typedef size_t(*ccv_nnc_lstm_reserve_space_size_f)(const ccv_nnc_cmd_t cmd, const int datatype, const int feature_size, const int batch_count, const int max_seq_count); |
31 | | |
32 | | static void _ccv_nnc_lstm_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* outputs, const int output_size) |
33 | 11 | { |
34 | 11 | assert(output_size >= 1 && input_size >= 5); |
35 | 11 | const int x_nd = ccv_nnc_tensor_nd(inputs[0].dim); |
36 | 11 | assert(x_nd == 3 || x_nd == 2); |
37 | 11 | const int batch_count = x_nd == 3 ? (cmd.rnn.batch_first ? inputs[0].dim[0] : inputs[0].dim[1]0 ) : 10 ; |
38 | 11 | assert(batch_count > 0); |
39 | 11 | const int max_seq_count = x_nd == 3 ? (cmd.rnn.batch_first ? inputs[0].dim[1] : inputs[0].dim[0]0 ) : inputs[0].dim[0]0 ; |
40 | 11 | const int feature_count = inputs[0].dim[x_nd - 1]; |
41 | 11 | const int proj_size = cmd.rnn.proj_size == 0 ? cmd.rnn.hidden_size : cmd.rnn.proj_size0 ; |
42 | 11 | const int output_feature_count = proj_size * (!!cmd.rnn.bidirectional + 1); |
43 | 11 | memset(outputs[0].dim, 0, sizeof(outputs[0].dim)); |
44 | 11 | outputs[0].dim[0] = cmd.rnn.batch_first ? batch_count : max_seq_count0 ; |
45 | 11 | outputs[0].dim[1] = cmd.rnn.batch_first ? max_seq_count : batch_count0 ; |
46 | 11 | outputs[0].dim[2] = output_feature_count; |
47 | 11 | outputs[0].type = inputs[0].type; |
48 | 11 | outputs[0].format = inputs[0].format; |
49 | 11 | outputs[0].datatype = inputs[0].datatype; |
50 | 11 | if (output_size >= 4) |
51 | 11 | { |
52 | 11 | ccv_nnc_cmd_t lstm = ccv_nnc_cmd(CCV_NNC_LSTM_FORWARD, 0, cmd, 0); |
53 | 11 | const int backend = ccv_nnc_cmd_find_backend(lstm, CCV_TENSOR_GET_MEMORY(inputs[0].type), inputs[0].format, inputs[0].datatype); |
54 | 11 | lstm.backend = backend; |
55 | 11 | ccv_nnc_lstm_reserve_space_size_f reserve_space_size = (ccv_nnc_lstm_reserve_space_size_f)ccv_nnc_cmd_aux(lstm); |
56 | 11 | size_t total_size = reserve_space_size(lstm, inputs[0].datatype, feature_count, batch_count, max_seq_count); |
57 | 11 | memset(outputs[3].dim, 0, sizeof(outputs[3].dim)); |
58 | 11 | outputs[3].dim[0] = (int)((total_size + cmd.rnn.hidden_size - 1) / cmd.rnn.hidden_size); |
59 | 11 | outputs[3].dim[1] = cmd.rnn.hidden_size; |
60 | 11 | outputs[3].type = inputs[0].type; |
61 | 11 | outputs[3].format = inputs[0].format; |
62 | 11 | outputs[3].datatype = inputs[0].datatype; |
63 | 11 | } |
64 | 11 | int i; |
65 | 11 | if (input_size >= 4 && output_size >= 3) |
66 | 33 | for (i = 0; 11 i < 2; i++22 ) |
67 | 22 | outputs[i + 1] = inputs[i + 2]; |
68 | 11 | } |
69 | | |
70 | | REGISTER_COMMAND(CCV_NNC_LSTM_FORWARD)(ccv_nnc_cmd_registry_t* const registry) |
71 | | FIND_BACKEND(gpu/ccv_nnc_lstm_gpu_cudnn.cu) |
72 | 1 | { |
73 | 1 | registry->bitmask = _ccv_nnc_lstm_forw_bitmask; |
74 | 1 | registry->tensor_auto = _ccv_nnc_lstm_tensor_auto_forw; |
75 | 1 | } |
76 | | |
77 | | REGISTER_COMMAND(CCV_NNC_LSTM_BACKWARD)(ccv_nnc_cmd_registry_t* const registry) |
78 | | FIND_BACKEND(gpu/ccv_nnc_lstm_gpu_cudnn.cu) |
79 | 1 | { |
80 | 1 | registry->bitmask = _ccv_nnc_lstm_back_bitmask; |
81 | 1 | registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs; |
82 | 1 | } |
83 | | |
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_LSTM_FORWARD)
#define CMD_LSTM_FORWARD(_hidden_size, _proj_size, _num_layers, _bias, _batch_first, _bidirectional, _dropout, _is_test) ccv_nnc_cmd(CCV_NNC_LSTM_FORWARD, 0, ((ccv_nnc_cmd_param_t){.rnn={.hidden_size=_hidden_size,.proj_size=_proj_size,.num_layers=_num_layers,.bias=_bias,.batch_first=_batch_first,.bidirectional=_bidirectional,.dropout=_dropout,.is_test=_is_test}}), 0)
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_LSTM_BACKWARD)
#define CMD_LSTM_BACKWARD(_hidden_size, _proj_size, _num_layers, _bias, _batch_first, _bidirectional, _dropout, _is_test) ccv_nnc_cmd(CCV_NNC_LSTM_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.rnn={.hidden_size=_hidden_size,.proj_size=_proj_size,.num_layers=_num_layers,.bias=_bias,.batch_first=_batch_first,.bidirectional=_bidirectional,.dropout=_dropout,.is_test=_is_test}}), 0)