/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/dropout/ccv_nnc_dropout.c
Line | Count | Source |
1 | | #include "ccv.h" |
2 | | #include "nnc/ccv_nnc.h" |
3 | | #include "nnc/ccv_nnc_internal.h" |
4 | | |
5 | | static int _ccv_nnc_dropout_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
6 | 46 | { |
7 | | // 1 input (x) |
8 | | // 1 output (y, mask) |
9 | 46 | if (input_bitmasks[0] == 1u && output_bitmasks[0] == 3u) |
10 | 34 | return 1; |
11 | 12 | return 0; |
12 | 46 | } |
13 | | |
14 | | static int _ccv_nnc_xy_inplace(const ccv_nnc_cmd_param_t cmd, const int input_idx, const int input_size, const int output_idx, const int output_size) |
15 | 104 | { |
16 | 104 | if (input_idx == 0 && output_idx == 0) |
17 | 52 | return 1; |
18 | 52 | return 0; |
19 | 104 | } |
20 | | |
21 | | static void _ccv_nnc_dropout_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size) |
22 | 552 | { |
23 | 552 | assert(input_size == 1); |
24 | 552 | assert(output_size <= 2); |
25 | 552 | outputs[0] = inputs[0]; |
26 | 552 | if (output_size == 1) |
27 | 0 | return; |
28 | 552 | outputs[1] = inputs[0]; |
29 | 552 | memset(outputs[1].dim, 0, sizeof(outputs[1].dim)); |
30 | 552 | if (!cmd.dropout.entirety) |
31 | 552 | { |
32 | 552 | int i; |
33 | | // Reset to 0. |
34 | 552 | const int inc = (int)CCV_GET_DATA_TYPE_SIZE(inputs[0].datatype); |
35 | | // Align to 128-bytes boundary, for each computed result. |
36 | 552 | int line = ((inputs[0].dim[0] + 127) >> 7); |
37 | 1.45k | for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC && inputs[0].dim[i] > 0; i++906 ) |
38 | 906 | line *= inputs[0].dim[i]; |
39 | 552 | assert((128 % inc) == 0); |
40 | 552 | outputs[1].dim[0] = 128 / inc; |
41 | 552 | outputs[1].dim[1] = line; // Aligned to 128 bytes, reserved space. |
42 | 552 | } else { |
43 | 0 | outputs[1].dim[0] = 1; |
44 | 0 | } |
45 | 552 | } |
46 | | |
47 | | static int _ccv_nnc_dropout_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size) |
48 | 108 | { |
49 | | // 0b10001 |
50 | | // Inputs (dy, 0, 0, 0, mask) |
51 | | // Output the propagated error |
52 | 108 | if ((input_bitmasks[0] & 17u) == 17u && (output_bitmasks[0] & 1u) == 1u56 ) |
53 | 56 | return 1; |
54 | 52 | return 0; |
55 | 108 | } |
56 | | |
57 | | REGISTER_COMMAND(CCV_NNC_DROPOUT_FORWARD)(ccv_nnc_cmd_registry_t* const registry) |
58 | | FIND_BACKEND(ccv_nnc_dropout_cpu_ref.c, gpu/ccv_nnc_dropout_gpu_cudnn.cu) |
59 | 1 | { |
60 | 1 | registry->bitmask = _ccv_nnc_dropout_forw_bitmask; |
61 | 1 | registry->tensor_auto = _ccv_nnc_dropout_tensor_auto_forw; |
62 | 1 | registry->allow_inplace = _ccv_nnc_xy_inplace; |
63 | 1 | } |
64 | | |
65 | | REGISTER_COMMAND(CCV_NNC_DROPOUT_BACKWARD)(ccv_nnc_cmd_registry_t* const registry) |
66 | | FIND_BACKEND(ccv_nnc_dropout_cpu_ref.c, gpu/ccv_nnc_dropout_gpu_cudnn.cu) |
67 | 1 | { |
68 | 1 | registry->bitmask = _ccv_nnc_dropout_back_bitmask; |
69 | 1 | registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_gradient; |
70 | 1 | } |
71 | | |
// Convenience constructors for the dropout forward command.
// CMD_DROPOUT_FORWARD(p) builds the command with entirety = 0;
// CMD_DROPOUT_FORWARD(p, entirety) sets both fields. The _X_SEL helper picks the
// _X_1 / _X_2 variant by argument count; _X_F is the misuse placeholder.
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_DROPOUT_FORWARD)
#define CMD_DROPOUT_FORWARD_X_F(...) ("This should not be used, you should have either 1 parameter or 2 parameters for CMD_DROPOUT_FORWARD")
#define CMD_DROPOUT_FORWARD_X_1(_p) ccv_nnc_cmd(CCV_NNC_DROPOUT_FORWARD, 0, ((ccv_nnc_cmd_param_t){ .size = { .dim = { 1, 1, 1 } }, .dropout = { .p = _p, .entirety = 0 } }), 0)
#define CMD_DROPOUT_FORWARD_X_2(_p, _entirety) ccv_nnc_cmd(CCV_NNC_DROPOUT_FORWARD, 0, ((ccv_nnc_cmd_param_t){ .size = { .dim = { 1, 1, 1 } }, .dropout = { .p = _p, .entirety = _entirety } }), 0)
#define CMD_DROPOUT_FORWARD_X_SEL(_0, _1, _2, _FX, ...) _FX
#define CMD_DROPOUT_FORWARD(...) CMD_DROPOUT_FORWARD_X_SEL(CMD_DROPOUT_FORWARD_X_F, ##__VA_ARGS__, CMD_DROPOUT_FORWARD_X_2, CMD_DROPOUT_FORWARD_X_1, CMD_DROPOUT_FORWARD_X_F)(__VA_ARGS__)
// Convenience constructors for the dropout backward command.
// CMD_DROPOUT_BACKWARD(p) builds the command with entirety = 0;
// CMD_DROPOUT_BACKWARD(p, entirety) sets both fields. The _X_SEL helper picks the
// _X_1 / _X_2 variant by argument count; _X_F is the misuse placeholder, whose
// message names this macro (CMD_DROPOUT_BACKWARD) so the diagnostic points at the
// right call site.
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_DROPOUT_BACKWARD)
#define CMD_DROPOUT_BACKWARD_X_F(...) ("This should not be used, you should have either 1 parameter or 2 parameters for CMD_DROPOUT_BACKWARD")
#define CMD_DROPOUT_BACKWARD_X_1(_p) ccv_nnc_cmd(CCV_NNC_DROPOUT_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.dropout={.p=_p,.entirety=0}}), 0)
#define CMD_DROPOUT_BACKWARD_X_2(_p, _entirety) ccv_nnc_cmd(CCV_NNC_DROPOUT_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.size={.dim={1,1,1}},.dropout={.p=_p,.entirety=_entirety}}), 0)
#define CMD_DROPOUT_BACKWARD_X_SEL(_0, _1, _2, _FX, ...) _FX
#define CMD_DROPOUT_BACKWARD(...) CMD_DROPOUT_BACKWARD_X_SEL(CMD_DROPOUT_BACKWARD_X_F, ##__VA_ARGS__, CMD_DROPOUT_BACKWARD_X_2, CMD_DROPOUT_BACKWARD_X_1, CMD_DROPOUT_BACKWARD_X_F)(__VA_ARGS__)