Coverage Report

Created: 2025-02-24 17:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/rnn/ccv_nnc_lstm.c
Line
Count
Source
1
#include "ccv.h"
2
#include "nnc/ccv_nnc.h"
3
#include "nnc/ccv_nnc_easy.h"
4
#include "nnc/ccv_nnc_internal.h"
5
6
// Tells whether the LSTM forward command accepts the given combination of
// present inputs / requested outputs.
//
// Slot order (one bit per slot, bit 0 first):
//   input:  x, xs, hx, cx, w
//   output: y, hy, cy, r
// Only the bits named below are inspected; hx, cx, hy and cy are ignored.
// Returns 1 when the combination is accepted, 0 otherwise.
static int _ccv_nnc_lstm_forw_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
  // x (bit 0), xs (bit 1) and w (bit 4) all have to be present.
  const uint64_t required_inputs = (1u << 0) | (1u << 1) | (1u << 4);
  // y (bit 0) and r (bit 3) both have to be requested.
  const uint64_t required_outputs = (1u << 0) | (1u << 3);
  // Exactly five input slots are expected; the bitmasks are not read otherwise.
  if (input_size != 5)
    return 0;
  if ((input_bitmasks[0] & required_inputs) != required_inputs)
    return 0;
  if ((output_bitmasks[0] & required_outputs) != required_outputs)
    return 0;
  return 1;
}
14
15
// Tells whether the LSTM backward command accepts the given combination of
// present inputs / requested outputs.
//
// input:  dy, [dhy], [dcy], [dr], followed by further slots up to bit 12.
// output: dx, [dxs], [dhx], [dcx], dw.
// NOTE(review): input bits 4..12 presumably are the forward inputs and outputs
// (x, xs, hx, cx, w, y, hy, cy, r) in that order - confirm against the backend.
// Returns 1 when the combination is accepted, 0 otherwise.
static int _ccv_nnc_lstm_back_bitmask(const ccv_nnc_cmd_param_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size)
{
  // Input bits 0, 6, 8, 9 and 12 are needed by every supported mode (4929).
  const uint64_t inputs_for_dx = (1u << 0) | (1u << 6) | (1u << 8) | (1u << 9) | (1u << 12);
  // Producing dw additionally needs input bit 4 (4945).
  const uint64_t inputs_for_dw = inputs_for_dx | (1u << 4);
  // Output bits 0, 2 and 3: dx, dhx and dcx (13).
  const uint64_t outputs_dx = (1u << 0) | (1u << 2) | (1u << 3);
  // Output bit 4: dw (16).
  const uint64_t output_dw = 1u << 4;

  const uint64_t present = input_bitmasks[0];
  // Every mode shares these inputs, so nothing can match without them.
  if ((present & inputs_for_dx) != inputs_for_dx)
    return 0;
  const uint64_t requested = output_bitmasks[0];
  // Mode 1: output dx, [dxs], [dhx], [dcx].
  if ((requested & outputs_dx) == outputs_dx)
    return 1;
  // The remaining modes produce dw and therefore need input bit 4 as well.
  if ((present & inputs_for_dw) != inputs_for_dw)
    return 0;
  // Mode 2: output dx, [dxs], [dhx], [dcx] and dw. Any mask satisfying this
  // already satisfied mode 1; kept so the three documented modes stay visible.
  if ((requested & (outputs_dx | output_dw)) == (outputs_dx | output_dw))
    return 1;
  // Mode 3: output dw (this needs to be invoked after dx, dhx, dcx computed,
  // thus, functionally the same as above).
  if ((requested & output_dw) == output_dw)
    return 1;
  return 0;
}
29
30
// Signature of the helper that reports how much reserve space an LSTM command needs.
// _ccv_nnc_lstm_tensor_auto_forw obtains a pointer of this type from ccv_nnc_cmd_aux()
// and uses the returned size to shape outputs[3].
// NOTE(review): the unit of the returned size (bytes vs. elements) is not visible here - confirm in the backend.
typedef size_t(*ccv_nnc_lstm_reserve_space_size_f)(const ccv_nnc_cmd_t cmd, const int datatype, const int feature_size, const int batch_count, const int max_seq_count);
31
32
// Derives the output tensor parameters of the LSTM forward command from its inputs.
//
// - outputs[0] becomes a 3-d tensor laid out according to cmd.rnn.batch_first, with
//   the last dimension being proj_size (or hidden_size when proj_size is 0), doubled
//   when cmd.rnn.bidirectional is set. type / format / datatype are taken from inputs[0].
// - outputs[3] (only when output_size >= 4) is shaped as
//   ceil(total_size / hidden_size) x hidden_size, where total_size comes from the
//   aux function of the backend found for this command.
// - outputs[1] and outputs[2] (only when input_size >= 4 and output_size >= 3) are
//   plain copies of inputs[2] and inputs[3].
static void _ccv_nnc_lstm_tensor_auto_forw(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t* inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* outputs, const int output_size)
{
  assert(output_size >= 1 && input_size >= 5);
  const ccv_nnc_tensor_param_t* const x = inputs;
  const int rank = ccv_nnc_tensor_nd(x->dim);
  assert(rank == 3 || rank == 2);
  // When x is not 3-d the batch count is 1 and dim[0] is the sequence length.
  int batch_size = 1;
  int seq_length = x->dim[0];
  if (rank == 3)
  {
    if (cmd.rnn.batch_first)
    {
      batch_size = x->dim[0];
      seq_length = x->dim[1];
    } else
      batch_size = x->dim[1];
  }
  assert(batch_size > 0);
  const int input_features = x->dim[rank - 1];
  // A proj_size of 0 falls back to hidden_size.
  const int projected = cmd.rnn.proj_size ? cmd.rnn.proj_size : cmd.rnn.hidden_size;
  const int directions = cmd.rnn.bidirectional ? 2 : 1;
  const int y_features = projected * directions;
  ccv_nnc_tensor_param_t* const y = outputs;
  memset(y->dim, 0, sizeof(y->dim));
  if (cmd.rnn.batch_first)
  {
    y->dim[0] = batch_size;
    y->dim[1] = seq_length;
  } else {
    y->dim[0] = seq_length;
    y->dim[1] = batch_size;
  }
  y->dim[2] = y_features;
  y->type = x->type;
  y->format = x->format;
  y->datatype = x->datatype;
  if (output_size >= 4)
  {
    // Ask the backend that would run this command how much reserve space it wants.
    ccv_nnc_cmd_t forward_cmd = ccv_nnc_cmd(CCV_NNC_LSTM_FORWARD, 0, cmd, 0);
    forward_cmd.backend = ccv_nnc_cmd_find_backend(forward_cmd, CCV_TENSOR_GET_MEMORY(inputs[0].type), inputs[0].format, inputs[0].datatype);
    const ccv_nnc_lstm_reserve_space_size_f query_reserve_size = (ccv_nnc_lstm_reserve_space_size_f)ccv_nnc_cmd_aux(forward_cmd);
    const size_t reserve_size = query_reserve_size(forward_cmd, inputs[0].datatype, input_features, batch_size, seq_length);
    ccv_nnc_tensor_param_t* const r = outputs + 3;
    memset(r->dim, 0, sizeof(r->dim));
    // Round up so that rows of hidden_size cover the whole reserve space.
    r->dim[0] = (int)((reserve_size + cmd.rnn.hidden_size - 1) / cmd.rnn.hidden_size);
    r->dim[1] = cmd.rnn.hidden_size;
    r->type = inputs[0].type;
    r->format = inputs[0].format;
    r->datatype = inputs[0].datatype;
  }
  if (input_size >= 4 && output_size >= 3)
  {
    // outputs[1] / outputs[2] take their parameters from inputs[2] / inputs[3].
    outputs[1] = inputs[2];
    outputs[2] = inputs[3];
  }
}
69
70
REGISTER_COMMAND(CCV_NNC_LSTM_FORWARD)(ccv_nnc_cmd_registry_t* const registry)
  FIND_BACKEND(gpu/ccv_nnc_lstm_gpu_cudnn.cu)
{
  // Hook up output shape inference and the input / output bitmask check for the forward command.
  registry->tensor_auto = _ccv_nnc_lstm_tensor_auto_forw;
  registry->bitmask = _ccv_nnc_lstm_forw_bitmask;
}
76
77
REGISTER_COMMAND(CCV_NNC_LSTM_BACKWARD)(ccv_nnc_cmd_registry_t* const registry)
  FIND_BACKEND(gpu/ccv_nnc_lstm_gpu_cudnn.cu)
{
  // The backward command uses the shared ccv_nnc_hint_tensor_auto_backward_from_inputs
  // for shape inference and its own bitmask check.
  registry->tensor_auto = ccv_nnc_hint_tensor_auto_backward_from_inputs;
  registry->bitmask = _ccv_nnc_lstm_back_bitmask;
}
83
84
// CMD_LSTM_FORWARD(...) expands to ccv_nnc_cmd(CCV_NNC_LSTM_FORWARD, 0, <params>, 0), where <params>
// is a ccv_nnc_cmd_param_t compound literal whose .rnn fields are set from the macro arguments.
// NOTE(review): the //@REGISTER_EASY_COMMAND_MACRO marker looks like a hook for a code generator -
// confirm before moving or reformatting the #define that follows it.
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_LSTM_FORWARD)
85
#define CMD_LSTM_FORWARD(_hidden_size, _proj_size, _num_layers, _bias, _batch_first, _bidirectional, _dropout, _is_test) ccv_nnc_cmd(CCV_NNC_LSTM_FORWARD, 0, ((ccv_nnc_cmd_param_t){.rnn={.hidden_size=_hidden_size,.proj_size=_proj_size,.num_layers=_num_layers,.bias=_bias,.batch_first=_batch_first,.bidirectional=_bidirectional,.dropout=_dropout,.is_test=_is_test}}), 0)
86
// CMD_LSTM_BACKWARD(...) expands to ccv_nnc_cmd(CCV_NNC_LSTM_BACKWARD, 0, <params>, 0), where <params>
// is a ccv_nnc_cmd_param_t compound literal whose .rnn fields are set from the macro arguments.
// NOTE(review): the //@REGISTER_EASY_COMMAND_MACRO marker looks like a hook for a code generator -
// confirm before moving or reformatting the #define that follows it.
//@REGISTER_EASY_COMMAND_MACRO(CCV_NNC_LSTM_BACKWARD)
87
#define CMD_LSTM_BACKWARD(_hidden_size, _proj_size, _num_layers, _bias, _batch_first, _bidirectional, _dropout, _is_test) ccv_nnc_cmd(CCV_NNC_LSTM_BACKWARD, 0, ((ccv_nnc_cmd_param_t){.rnn={.hidden_size=_hidden_size,.proj_size=_proj_size,.num_layers=_num_layers,.bias=_bias,.batch_first=_batch_first,.bidirectional=_bidirectional,.dropout=_dropout,.is_test=_is_test}}), 0)