Coverage Report

Created: 2024-08-18 16:21

/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/lstm.tests.c
Line
Count
Source
1
#include "case.h"
2
#include "ccv_case.h"
3
#include "ccv_nnc_case.h"
4
#include <ccv.h>
5
#include <nnc/ccv_nnc.h>
6
#include <nnc/ccv_nnc_easy.h>
7
#include <3rdparty/dsfmt/dSFMT.h>
8
9
TEST_SETUP()
10
{
11
  ccv_nnc_init();
12
}
13
14
static int weight_dim(int bidirectional, int num_layers, int input_size, int hidden_size, int proj_size, int bias)
15
8
{
16
8
  const int D = !!bidirectional + 1;
17
8
  if (hidden_size == proj_size)
18
6
    return (num_layers * (bias ? 8 : 0) + (num_layers - 1) * (hidden_size * 4 * D + hidden_size * 4) + input_size * 4 + hidden_size * 4) * D;
19
2
  else
20
2
    return (num_layers * (bias ? 8 : 0) + (num_layers - 1) * (proj_size * 4 * D + proj_size * 4) + (proj_size * 4 + input_size * 4) + num_layers * proj_size) * D;
21
8
}
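
The weight_dim values used in the tests below can be checked against this formula; the following is just that arithmetic written out (a sketch derived from the function above, not from the ccv_nnc headers):

  /* weight_dim(0, 6, 24, 24, 24, 1): D = 1, hidden_size == proj_size
       (6*8 + 5*(24*4 + 24*4) + 24*4 + 24*4) * 1 = 48 + 960 + 192 = 1200
       -> 1200 * 24 = 28,800 weights, matching the 28.8k fill-loop counts below.
     weight_dim(0, 6, 24, 24, 12, 1): D = 1, projection to 12
       (6*8 + 5*(12*4 + 12*4) + (12*4 + 24*4) + 6*12) * 1 = 48 + 480 + 144 + 72 = 744
       -> 744 * 24 = 17,856, matching the 17.8k count in the projection test.
     weight_dim(1, 6, 24, 24, 12, 1): D = 2, bidirectional
       (6*8 + 5*(12*4*2 + 12*4) + (12*4 + 24*4) + 6*12) * 2 = 984 * 2 = 1,968
       -> 1,968 * 24 = 47,232, matching the 47.2k count in the bidirectional test. */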
22
23
static int r_dim(int bidirectional, int dropout, int batch_count, int num_layers, int max_seq_count, int hidden_size, int proj_size)
24
8
{
25
8
  const int D = !!bidirectional + 1;
26
8
  if (hidden_size == proj_size)
27
    // 5: i, f, g, o, tanh(c)
28
    // 2: c, h
29
6
    return D * batch_count * ((5 + !!dropout) * num_layers * max_seq_count + 2 * num_layers * (max_seq_count - 1));
30
2
  else
31
    // 6: i, f, g, o, tanh(c), h
32
    // 1: c, h_proj
33
2
    return D * batch_count * ((6 + !!dropout) * num_layers * max_seq_count + 2 * num_layers * (max_seq_count - 1));
34
8
}
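
The r tensor holds the per-step intermediates listed in the comments above; its first dimension for the parameter combinations used in these tests works out as follows (same caveat: just the arithmetic from the function above):

  /* r_dim(0, 0, 1, 6, 5, 24, 24): 1 * 1 * (5*6*5 + 2*6*4) = 150 + 48 = 198
     r_dim(0, 1, 1, 6, 5, 24, 24): 1 * 1 * (6*6*5 + 2*6*4) = 180 + 48 = 228   (with dropout)
     r_dim(0, 1, 1, 6, 5, 24, 12): 1 * 1 * (7*6*5 + 2*6*4) = 210 + 48 = 258   (dropout + projection)
     r_dim(1, 1, 1, 6, 5, 24, 12): 2 * 258 = 516                              (bidirectional)
     Each r tensor below is allocated as r_d x 24. */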
35
36
TEST_CASE("LSTM forward")
37
1
{
38
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_LSTM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN));
39
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
40
1
  ccv_nnc_tensor_t* const hx = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
41
1
  ccv_nnc_tensor_t* const cx = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
42
1
  const int weight_d = weight_dim(0, 6, 24, 24, 24, 1);
43
1
  ccv_nnc_tensor_t* const w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, weight_d, 24), 0);
44
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
45
1
  ccv_nnc_tensor_t* const hy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
46
1
  ccv_nnc_tensor_t* const cy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
47
1
  const int r_d = r_dim(0, 0, 1, 6, 5, 24, 24);
48
1
  ccv_nnc_tensor_t* const r = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, r_d, 24), 0);
49
1
  dsfmt_t dsfmt;
50
1
  dsfmt_init_gen_rand(&dsfmt, 0);
51
1
  int i;
52
121
  for (i = 0; i < 5 * 24; i++)
53
120
    x->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
54
37
  for (i = 0; i < 3 * 12; i++)
55
36
    hx->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
56
73
  for (i = 0; i < 3 * 24; i++)
57
72
    cx->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
58
28.8k
  for (i = 0; i < 24 * weight_d; i++)
59
28.8k
    w->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
60
1
  ccv_nnc_tensor_t* const gx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
61
1
  ccv_nnc_tensor_t* const ghx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
62
1
  ccv_nnc_tensor_t* const gcx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
63
1
  ccv_nnc_tensor_t* const gw = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, weight_d, 24), 0);
64
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x, hx, cx, w), TENSOR_LIST(gx, ghx, gcx, gw), 0);
65
1
  ccv_nnc_tensor_t* const gy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
66
1
  ccv_nnc_tensor_t* const ghy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
67
1
  ccv_nnc_tensor_t* const gcy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
68
1
  ccv_nnc_tensor_t* const gr = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, r_d, 24), 0);
69
1
  ccv_nnc_cmd_exec(CMD_LSTM_FORWARD(24, 0, 6, 1, 0, 0, 0, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gx, 0, ghx, gcx, gw), TENSOR_LIST(gy, ghy, gcy, gr), 0);
70
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gy, ghy, gcy, gr), TENSOR_LIST(y, hy, cy, r), 0);
71
1
  ccv_nnc_tensor_free(gx);
72
1
  ccv_nnc_tensor_free(ghx);
73
1
  ccv_nnc_tensor_free(gcx);
74
1
  ccv_nnc_tensor_free(gw);
75
1
  ccv_nnc_tensor_free(gy);
76
1
  ccv_nnc_tensor_free(ghy);
77
1
  ccv_nnc_tensor_free(gcy);
78
1
  ccv_nnc_tensor_free(gr);
79
1
  ccv_nnc_tensor_free(x);
80
1
  ccv_nnc_tensor_free(hx);
81
1
  ccv_nnc_tensor_free(cx);
82
1
  ccv_nnc_tensor_free(w);
83
1
  ccv_nnc_tensor_free(y);
84
1
  ccv_nnc_tensor_free(hy);
85
1
  ccv_nnc_tensor_free(cy);
86
1
  ccv_nnc_tensor_free(r);
87
1
}
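
Reading across the test variants in this file, the CMD_LSTM_FORWARD arguments appear to map as annotated below. This mapping is inferred only from which values change between the tests (projection, bidirectional, dropout); it is not taken from the ccv_nnc headers, and the two arguments that stay 0 in every call are left unidentified.

  CMD_LSTM_FORWARD(24,   /* hidden size (hidden_size = 24 throughout) */
                   0,    /* projection size (12 in the projection tests) */
                   6,    /* number of layers */
                   1,    /* bias */
                   0,    /* always 0 in this file; not identified */
                   0,    /* bidirectional (1 in the bidirectional test) */
                   0,    /* dropout (0.5 in the dropout tests) */
                   0)    /* always 0 in this file; not identified */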
88
89
TEST_CASE("LSTM forward without hx, cx, hy, cy")
90
1
{
91
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_LSTM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN));
92
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
93
1
  const int weight_d = weight_dim(0, 6, 24, 24, 24, 1);
94
1
  ccv_nnc_tensor_t* const w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, weight_d, 24), 0);
95
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
96
1
  const int r_d = r_dim(0, 0, 1, 6, 5, 24, 24);
97
1
  ccv_nnc_tensor_t* const r = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, r_d, 24), 0);
98
1
  dsfmt_t dsfmt;
99
1
  dsfmt_init_gen_rand(&dsfmt, 0);
100
1
  int i;
101
121
  for (i = 0; i < 5 * 24; i++)
102
120
    x->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
103
28.8k
  for (i = 0; i < 24 * weight_d; i++)
104
28.8k
    w->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
105
1
  ccv_nnc_tensor_t* const gx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
106
1
  ccv_nnc_tensor_t* const gw = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, weight_d, 24), 0);
107
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x, w), TENSOR_LIST(gx, gw), 0);
108
1
  ccv_nnc_tensor_t* const gy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
109
1
  ccv_nnc_tensor_t* const gr = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, r_d, 24), 0);
110
1
  ccv_nnc_cmd_exec(CMD_LSTM_FORWARD(24, 0, 6, 1, 0, 0, 0, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gx, 0, 0, 0, gw), TENSOR_LIST(gy, 0, 0, gr), 0);
111
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gy, gr), TENSOR_LIST(y, r), 0);
112
1
  ccv_nnc_tensor_free(gx);
113
1
  ccv_nnc_tensor_free(gw);
114
1
  ccv_nnc_tensor_free(gy);
115
1
  ccv_nnc_tensor_free(gr);
116
1
  ccv_nnc_tensor_free(x);
117
1
  ccv_nnc_tensor_free(w);
118
1
  ccv_nnc_tensor_free(y);
119
1
  ccv_nnc_tensor_free(r);
120
1
}
121
122
TEST_CASE("LSTM forward with dropout")
123
1
{
124
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_LSTM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN));
125
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
126
1
  ccv_nnc_tensor_t* const hx = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
127
1
  ccv_nnc_tensor_t* const cx = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
128
1
  const int weight_d = weight_dim(0, 6, 24, 24, 24, 1);
129
1
  ccv_nnc_tensor_t* const w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, weight_d, 24), 0);
130
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
131
1
  ccv_nnc_tensor_t* const hy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
132
1
  ccv_nnc_tensor_t* const cy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
133
1
  const int r_d = r_dim(0, 1, 1, 6, 5, 24, 24);
134
1
  ccv_nnc_tensor_t* const r = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, r_d, 24), 0);
135
1
  dsfmt_t dsfmt;
136
1
  dsfmt_init_gen_rand(&dsfmt, 0);
137
1
  int i;
138
121
  for (i = 0; i < 5 * 24; i++)
139
120
    x->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
140
37
  for (i = 0; i < 3 * 12; i++)
141
36
    hx->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
142
73
  for (i = 0; i < 3 * 24; i++)
143
72
    cx->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
144
28.8k
  for (i = 0; i < 24 * weight_d; i++)
145
28.8k
    w->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
146
1
  ccv_nnc_tensor_t* const gx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
147
1
  ccv_nnc_tensor_t* const ghx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
148
1
  ccv_nnc_tensor_t* const gcx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
149
1
  ccv_nnc_tensor_t* const gw = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, weight_d, 24), 0);
150
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x, hx, cx, w), TENSOR_LIST(gx, ghx, gcx, gw), 0);
151
1
  ccv_nnc_tensor_t* const gy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
152
1
  ccv_nnc_tensor_t* const ghy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
153
1
  ccv_nnc_tensor_t* const gcy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
154
1
  ccv_nnc_tensor_t* const gr = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, r_d, 24), 0);
155
1
  ccv_nnc_cmd_exec(CMD_LSTM_FORWARD(24, 0, 6, 1, 0, 0, 0.5, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gx, 0, ghx, gcx, gw), TENSOR_LIST(gy, ghy, gcy, gr), 0);
156
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gy, ghy, gcy, gr), TENSOR_LIST(y, hy, cy, r), 0);
157
1
  ccv_nnc_tensor_free(gx);
158
1
  ccv_nnc_tensor_free(ghx);
159
1
  ccv_nnc_tensor_free(gcx);
160
1
  ccv_nnc_tensor_free(gw);
161
1
  ccv_nnc_tensor_free(gy);
162
1
  ccv_nnc_tensor_free(ghy);
163
1
  ccv_nnc_tensor_free(gcy);
164
1
  ccv_nnc_tensor_free(gr);
165
1
  ccv_nnc_tensor_free(x);
166
1
  ccv_nnc_tensor_free(hx);
167
1
  ccv_nnc_tensor_free(cx);
168
1
  ccv_nnc_tensor_free(w);
169
1
  ccv_nnc_tensor_free(y);
170
1
  ccv_nnc_tensor_free(hy);
171
1
  ccv_nnc_tensor_free(cy);
172
1
  ccv_nnc_tensor_free(r);
173
1
}
174
175
TEST_CASE("LSTM forward with projection")
176
1
{
177
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_LSTM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN));
178
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
179
1
  ccv_nnc_tensor_t* const hx = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 12), 0);
180
1
  ccv_nnc_tensor_t* const cx = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
181
1
  const int weight_d = weight_dim(0, 6, 24, 24, 12, 1);
182
1
  ccv_nnc_tensor_t* const w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, weight_d, 24), 0);
183
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 12), 0);
184
1
  ccv_nnc_tensor_t* const hy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 12), 0);
185
1
  ccv_nnc_tensor_t* const cy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
186
1
  const int r_d = r_dim(0, 1, 1, 6, 5, 24, 12);
187
1
  ccv_nnc_tensor_t* const r = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, r_d, 24), 0);
188
1
  dsfmt_t dsfmt;
189
1
  dsfmt_init_gen_rand(&dsfmt, 0);
190
1
  int i;
191
121
  for (i = 0; i < 5 * 24; i++)
192
120
    x->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
193
37
  for (i = 0; i < 3 * 12; i++)
194
36
    hx->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
195
73
  for (i = 0; i < 3 * 24; i++)
196
72
    cx->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
197
17.8k
  for (i = 0; i < 24 * weight_d; i++)
198
17.8k
    w->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
199
1
  ccv_nnc_tensor_t* const gx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
200
1
  ccv_nnc_tensor_t* const ghx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 12), 0);
201
1
  ccv_nnc_tensor_t* const gcx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
202
1
  ccv_nnc_tensor_t* const gw = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, weight_d, 24), 0);
203
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x, hx, cx, w), TENSOR_LIST(gx, ghx, gcx, gw), 0);
204
1
  ccv_nnc_tensor_t* const gy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 12), 0);
205
1
  ccv_nnc_tensor_t* const ghy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 12), 0);
206
1
  ccv_nnc_tensor_t* const gcy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
207
1
  ccv_nnc_tensor_t* const gr = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, r_d, 24), 0);
208
1
  ccv_nnc_cmd_exec(CMD_LSTM_FORWARD(24, 12, 6, 1, 0, 0, 0.5, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gx, 0, ghx, gcx, gw), TENSOR_LIST(gy, ghy, gcy, gr), 0);
209
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gy, ghy, gcy, gr), TENSOR_LIST(y, hy, cy, r), 0);
210
1
  ccv_nnc_tensor_free(gx);
211
1
  ccv_nnc_tensor_free(ghx);
212
1
  ccv_nnc_tensor_free(gcx);
213
1
  ccv_nnc_tensor_free(gw);
214
1
  ccv_nnc_tensor_free(gy);
215
1
  ccv_nnc_tensor_free(ghy);
216
1
  ccv_nnc_tensor_free(gcy);
217
1
  ccv_nnc_tensor_free(gr);
218
1
  ccv_nnc_tensor_free(x);
219
1
  ccv_nnc_tensor_free(hx);
220
1
  ccv_nnc_tensor_free(cx);
221
1
  ccv_nnc_tensor_free(w);
222
1
  ccv_nnc_tensor_free(y);
223
1
  ccv_nnc_tensor_free(hy);
224
1
  ccv_nnc_tensor_free(cy);
225
1
  ccv_nnc_tensor_free(r);
226
1
}
227
228
TEST_CASE("LSTM forward with projection, bidirectional")
229
1
{
230
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_LSTM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN));
231
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
232
1
  ccv_nnc_tensor_t* const hx = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 12, 1, 12), 0);
233
1
  ccv_nnc_tensor_t* const cx = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 12, 1, 24), 0);
234
1
  const int weight_d = weight_dim(1, 6, 24, 24, 12, 1);
235
1
  ccv_nnc_tensor_t* const w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, weight_d, 24), 0);
236
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
237
1
  ccv_nnc_tensor_t* const hy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 12, 1, 12), 0);
238
1
  ccv_nnc_tensor_t* const cy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 12, 1, 24), 0);
239
1
  const int r_d = r_dim(1, 1, 1, 6, 5, 24, 12);
240
1
  ccv_nnc_tensor_t* const r = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, r_d, 24), 0);
241
1
  dsfmt_t dsfmt;
242
1
  dsfmt_init_gen_rand(&dsfmt, 0);
243
1
  int i;
244
121
  for (i = 0; i < 5 * 24; i++)
245
120
    x->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
246
37
  for (i = 0; i < 3 * 12; i++)
247
36
    hx->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
248
73
  for (i = 0; i < 3 * 24; i++)
249
72
    cx->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
250
47.2k
  for (i = 0; i < 24 * weight_d; i++)
251
47.2k
    w->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
252
1
  ccv_nnc_tensor_t* const gx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
253
1
  ccv_nnc_tensor_t* const ghx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 12, 1, 12), 0);
254
1
  ccv_nnc_tensor_t* const gcx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 12, 1, 24), 0);
255
1
  ccv_nnc_tensor_t* const gw = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, weight_d, 24), 0);
256
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x, hx, cx, w), TENSOR_LIST(gx, ghx, gcx, gw), 0);
257
1
  ccv_nnc_tensor_t* const gy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
258
1
  ccv_nnc_tensor_t* const ghy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 12, 1, 12), 0);
259
1
  ccv_nnc_tensor_t* const gcy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 12, 1, 24), 0);
260
1
  ccv_nnc_tensor_t* const gr = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, r_d, 24), 0);
261
1
  ccv_nnc_cmd_exec(CMD_LSTM_FORWARD(24, 12, 6, 1, 0, 1, 0.5, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gx, 0, ghx, gcx, gw), TENSOR_LIST(gy, ghy, gcy, gr), 0);
262
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gy, ghy, gcy, gr), TENSOR_LIST(y, hy, cy, r), 0);
263
1
  ccv_nnc_tensor_free(gx);
264
1
  ccv_nnc_tensor_free(ghx);
265
1
  ccv_nnc_tensor_free(gcx);
266
1
  ccv_nnc_tensor_free(gw);
267
1
  ccv_nnc_tensor_free(gy);
268
1
  ccv_nnc_tensor_free(ghy);
269
1
  ccv_nnc_tensor_free(gcy);
270
1
  ccv_nnc_tensor_free(gr);
271
1
  ccv_nnc_tensor_free(x);
272
1
  ccv_nnc_tensor_free(hx);
273
1
  ccv_nnc_tensor_free(cx);
274
1
  ccv_nnc_tensor_free(w);
275
1
  ccv_nnc_tensor_free(y);
276
1
  ccv_nnc_tensor_free(hy);
277
1
  ccv_nnc_tensor_free(cy);
278
1
  ccv_nnc_tensor_free(r);
279
1
}
280
281
TEST_CASE("LSTM backward")
282
1
{
283
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_LSTM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
284
1
    ccv_nnc_cmd_ok(CCV_NNC_LSTM_BACKWARD, CCV_NNC_BACKEND_GPU_CUDNN));
285
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
286
1
  ccv_nnc_tensor_t* const hx = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
287
1
  ccv_nnc_tensor_t* const cx = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
288
1
  const int weight_d = weight_dim(0, 6, 24, 24, 24, 1);
289
1
  ccv_nnc_tensor_t* const w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, weight_d, 24), 0);
290
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
291
1
  ccv_nnc_tensor_t* const hy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
292
1
  ccv_nnc_tensor_t* const cy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
293
1
  const int r_d = r_dim(0, 0, 1, 6, 5, 24, 24);
294
1
  ccv_nnc_tensor_t* const r = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, r_d, 24), 0);
295
1
  dsfmt_t dsfmt;
296
1
  dsfmt_init_gen_rand(&dsfmt, 0);
297
1
  int i;
298
121
  for (i = 0; i < 5 * 24; i++)
299
120
    x->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
300
145
  for (i = 0; i < 6 * 24; i++)
301
144
    hx->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
302
145
  for (i = 0; i < 6 * 24; i++)
303
144
    cx->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
304
28.8k
  for (i = 0; i < 24 * weight_d; i++)
305
28.8k
    w->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
306
1
  ccv_nnc_tensor_t* const gx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
307
1
  ccv_nnc_tensor_t* const ghx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
308
1
  ccv_nnc_tensor_t* const gcx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
309
1
  ccv_nnc_tensor_t* const gw = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, weight_d, 24), 0);
310
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x, hx, cx, w), TENSOR_LIST(gx, ghx, gcx, gw), 0);
311
1
  ccv_nnc_tensor_t* const gy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
312
1
  ccv_nnc_tensor_t* const ghy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
313
1
  ccv_nnc_tensor_t* const gcy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
314
1
  ccv_nnc_tensor_t* const gr = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, r_d, 24), 0);
315
1
  ccv_nnc_cmd_exec(CMD_LSTM_FORWARD(24, 0, 6, 1, 0, 0, 0, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gx, 0, ghx, gcx, gw), TENSOR_LIST(gy, ghy, gcy, gr), 0);
316
1
  ccv_nnc_tensor_t* const dy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
317
1
  ccv_nnc_tensor_t* const dhy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
318
1
  ccv_nnc_tensor_t* const dcy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
319
121
  for (i = 0; i < 5 * 24; i++)
320
120
    dy->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
321
145
  for (i = 0; i < 6 * 24; i++)
322
144
    dhy->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
323
145
  for (i = 0; i < 6 * 24; i++)
324
144
    dcy->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
325
1
  ccv_nnc_tensor_t* const gdy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
326
1
  ccv_nnc_tensor_t* const gdhy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
327
1
  ccv_nnc_tensor_t* const gdcy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
328
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dy, dhy, dcy), TENSOR_LIST(gdy, gdhy, gdcy), 0);
329
1
  ccv_nnc_tensor_t* const gdx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
330
1
  ccv_nnc_tensor_t* const gdhx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
331
1
  ccv_nnc_tensor_t* const gdcx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
332
1
  ccv_nnc_tensor_t* const gdw = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, weight_d, 24), 0);
333
1
  ccv_nnc_cmd_exec(CMD_LSTM_BACKWARD(24, 0, 6, 1, 0, 0, 0, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gdy, gdhy, gdcy, 0, gx, 0, ghx, gcx, gw, gy, ghy, gcy, gr), TENSOR_LIST(gdx, 0, gdhx, gdcx, gdw), 0);
334
1
  ccv_nnc_tensor_t* const dx = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
335
1
  ccv_nnc_tensor_t* const dhx = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
336
1
  ccv_nnc_tensor_t* const dcx = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
337
1
  ccv_nnc_tensor_t* const dw = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, weight_d, 24), 0);
338
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gy, ghy, gcy, gr, gdx, gdhx, gdcx, gdw), TENSOR_LIST(y, hy, cy, r, dx, dhx, dcx, dw), 0);
339
1
  ccv_nnc_tensor_free(gx);
340
1
  ccv_nnc_tensor_free(ghx);
341
1
  ccv_nnc_tensor_free(gcx);
342
1
  ccv_nnc_tensor_free(gw);
343
1
  ccv_nnc_tensor_free(gy);
344
1
  ccv_nnc_tensor_free(ghy);
345
1
  ccv_nnc_tensor_free(gcy);
346
1
  ccv_nnc_tensor_free(gr);
347
1
  ccv_nnc_tensor_free(gdy);
348
1
  ccv_nnc_tensor_free(gdhy);
349
1
  ccv_nnc_tensor_free(gdcy);
350
1
  ccv_nnc_tensor_free(gdx);
351
1
  ccv_nnc_tensor_free(gdhx);
352
1
  ccv_nnc_tensor_free(gdcx);
353
1
  ccv_nnc_tensor_free(gdw);
354
1
  ccv_nnc_tensor_free(x);
355
1
  ccv_nnc_tensor_free(hx);
356
1
  ccv_nnc_tensor_free(cx);
357
1
  ccv_nnc_tensor_free(w);
358
1
  ccv_nnc_tensor_free(y);
359
1
  ccv_nnc_tensor_free(hy);
360
1
  ccv_nnc_tensor_free(cy);
361
1
  ccv_nnc_tensor_free(r);
362
1
  ccv_nnc_tensor_free(dy);
363
1
  ccv_nnc_tensor_free(dhy);
364
1
  ccv_nnc_tensor_free(dcy);
365
1
  ccv_nnc_tensor_free(dx);
366
1
  ccv_nnc_tensor_free(dhx);
367
1
  ccv_nnc_tensor_free(dcx);
368
1
  ccv_nnc_tensor_free(dw);
369
1
}
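
The CMD_LSTM_BACKWARD calls in this file follow one fixed argument layout: the incoming gradients come first, then the forward-pass inputs and outputs, with the r tensor last, and the outputs mirror the forward inputs. Summarized from the calls in this file (an observation about these tests, not a statement of the general API contract):

  /* inputs:  dy, dhy, dcy, 0, x, 0, hx, cx, w, y, hy, cy, r
     outputs: dx, 0, dhx, dcx, dw
     The 0 slots are the positions left empty in every call here; tests that skip
     dhy/dcy or dhx/dcx simply pass 0 in those positions as well. */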
370
371
TEST_CASE("LSTM backward without dhy, dcy, dhx, dcx")
372
1
{
373
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_LSTM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
374
1
    ccv_nnc_cmd_ok(CCV_NNC_LSTM_BACKWARD, CCV_NNC_BACKEND_GPU_CUDNN));
375
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
376
1
  ccv_nnc_tensor_t* const hx = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
377
1
  ccv_nnc_tensor_t* const cx = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
378
1
  const int weight_d = weight_dim(0, 6, 24, 24, 24, 1);
379
1
  ccv_nnc_tensor_t* const w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, weight_d, 24), 0);
380
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
381
1
  ccv_nnc_tensor_t* const hy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
382
1
  ccv_nnc_tensor_t* const cy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1, 24), 0);
383
1
  const int r_d = r_dim(0, 0, 1, 6, 5, 24, 24);
384
1
  ccv_nnc_tensor_t* const r = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, r_d, 24), 0);
385
1
  dsfmt_t dsfmt;
386
1
  dsfmt_init_gen_rand(&dsfmt, 0);
387
1
  int i;
388
121
  for (i = 0; i < 5 * 24; i++)
389
120
    x->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
390
145
  for (i = 0; i < 6 * 24; i++)
391
144
    hx->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
392
145
  for (i = 0; i < 6 * 24; i++)
393
144
    cx->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
394
28.8k
  for (i = 0; i < 24 * weight_d; i++)
395
28.8k
    w->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
396
1
  ccv_nnc_tensor_t* const gx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
397
1
  ccv_nnc_tensor_t* const ghx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
398
1
  ccv_nnc_tensor_t* const gcx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
399
1
  ccv_nnc_tensor_t* const gw = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, weight_d, 24), 0);
400
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x, hx, cx, w), TENSOR_LIST(gx, ghx, gcx, gw), 0);
401
1
  ccv_nnc_tensor_t* const gy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
402
1
  ccv_nnc_tensor_t* const ghy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
403
1
  ccv_nnc_tensor_t* const gcy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1, 24), 0);
404
1
  ccv_nnc_tensor_t* const gr = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, r_d, 24), 0);
405
1
  ccv_nnc_cmd_exec(CMD_LSTM_FORWARD(24, 0, 6, 1, 0, 0, 0, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gx, 0, ghx, gcx, gw), TENSOR_LIST(gy, ghy, gcy, gr), 0);
406
1
  ccv_nnc_tensor_t* const dy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
407
121
  for (i = 0; i < 5 * 24; i++)
408
120
    dy->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
409
1
  ccv_nnc_tensor_t* const gdy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
410
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dy), TENSOR_LIST(gdy), 0);
411
1
  ccv_nnc_tensor_t* const gdx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
412
1
  ccv_nnc_tensor_t* const gdw = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, weight_d, 24), 0);
413
1
  ccv_nnc_cmd_exec(CMD_LSTM_BACKWARD(24, 0, 6, 1, 0, 0, 0, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gdy, 0, 0, 0, gx, 0, ghx, gcx, gw, gy, ghy, gcy, gr), TENSOR_LIST(gdx, 0, 0, 0, gdw), 0);
414
1
  ccv_nnc_tensor_t* const dx = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
415
1
  ccv_nnc_tensor_t* const dw = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, weight_d, 24), 0);
416
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gy, ghy, gcy, gr, gdx, gdw), TENSOR_LIST(y, hy, cy, r, dx, dw), 0);
417
1
  ccv_nnc_tensor_free(gx);
418
1
  ccv_nnc_tensor_free(ghx);
419
1
  ccv_nnc_tensor_free(gcx);
420
1
  ccv_nnc_tensor_free(gw);
421
1
  ccv_nnc_tensor_free(gy);
422
1
  ccv_nnc_tensor_free(ghy);
423
1
  ccv_nnc_tensor_free(gcy);
424
1
  ccv_nnc_tensor_free(gr);
425
1
  ccv_nnc_tensor_free(gdy);
426
1
  ccv_nnc_tensor_free(gdx);
427
1
  ccv_nnc_tensor_free(gdw);
428
1
  ccv_nnc_tensor_free(x);
429
1
  ccv_nnc_tensor_free(hx);
430
1
  ccv_nnc_tensor_free(cx);
431
1
  ccv_nnc_tensor_free(w);
432
1
  ccv_nnc_tensor_free(y);
433
1
  ccv_nnc_tensor_free(hy);
434
1
  ccv_nnc_tensor_free(cy);
435
1
  ccv_nnc_tensor_free(r);
436
1
  ccv_nnc_tensor_free(dy);
437
1
  ccv_nnc_tensor_free(dx);
438
1
  ccv_nnc_tensor_free(dw);
439
1
}
440
441
TEST_CASE("LSTM backward without hx, cx, hy, cy, dhy, dcy, dhx, dcx")
442
1
{
443
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_LSTM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
444
1
    ccv_nnc_cmd_ok(CCV_NNC_LSTM_BACKWARD, CCV_NNC_BACKEND_GPU_CUDNN));
445
1
  ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
446
1
  const int weight_d = weight_dim(0, 6, 24, 24, 24, 1);
447
1
  ccv_nnc_tensor_t* const w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, weight_d, 24), 0);
448
1
  ccv_nnc_tensor_t* const y = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
449
1
  const int r_d = r_dim(0, 0, 1, 6, 5, 24, 24);
450
1
  ccv_nnc_tensor_t* const r = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, r_d, 24), 0);
451
1
  dsfmt_t dsfmt;
452
1
  dsfmt_init_gen_rand(&dsfmt, 0);
453
1
  int i;
454
121
  for (i = 0; i < 5 * 24; i++)
455
120
    x->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
456
28.8k
  for (i = 0; i < 24 * weight_d; i++)
457
28.8k
    w->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
458
1
  ccv_nnc_tensor_t* const gx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
459
1
  ccv_nnc_tensor_t* const gw = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, weight_d, 24), 0);
460
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x, w), TENSOR_LIST(gx, gw), 0);
461
1
  ccv_nnc_tensor_t* const gy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
462
1
  ccv_nnc_tensor_t* const gr = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, r_d, 24), 0);
463
1
  ccv_nnc_cmd_exec(CMD_LSTM_FORWARD(24, 0, 6, 1, 0, 0, 0, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gx, 0, 0, 0, gw), TENSOR_LIST(gy, 0, 0, gr), 0);
464
1
  ccv_nnc_tensor_t* const dy = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
465
121
  for (i = 0; i < 5 * 24; i++)
466
120
    dy->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
467
1
  ccv_nnc_tensor_t* const gdy = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
468
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dy), TENSOR_LIST(gdy), 0);
469
1
  ccv_nnc_tensor_t* const gdx = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 1, 24), 0);
470
1
  ccv_nnc_tensor_t* const gdw = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, weight_d, 24), 0);
471
1
  ccv_nnc_cmd_exec(CMD_LSTM_BACKWARD(24, 0, 6, 1, 0, 0, 0, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gdy, 0, 0, 0, gx, 0, 0, 0, gw, gy, 0, 0, gr), TENSOR_LIST(gdx, 0, 0, 0, gdw), 0);
472
1
  ccv_nnc_tensor_t* const dx = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 1, 24), 0);
473
1
  ccv_nnc_tensor_t* const dw = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, weight_d, 24), 0);
474
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gy, gr, gdx, gdw), TENSOR_LIST(y, r, dx, dw), 0);
475
1
  ccv_nnc_tensor_free(gx);
476
1
  ccv_nnc_tensor_free(gw);
477
1
  ccv_nnc_tensor_free(gy);
478
1
  ccv_nnc_tensor_free(gr);
479
1
  ccv_nnc_tensor_free(gdy);
480
1
  ccv_nnc_tensor_free(gdx);
481
1
  ccv_nnc_tensor_free(gdw);
482
1
  ccv_nnc_tensor_free(x);
483
1
  ccv_nnc_tensor_free(w);
484
1
  ccv_nnc_tensor_free(y);
485
1
  ccv_nnc_tensor_free(r);
486
1
  ccv_nnc_tensor_free(dy);
487
1
  ccv_nnc_tensor_free(dx);
488
1
  ccv_nnc_tensor_free(dw);
489
1
}
490
491
#include "case_main.h"