Coverage Report

Created: 2024-08-18 16:21

/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/lamb.tests.c
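
Note on what this file covers: each TEST_CASE below computes a reference LAMB step on the CPU with CMD_LAMB_FORWARD, re-runs the same step through the GPU backend (after CMD_DATA_TRANSFER_FORWARD and, in the half/mixed-precision cases, CMD_DATATYPE_CONVERSION_FORWARD), and checks that the transferred-back results match the CPU reference, exactly or within a 1e-3 tolerance. For orientation only, here is a sketch of the textbook LAMB update (You et al.), assuming the arguments of CMD_LAMB_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9) map to step t, learning rate eta, beta1, beta2, weight decay lambda, and epsilon; that mapping, and whether ccv's kernel follows this exact formulation, are assumptions, not restatements of the source:

    m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t
    v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2
    \hat{m}_t = m_t / (1 - \beta_1^t), \qquad \hat{v}_t = v_t / (1 - \beta_2^t)
    r_t = \hat{m}_t / (\sqrt{\hat{v}_t} + \epsilon) + \lambda \theta_{t-1}
    \theta_t = \theta_{t-1} - \eta \cdot (\|\theta_{t-1}\| / \|r_t\|) \cdot r_t

Reading the tensor lists against this signature, g appears to be the gradient, a the parameters, and m, v the running moments, with b, n, u receiving the updated parameters and moments; this is inferred from LAMB's structure rather than stated in the report.
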
Line | Count | Source
-----+-------+---------------------------------------------------------------
   1 |       | #include "case.h"
   2 |       | #include "ccv_case.h"
   3 |       | #include "ccv_nnc_case.h"
   4 |       | #include <ccv.h>
   5 |       | #include <nnc/ccv_nnc.h>
   6 |       | #include <nnc/ccv_nnc_easy.h>
   7 |       | #include <3rdparty/dsfmt/dSFMT.h>
   8 |       |
   9 |       | TEST_SETUP()
  10 |       | {
  11 |       |   ccv_nnc_init();
  12 |       | }
  13 |       |
  14 |       | TEST_CASE("lamb in float")
  15 |     1 | {
  16 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_LAMB_FORWARD, CCV_NNC_BACKEND_GPU_REF));
  17 |     1 |   ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  18 |     1 |   ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  19 |     1 |   ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  20 |     1 |   ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  21 |     1 |   ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  22 |     1 |   ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  23 |     1 |   ccv_nnc_tensor_t* const u = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  24 |     1 |   dsfmt_t dsfmt;
  25 |     1 |   dsfmt_init_gen_rand(&dsfmt, 0);
  26 |     1 |   int i;
  27 |    11 |   for (i = 0; i < 10; i++)
  28 |    10 |     g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  29 |    11 |   for (i = 0; i < 10; i++)
  30 |    10 |     a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  31 |    11 |   for (i = 0; i < 10; i++)
  32 |    10 |     m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  33 |    11 |   for (i = 0; i < 10; i++)
  34 |    10 |     v->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  35 |     1 |   ccv_nnc_cmd_exec(CMD_LAMB_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v), TENSOR_LIST(b, n, u), 0);
  36 |     1 |   ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
  37 |     1 |   ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
  38 |     1 |   ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
  39 |     1 |   ccv_nnc_tensor_t* const gv = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
  40 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v), TENSOR_LIST(gg, ga, gm, gv), 0);
  41 |     1 |   ccv_nnc_cmd_exec(CMD_LAMB_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(ga, gm, gv), 0);
  42 |     1 |   ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  43 |     1 |   ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  44 |     1 |   ccv_nnc_tensor_t* const gut = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  45 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm, gv), TENSOR_LIST(gbt, gnt, gut), 0);
  46 |     1 |   REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
  47 |     1 |   REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
  48 |     1 |   REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
  49 |     1 |   ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
  50 |     1 |   ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
  51 |     1 |   ccv_nnc_tensor_t* const gu = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
  52 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v), TENSOR_LIST(ga, gm, gv), 0);
  53 |     1 |   ccv_nnc_cmd_exec(CMD_LAMB_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(gb, gn, gu), 0);
  54 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn, gu), TENSOR_LIST(gbt, gnt, gut), 0);
  55 |     1 |   REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
  56 |     1 |   REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
  57 |     1 |   REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
  58 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v), TENSOR_LIST(ga, gm, gv), 0);
  59 |     1 |   ccv_nnc_cmd_exec(CMD_LAMB_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(gb, gm, gv), 0);
  60 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gm, gv), TENSOR_LIST(gbt, gnt, gut), 0);
  61 |     1 |   REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
  62 |     1 |   REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
  63 |     1 |   REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
  64 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v), TENSOR_LIST(ga, gm, gv), 0);
  65 |     1 |   ccv_nnc_cmd_exec(CMD_LAMB_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(ga, gn, gu), 0);
  66 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gn, gu), TENSOR_LIST(gbt, gnt, gut), 0);
  67 |     1 |   REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
  68 |     1 |   REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
  69 |     1 |   REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
  70 |     1 |   ccv_nnc_tensor_free(g);
  71 |     1 |   ccv_nnc_tensor_free(a);
  72 |     1 |   ccv_nnc_tensor_free(m);
  73 |     1 |   ccv_nnc_tensor_free(v);
  74 |     1 |   ccv_nnc_tensor_free(b);
  75 |     1 |   ccv_nnc_tensor_free(n);
  76 |     1 |   ccv_nnc_tensor_free(u);
  77 |     1 |   ccv_nnc_tensor_free(gg);
  78 |     1 |   ccv_nnc_tensor_free(ga);
  79 |     1 |   ccv_nnc_tensor_free(gm);
  80 |     1 |   ccv_nnc_tensor_free(gv);
  81 |     1 |   ccv_nnc_tensor_free(gb);
  82 |     1 |   ccv_nnc_tensor_free(gn);
  83 |     1 |   ccv_nnc_tensor_free(gu);
  84 |     1 |   ccv_nnc_tensor_free(gbt);
  85 |     1 |   ccv_nnc_tensor_free(gnt);
  86 |     1 |   ccv_nnc_tensor_free(gut);
  87 |     1 | }
  88 |       |
  89 |       | TEST_CASE("lamb in half precision")
  90 |     1 | {
  91 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_LAMB_FORWARD, CCV_NNC_BACKEND_GPU_REF));
  92 |     1 |   ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  93 |     1 |   ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  94 |     1 |   ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  95 |     1 |   ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  96 |     1 |   ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  97 |     1 |   ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  98 |     1 |   ccv_nnc_tensor_t* const u = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  99 |     1 |   ccv_nnc_tensor_t* const g16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 100 |     1 |   ccv_nnc_tensor_t* const a16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 101 |     1 |   ccv_nnc_tensor_t* const m16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 102 |     1 |   ccv_nnc_tensor_t* const v16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 103 |     1 |   ccv_nnc_tensor_t* const b16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 104 |     1 |   ccv_nnc_tensor_t* const n16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 105 |     1 |   ccv_nnc_tensor_t* const u16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 106 |     1 |   dsfmt_t dsfmt;
 107 |     1 |   dsfmt_init_gen_rand(&dsfmt, 0);
 108 |     1 |   int i;
 109 |    11 |   for (i = 0; i < 10; i++)
 110 |    10 |     g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 111 |    11 |   for (i = 0; i < 10; i++)
 112 |    10 |     a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 113 |    11 |   for (i = 0; i < 10; i++)
 114 |    10 |     m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 115 |    11 |   for (i = 0; i < 10; i++)
 116 |    10 |     v->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 117 |     1 |   ccv_nnc_cmd_exec(CMD_LAMB_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v), TENSOR_LIST(b, n, u), 0);
 118 |     1 |   ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
 119 |     1 |   ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
 120 |     1 |   ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
 121 |     1 |   ccv_nnc_tensor_t* const gv = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
 122 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v), TENSOR_LIST(g16, a16, m16, v16), 0);
 123 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g16, a16, m16, v16), TENSOR_LIST(gg, ga, gm, gv), 0);
 124 |     1 |   ccv_nnc_cmd_exec(CMD_LAMB_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(ga, gm, gv), 0);
 125 |     1 |   ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 126 |     1 |   ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 127 |     1 |   ccv_nnc_tensor_t* const gut = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 128 |     1 |   ccv_nnc_tensor_t* const gbt16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 129 |     1 |   ccv_nnc_tensor_t* const gnt16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 130 |     1 |   ccv_nnc_tensor_t* const gut16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 131 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm, gv), TENSOR_LIST(gbt16, gnt16, gut16), 0);
 132 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gbt16, gnt16, gut16), TENSOR_LIST(gbt, gnt, gut), 0);
 133 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
 134 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
 135 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
 136 |     1 |   ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
 137 |     1 |   ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
 138 |     1 |   ccv_nnc_tensor_t* const gu = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
 139 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a16, m16, v16), TENSOR_LIST(ga, gm, gv), 0);
 140 |     1 |   ccv_nnc_cmd_exec(CMD_LAMB_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(gb, gn, gu), 0);
 141 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn, gu), TENSOR_LIST(gbt16, gnt16, gut16), 0);
 142 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gbt16, gnt16, gut16), TENSOR_LIST(gbt, gnt, gut), 0);
 143 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
 144 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
 145 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
 146 |     1 |   ccv_nnc_tensor_free(g);
 147 |     1 |   ccv_nnc_tensor_free(a);
 148 |     1 |   ccv_nnc_tensor_free(m);
 149 |     1 |   ccv_nnc_tensor_free(v);
 150 |     1 |   ccv_nnc_tensor_free(b);
 151 |     1 |   ccv_nnc_tensor_free(n);
 152 |     1 |   ccv_nnc_tensor_free(u);
 153 |     1 |   ccv_nnc_tensor_free(g16);
 154 |     1 |   ccv_nnc_tensor_free(a16);
 155 |     1 |   ccv_nnc_tensor_free(m16);
 156 |     1 |   ccv_nnc_tensor_free(v16);
 157 |     1 |   ccv_nnc_tensor_free(b16);
 158 |     1 |   ccv_nnc_tensor_free(n16);
 159 |     1 |   ccv_nnc_tensor_free(u16);
 160 |     1 |   ccv_nnc_tensor_free(gg);
 161 |     1 |   ccv_nnc_tensor_free(ga);
 162 |     1 |   ccv_nnc_tensor_free(gm);
 163 |     1 |   ccv_nnc_tensor_free(gv);
 164 |     1 |   ccv_nnc_tensor_free(gb);
 165 |     1 |   ccv_nnc_tensor_free(gn);
 166 |     1 |   ccv_nnc_tensor_free(gu);
 167 |     1 |   ccv_nnc_tensor_free(gbt);
 168 |     1 |   ccv_nnc_tensor_free(gnt);
 169 |     1 |   ccv_nnc_tensor_free(gut);
 170 |     1 |   ccv_nnc_tensor_free(gbt16);
 171 |     1 |   ccv_nnc_tensor_free(gnt16);
 172 |     1 |   ccv_nnc_tensor_free(gut16);
 173 |     1 | }
 174 |       |
 175 |       | TEST_CASE("lamb in mixed precision")
 176 |     1 | {
 177 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_LAMB_FORWARD, CCV_NNC_BACKEND_GPU_REF));
 178 |     1 |   ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 179 |     1 |   ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 180 |     1 |   ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 181 |     1 |   ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 182 |     1 |   ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 183 |     1 |   ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 184 |     1 |   ccv_nnc_tensor_t* const u = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 185 |     1 |   ccv_nnc_tensor_t* const g16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 186 |     1 |   dsfmt_t dsfmt;
 187 |     1 |   dsfmt_init_gen_rand(&dsfmt, 0);
 188 |     1 |   int i;
 189 |    11 |   for (i = 0; i < 10; i++)
 190 |    10 |     g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 191 |    11 |   for (i = 0; i < 10; i++)
 192 |    10 |     a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 193 |    11 |   for (i = 0; i < 10; i++)
 194 |    10 |     m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 195 |    11 |   for (i = 0; i < 10; i++)
 196 |    10 |     v->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 197 |     1 |   ccv_nnc_cmd_exec(CMD_LAMB_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v), TENSOR_LIST(b, n, u), 0);
 198 |     1 |   ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
 199 |     1 |   ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 200 |     1 |   ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 201 |     1 |   ccv_nnc_tensor_t* const gv = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 202 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g), TENSOR_LIST(g16), 0);
 203 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g16, a, m, v), TENSOR_LIST(gg, ga, gm, gv), 0);
 204 |     1 |   ccv_nnc_cmd_exec(CMD_LAMB_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(ga, gm, gv), 0);
 205 |     1 |   ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 206 |     1 |   ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 207 |     1 |   ccv_nnc_tensor_t* const gut = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 208 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm, gv), TENSOR_LIST(gbt, gnt, gut), 0);
 209 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
 210 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
 211 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
 212 |     1 |   ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 213 |     1 |   ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 214 |     1 |   ccv_nnc_tensor_t* const gu = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 215 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v), TENSOR_LIST(ga, gm, gv), 0);
 216 |     1 |   ccv_nnc_cmd_exec(CMD_LAMB_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(gb, gn, gu), 0);
 217 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn, gu), TENSOR_LIST(gbt, gnt, gut), 0);
 218 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
 219 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
 220 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
 221 |     1 |   ccv_nnc_tensor_free(g);
 222 |     1 |   ccv_nnc_tensor_free(a);
 223 |     1 |   ccv_nnc_tensor_free(m);
 224 |     1 |   ccv_nnc_tensor_free(v);
 225 |     1 |   ccv_nnc_tensor_free(b);
 226 |     1 |   ccv_nnc_tensor_free(n);
 227 |     1 |   ccv_nnc_tensor_free(u);
 228 |     1 |   ccv_nnc_tensor_free(g16);
 229 |     1 |   ccv_nnc_tensor_free(gg);
 230 |     1 |   ccv_nnc_tensor_free(ga);
 231 |     1 |   ccv_nnc_tensor_free(gm);
 232 |     1 |   ccv_nnc_tensor_free(gv);
 233 |     1 |   ccv_nnc_tensor_free(gb);
 234 |     1 |   ccv_nnc_tensor_free(gn);
 235 |     1 |   ccv_nnc_tensor_free(gu);
 236 |     1 |   ccv_nnc_tensor_free(gbt);
 237 |     1 |   ccv_nnc_tensor_free(gnt);
 238 |     1 |   ccv_nnc_tensor_free(gut);
 239 |     1 | }
 240 |       |
 241 |       | #include "case_main.h"