Coverage Report

Created: 2025-02-24 17:43

/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/adam.tests.c
Line | Count | Source
1
#include "case.h"
2
#include "ccv_case.h"
3
#include "ccv_nnc_case.h"
4
#include <ccv.h>
5
#include <nnc/ccv_nnc.h>
6
#include <nnc/ccv_nnc_easy.h>
7
#include <3rdparty/dsfmt/dSFMT.h>
8
9
TEST_SETUP()
10
{
11
  ccv_nnc_init();
12
}
13
14
TEST_CASE("adam in float")
15
1
{
16
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_ADAM_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_ADAM_FORWARD, CCV_NNC_BACKEND_MPS));
17
1
  ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
18
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
19
1
  ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
20
1
  ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
21
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
22
1
  ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
23
1
  ccv_nnc_tensor_t* const u = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
24
1
  dsfmt_t dsfmt;
25
1
  dsfmt_init_gen_rand(&dsfmt, 0);
26
1
  int i;
27
11
  for (i = 0; i < 10; i++)
28
10
    g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
29
11
  for (i = 0; i < 10; i++)
30
10
    a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
31
11
  for (i = 0; i < 10; i++)
32
10
    m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
33
11
  for (i = 0; i < 10; i++)
34
10
    v->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
35
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v), TENSOR_LIST(b, n, u), 0);
36
1
  ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
37
1
  ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
38
1
  ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
39
1
  ccv_nnc_tensor_t* const gv = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
40
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v), TENSOR_LIST(gg, ga, gm, gv), 0);
41
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(ga, gm, gv), 0);
42
1
  ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
43
1
  ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
44
1
  ccv_nnc_tensor_t* const gut = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
45
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm, gv), TENSOR_LIST(gbt, gnt, gut), 0);
46
1
  REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
47
1
  REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
48
1
  REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
49
1
  ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
50
1
  ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
51
1
  ccv_nnc_tensor_t* const gu = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
52
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v), TENSOR_LIST(ga, gm, gv), 0);
53
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(gb, gn, gu), 0);
54
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn, gu), TENSOR_LIST(gbt, gnt, gut), 0);
55
1
  REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
56
1
  REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
57
1
  REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
58
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v), TENSOR_LIST(ga, gm, gv), 0);
59
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(gb, gm, gv), 0);
60
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gm, gv), TENSOR_LIST(gbt, gnt, gut), 0);
61
1
  REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
62
1
  REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
63
1
  REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
64
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v), TENSOR_LIST(ga, gm, gv), 0);
65
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(ga, gn, gu), 0);
66
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gn, gu), TENSOR_LIST(gbt, gnt, gut), 0);
67
1
  REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
68
1
  REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
69
1
  REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
70
1
  ccv_nnc_tensor_free(g);
71
1
  ccv_nnc_tensor_free(a);
72
1
  ccv_nnc_tensor_free(m);
73
1
  ccv_nnc_tensor_free(v);
74
1
  ccv_nnc_tensor_free(b);
75
1
  ccv_nnc_tensor_free(n);
76
1
  ccv_nnc_tensor_free(u);
77
1
  ccv_nnc_tensor_free(gg);
78
1
  ccv_nnc_tensor_free(ga);
79
1
  ccv_nnc_tensor_free(gm);
80
1
  ccv_nnc_tensor_free(gv);
81
1
  ccv_nnc_tensor_free(gb);
82
1
  ccv_nnc_tensor_free(gn);
83
1
  ccv_nnc_tensor_free(gu);
84
1
  ccv_nnc_tensor_free(gbt);
85
1
  ccv_nnc_tensor_free(gnt);
86
1
  ccv_nnc_tensor_free(gut);
87
1
}
88
89
TEST_CASE("adam in half precision")
90
1
{
91
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_ADAM_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_ADAM_FORWARD, CCV_NNC_BACKEND_MPS));
92
1
  ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
93
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
94
1
  ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
95
1
  ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
96
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
97
1
  ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
98
1
  ccv_nnc_tensor_t* const u = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
99
1
  ccv_nnc_tensor_t* const g16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
100
1
  ccv_nnc_tensor_t* const a16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
101
1
  ccv_nnc_tensor_t* const m16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
102
1
  ccv_nnc_tensor_t* const v16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
103
1
  ccv_nnc_tensor_t* const b16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
104
1
  ccv_nnc_tensor_t* const n16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
105
1
  ccv_nnc_tensor_t* const u16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
106
1
  dsfmt_t dsfmt;
107
1
  dsfmt_init_gen_rand(&dsfmt, 0);
108
1
  int i;
109
11
  for (i = 0; i < 10; i++)
110
10
    g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
111
11
  for (i = 0; i < 10; i++)
112
10
    a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
113
11
  for (i = 0; i < 10; i++)
114
10
    m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
115
11
  for (i = 0; i < 10; i++)
116
10
    v->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
117
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v), TENSOR_LIST(b, n, u), 0);
118
1
  ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
119
1
  ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
120
1
  ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
121
1
  ccv_nnc_tensor_t* const gv = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
122
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v), TENSOR_LIST(g16, a16, m16, v16), 0);
123
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g16, a16, m16, v16), TENSOR_LIST(gg, ga, gm, gv), 0);
124
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(ga, gm, gv), 0);
125
1
  ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
126
1
  ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
127
1
  ccv_nnc_tensor_t* const gut = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
128
1
  ccv_nnc_tensor_t* const gbt16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
129
1
  ccv_nnc_tensor_t* const gnt16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
130
1
  ccv_nnc_tensor_t* const gut16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
131
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm, gv), TENSOR_LIST(gbt16, gnt16, gut16), 0);
132
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gbt16, gnt16, gut16), TENSOR_LIST(gbt, gnt, gut), 0);
133
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
134
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
135
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
136
1
  ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
137
1
  ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
138
1
  ccv_nnc_tensor_t* const gu = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
139
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a16, m16, v16), TENSOR_LIST(ga, gm, gv), 0);
140
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(gb, gn, gu), 0);
141
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn, gu), TENSOR_LIST(gbt16, gnt16, gut16), 0);
142
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gbt16, gnt16, gut16), TENSOR_LIST(gbt, gnt, gut), 0);
143
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
144
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
145
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
146
1
  ccv_nnc_tensor_free(g);
147
1
  ccv_nnc_tensor_free(a);
148
1
  ccv_nnc_tensor_free(m);
149
1
  ccv_nnc_tensor_free(v);
150
1
  ccv_nnc_tensor_free(b);
151
1
  ccv_nnc_tensor_free(n);
152
1
  ccv_nnc_tensor_free(u);
153
1
  ccv_nnc_tensor_free(g16);
154
1
  ccv_nnc_tensor_free(a16);
155
1
  ccv_nnc_tensor_free(m16);
156
1
  ccv_nnc_tensor_free(v16);
157
1
  ccv_nnc_tensor_free(b16);
158
1
  ccv_nnc_tensor_free(n16);
159
1
  ccv_nnc_tensor_free(u16);
160
1
  ccv_nnc_tensor_free(gg);
161
1
  ccv_nnc_tensor_free(ga);
162
1
  ccv_nnc_tensor_free(gm);
163
1
  ccv_nnc_tensor_free(gv);
164
1
  ccv_nnc_tensor_free(gb);
165
1
  ccv_nnc_tensor_free(gn);
166
1
  ccv_nnc_tensor_free(gu);
167
1
  ccv_nnc_tensor_free(gbt);
168
1
  ccv_nnc_tensor_free(gnt);
169
1
  ccv_nnc_tensor_free(gut);
170
1
  ccv_nnc_tensor_free(gbt16);
171
1
  ccv_nnc_tensor_free(gnt16);
172
1
  ccv_nnc_tensor_free(gut16);
173
1
}
174
175
TEST_CASE("adam in mixed precision")
176
1
{
177
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_ADAM_FORWARD, CCV_NNC_BACKEND_GPU_REF));
178
1
  ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
179
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
180
1
  ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
181
1
  ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
182
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
183
1
  ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
184
1
  ccv_nnc_tensor_t* const u = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
185
1
  ccv_nnc_tensor_t* const g16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
186
1
  dsfmt_t dsfmt;
187
1
  dsfmt_init_gen_rand(&dsfmt, 0);
188
1
  int i;
189
11
  for (i = 0; i < 10; i++)
190
10
    g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
191
11
  for (i = 0; i < 10; i++)
192
10
    a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
193
11
  for (i = 0; i < 10; i++)
194
10
    m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
195
11
  for (i = 0; i < 10; i++)
196
10
    v->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
197
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v), TENSOR_LIST(b, n, u), 0);
198
1
  ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
199
1
  ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
200
1
  ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
201
1
  ccv_nnc_tensor_t* const gv = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
202
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g), TENSOR_LIST(g16), 0);
203
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g16, a, m, v), TENSOR_LIST(gg, ga, gm, gv), 0);
204
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(ga, gm, gv), 0);
205
1
  ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
206
1
  ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
207
1
  ccv_nnc_tensor_t* const gut = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
208
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm, gv), TENSOR_LIST(gbt, gnt, gut), 0);
209
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
210
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
211
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
212
1
  ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
213
1
  ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
214
1
  ccv_nnc_tensor_t* const gu = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
215
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v), TENSOR_LIST(ga, gm, gv), 0);
216
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(gb, gn, gu), 0);
217
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn, gu), TENSOR_LIST(gbt, gnt, gut), 0);
218
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
219
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
220
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
221
1
  ccv_nnc_tensor_free(g);
222
1
  ccv_nnc_tensor_free(a);
223
1
  ccv_nnc_tensor_free(m);
224
1
  ccv_nnc_tensor_free(v);
225
1
  ccv_nnc_tensor_free(b);
226
1
  ccv_nnc_tensor_free(n);
227
1
  ccv_nnc_tensor_free(u);
228
1
  ccv_nnc_tensor_free(g16);
229
1
  ccv_nnc_tensor_free(gg);
230
1
  ccv_nnc_tensor_free(ga);
231
1
  ccv_nnc_tensor_free(gm);
232
1
  ccv_nnc_tensor_free(gv);
233
1
  ccv_nnc_tensor_free(gb);
234
1
  ccv_nnc_tensor_free(gn);
235
1
  ccv_nnc_tensor_free(gu);
236
1
  ccv_nnc_tensor_free(gbt);
237
1
  ccv_nnc_tensor_free(gnt);
238
1
  ccv_nnc_tensor_free(gut);
239
1
}
240
241
TEST_CASE("adamw in float")
242
1
{
243
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_ADAMW_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_ADAMW_FORWARD, CCV_NNC_BACKEND_MPS));
244
1
  ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
245
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
246
1
  ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
247
1
  ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
248
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
249
1
  ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
250
1
  ccv_nnc_tensor_t* const u = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
251
1
  dsfmt_t dsfmt;
252
1
  dsfmt_init_gen_rand(&dsfmt, 0);
253
1
  int i;
254
11
  for (i = 0; i < 10; i++)
255
10
    g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
256
11
  for (i = 0; i < 10; i++)
257
10
    a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
258
11
  for (i = 0; i < 10; i++)
259
10
    m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
260
11
  for (i = 0; i < 10; i++)
261
10
    v->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
262
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v), TENSOR_LIST(b, n, u), 0);
263
1
  ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
264
1
  ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
265
1
  ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
266
1
  ccv_nnc_tensor_t* const gv = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
267
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v), TENSOR_LIST(gg, ga, gm, gv), 0);
268
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(ga, gm, gv), 0);
269
1
  ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
270
1
  ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
271
1
  ccv_nnc_tensor_t* const gut = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
272
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm, gv), TENSOR_LIST(gbt, gnt, gut), 0);
273
1
  REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
274
1
  REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
275
1
  REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
276
1
  ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
277
1
  ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
278
1
  ccv_nnc_tensor_t* const gu = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
279
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v), TENSOR_LIST(ga, gm, gv), 0);
280
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(gb, gn, gu), 0);
281
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn, gu), TENSOR_LIST(gbt, gnt, gut), 0);
282
1
  REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
283
1
  REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
284
1
  REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
285
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v), TENSOR_LIST(ga, gm, gv), 0);
286
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(gb, gm, gv), 0);
287
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gm, gv), TENSOR_LIST(gbt, gnt, gut), 0);
288
1
  REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
289
1
  REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
290
1
  REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
291
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v), TENSOR_LIST(ga, gm, gv), 0);
292
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(ga, gn, gu), 0);
293
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gn, gu), TENSOR_LIST(gbt, gnt, gut), 0);
294
1
  REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
295
1
  REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
296
1
  REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
297
1
  ccv_nnc_tensor_free(g);
298
1
  ccv_nnc_tensor_free(a);
299
1
  ccv_nnc_tensor_free(m);
300
1
  ccv_nnc_tensor_free(v);
301
1
  ccv_nnc_tensor_free(b);
302
1
  ccv_nnc_tensor_free(n);
303
1
  ccv_nnc_tensor_free(u);
304
1
  ccv_nnc_tensor_free(gg);
305
1
  ccv_nnc_tensor_free(ga);
306
1
  ccv_nnc_tensor_free(gm);
307
1
  ccv_nnc_tensor_free(gv);
308
1
  ccv_nnc_tensor_free(gb);
309
1
  ccv_nnc_tensor_free(gn);
310
1
  ccv_nnc_tensor_free(gu);
311
1
  ccv_nnc_tensor_free(gbt);
312
1
  ccv_nnc_tensor_free(gnt);
313
1
  ccv_nnc_tensor_free(gut);
314
1
}
315
316
TEST_CASE("adamw in half precision")
317
1
{
318
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_ADAMW_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_ADAMW_FORWARD, CCV_NNC_BACKEND_MPS));
319
1
  ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
320
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
321
1
  ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
322
1
  ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
323
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
324
1
  ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
325
1
  ccv_nnc_tensor_t* const u = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
326
1
  ccv_nnc_tensor_t* const g16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
327
1
  ccv_nnc_tensor_t* const a16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
328
1
  ccv_nnc_tensor_t* const m16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
329
1
  ccv_nnc_tensor_t* const v16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
330
1
  ccv_nnc_tensor_t* const b16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
331
1
  ccv_nnc_tensor_t* const n16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
332
1
  ccv_nnc_tensor_t* const u16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
333
1
  dsfmt_t dsfmt;
334
1
  dsfmt_init_gen_rand(&dsfmt, 0);
335
1
  int i;
336
11
  for (i = 0; i < 10; i++)
337
10
    g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
338
11
  for (i = 0; i < 10; i++)
339
10
    a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
340
11
  for (i = 0; i < 10; i++)
341
10
    m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
342
11
  for (i = 0; i < 10; i++)
343
10
    v->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
344
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v), TENSOR_LIST(b, n, u), 0);
345
1
  ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
346
1
  ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
347
1
  ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
348
1
  ccv_nnc_tensor_t* const gv = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
349
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v), TENSOR_LIST(g16, a16, m16, v16), 0);
350
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g16, a16, m16, v16), TENSOR_LIST(gg, ga, gm, gv), 0);
351
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(ga, gm, gv), 0);
352
1
  ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
353
1
  ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
354
1
  ccv_nnc_tensor_t* const gut = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
355
1
  ccv_nnc_tensor_t* const gbt16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
356
1
  ccv_nnc_tensor_t* const gnt16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
357
1
  ccv_nnc_tensor_t* const gut16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
358
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm, gv), TENSOR_LIST(gbt16, gnt16, gut16), 0);
359
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gbt16, gnt16, gut16), TENSOR_LIST(gbt, gnt, gut), 0);
360
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
361
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
362
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
363
1
  ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
364
1
  ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
365
1
  ccv_nnc_tensor_t* const gu = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
366
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a16, m16, v16), TENSOR_LIST(ga, gm, gv), 0);
367
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(gb, gn, gu), 0);
368
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn, gu), TENSOR_LIST(gbt16, gnt16, gut16), 0);
369
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gbt16, gnt16, gut16), TENSOR_LIST(gbt, gnt, gut), 0);
370
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
371
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
372
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
373
1
  ccv_nnc_tensor_free(g);
374
1
  ccv_nnc_tensor_free(a);
375
1
  ccv_nnc_tensor_free(m);
376
1
  ccv_nnc_tensor_free(v);
377
1
  ccv_nnc_tensor_free(b);
378
1
  ccv_nnc_tensor_free(n);
379
1
  ccv_nnc_tensor_free(u);
380
1
  ccv_nnc_tensor_free(g16);
381
1
  ccv_nnc_tensor_free(a16);
382
1
  ccv_nnc_tensor_free(m16);
383
1
  ccv_nnc_tensor_free(v16);
384
1
  ccv_nnc_tensor_free(b16);
385
1
  ccv_nnc_tensor_free(n16);
386
1
  ccv_nnc_tensor_free(u16);
387
1
  ccv_nnc_tensor_free(gg);
388
1
  ccv_nnc_tensor_free(ga);
389
1
  ccv_nnc_tensor_free(gm);
390
1
  ccv_nnc_tensor_free(gv);
391
1
  ccv_nnc_tensor_free(gb);
392
1
  ccv_nnc_tensor_free(gn);
393
1
  ccv_nnc_tensor_free(gu);
394
1
  ccv_nnc_tensor_free(gbt);
395
1
  ccv_nnc_tensor_free(gnt);
396
1
  ccv_nnc_tensor_free(gut);
397
1
  ccv_nnc_tensor_free(gbt16);
398
1
  ccv_nnc_tensor_free(gnt16);
399
1
  ccv_nnc_tensor_free(gut16);
400
1
}
401
402
TEST_CASE("adamw in mixed precision")
403
1
{
404
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_ADAMW_FORWARD, CCV_NNC_BACKEND_GPU_REF));
405
1
  ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
406
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
407
1
  ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
408
1
  ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
409
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
410
1
  ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
411
1
  ccv_nnc_tensor_t* const u = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
412
1
  ccv_nnc_tensor_t* const g16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
413
1
  dsfmt_t dsfmt;
414
1
  dsfmt_init_gen_rand(&dsfmt, 0);
415
1
  int i;
416
11
  for (i = 0; i < 10; i++)
417
10
    g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
418
11
  for (i = 0; i < 10; i++)
419
10
    a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
420
11
  for (i = 0; i < 10; i++)
421
10
    m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
422
11
  for (i = 0; i < 10; i++)
423
10
    v->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
424
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v), TENSOR_LIST(b, n, u), 0);
425
1
  ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
426
1
  ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
427
1
  ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
428
1
  ccv_nnc_tensor_t* const gv = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
429
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g), TENSOR_LIST(g16), 0);
430
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g16, a, m, v), TENSOR_LIST(gg, ga, gm, gv), 0);
431
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(ga, gm, gv), 0);
432
1
  ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
433
1
  ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
434
1
  ccv_nnc_tensor_t* const gut = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
435
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm, gv), TENSOR_LIST(gbt, gnt, gut), 0);
436
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
437
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
438
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
439
1
  ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
440
1
  ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
441
1
  ccv_nnc_tensor_t* const gu = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
442
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v), TENSOR_LIST(ga, gm, gv), 0);
443
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv), TENSOR_LIST(gb, gn, gu), 0);
444
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn, gu), TENSOR_LIST(gbt, gnt, gut), 0);
445
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
446
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
447
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
448
1
  ccv_nnc_tensor_free(g);
449
1
  ccv_nnc_tensor_free(a);
450
1
  ccv_nnc_tensor_free(m);
451
1
  ccv_nnc_tensor_free(v);
452
1
  ccv_nnc_tensor_free(b);
453
1
  ccv_nnc_tensor_free(n);
454
1
  ccv_nnc_tensor_free(u);
455
1
  ccv_nnc_tensor_free(g16);
456
1
  ccv_nnc_tensor_free(gg);
457
1
  ccv_nnc_tensor_free(ga);
458
1
  ccv_nnc_tensor_free(gm);
459
1
  ccv_nnc_tensor_free(gv);
460
1
  ccv_nnc_tensor_free(gb);
461
1
  ccv_nnc_tensor_free(gn);
462
1
  ccv_nnc_tensor_free(gu);
463
1
  ccv_nnc_tensor_free(gbt);
464
1
  ccv_nnc_tensor_free(gnt);
465
1
  ccv_nnc_tensor_free(gut);
466
1
}
467
468
TEST_CASE("adam amsgrad in float")
469
1
{
470
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_ADAM_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_ADAM_FORWARD, CCV_NNC_BACKEND_MPS));
471
1
  ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
472
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
473
1
  ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
474
1
  ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
475
1
  ccv_nnc_tensor_t* const vm = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
476
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
477
1
  ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
478
1
  ccv_nnc_tensor_t* const u = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
479
1
  ccv_nnc_tensor_t* const um = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
480
1
  dsfmt_t dsfmt;
481
1
  dsfmt_init_gen_rand(&dsfmt, 0);
482
1
  int i;
483
11
  for (i = 0; i < 10; i++)
484
10
    g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
485
11
  for (i = 0; i < 10; i++)
486
10
    a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
487
11
  for (i = 0; i < 10; i++)
488
10
    m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
489
11
  for (i = 0; i < 10; i++)
490
10
    v->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
491
11
  for (i = 0; i < 10; i++)
492
10
    vm->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
493
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v, vm), TENSOR_LIST(b, n, u, um), 0);
494
1
  ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
495
1
  ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
496
1
  ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
497
1
  ccv_nnc_tensor_t* const gv = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
498
1
  ccv_nnc_tensor_t* const gvm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
499
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v, vm), TENSOR_LIST(gg, ga, gm, gv, gvm), 0);
500
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv, gvm), TENSOR_LIST(ga, gm, gv, gvm), 0);
501
1
  ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
502
1
  ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
503
1
  ccv_nnc_tensor_t* const gut = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
504
1
  ccv_nnc_tensor_t* const gumt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
505
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm, gv, gvm), TENSOR_LIST(gbt, gnt, gut, gumt), 0);
506
1
  REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
507
1
  REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
508
1
  REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
509
1
  REQUIRE_TENSOR_EQ(gumt, um, "cpu result should match");
510
1
  ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
511
1
  ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
512
1
  ccv_nnc_tensor_t* const gu = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
513
1
  ccv_nnc_tensor_t* const gum = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
514
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v, vm), TENSOR_LIST(ga, gm, gv, gvm), 0);
515
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv, gvm), TENSOR_LIST(gb, gn, gu, gum), 0);
516
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn, gu, gum), TENSOR_LIST(gbt, gnt, gut, gumt), 0);
517
1
  REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
518
1
  REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
519
1
  REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
520
1
  REQUIRE_TENSOR_EQ(gumt, um, "cpu result should match");
521
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v, vm), TENSOR_LIST(ga, gm, gv, gvm), 0);
522
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv, gvm), TENSOR_LIST(gb, gm, gv, gvm), 0);
523
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gm, gv, gvm), TENSOR_LIST(gbt, gnt, gut, gumt), 0);
524
1
  REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
525
1
  REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
526
1
  REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
527
1
  REQUIRE_TENSOR_EQ(gumt, um, "cpu result should match");
528
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v, vm), TENSOR_LIST(ga, gm, gv, gvm), 0);
529
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv, gvm), TENSOR_LIST(ga, gn, gu, gum), 0);
530
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gn, gu, gum), TENSOR_LIST(gbt, gnt, gut, gumt), 0);
531
1
  REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
532
1
  REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
533
1
  REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
534
1
  REQUIRE_TENSOR_EQ(gumt, um, "cpu result should match");
535
1
  ccv_nnc_tensor_free(g);
536
1
  ccv_nnc_tensor_free(a);
537
1
  ccv_nnc_tensor_free(m);
538
1
  ccv_nnc_tensor_free(v);
539
1
  ccv_nnc_tensor_free(vm);
540
1
  ccv_nnc_tensor_free(b);
541
1
  ccv_nnc_tensor_free(n);
542
1
  ccv_nnc_tensor_free(u);
543
1
  ccv_nnc_tensor_free(um);
544
1
  ccv_nnc_tensor_free(gg);
545
1
  ccv_nnc_tensor_free(ga);
546
1
  ccv_nnc_tensor_free(gm);
547
1
  ccv_nnc_tensor_free(gv);
548
1
  ccv_nnc_tensor_free(gvm);
549
1
  ccv_nnc_tensor_free(gb);
550
1
  ccv_nnc_tensor_free(gn);
551
1
  ccv_nnc_tensor_free(gu);
552
1
  ccv_nnc_tensor_free(gum);
553
1
  ccv_nnc_tensor_free(gbt);
554
1
  ccv_nnc_tensor_free(gnt);
555
1
  ccv_nnc_tensor_free(gut);
556
1
  ccv_nnc_tensor_free(gumt);
557
1
}
558
559
TEST_CASE("adam amsgrad in half precision")
560
1
{
561
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_ADAM_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_ADAM_FORWARD, CCV_NNC_BACKEND_MPS));
562
1
  ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
563
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
564
1
  ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
565
1
  ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
566
1
  ccv_nnc_tensor_t* const vm = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
567
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
568
1
  ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
569
1
  ccv_nnc_tensor_t* const u = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
570
1
  ccv_nnc_tensor_t* const um = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
571
1
  ccv_nnc_tensor_t* const g16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
572
1
  ccv_nnc_tensor_t* const a16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
573
1
  ccv_nnc_tensor_t* const m16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
574
1
  ccv_nnc_tensor_t* const v16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
575
1
  ccv_nnc_tensor_t* const vm16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
576
1
  ccv_nnc_tensor_t* const b16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
577
1
  ccv_nnc_tensor_t* const n16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
578
1
  ccv_nnc_tensor_t* const u16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
579
1
  ccv_nnc_tensor_t* const um16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
580
1
  dsfmt_t dsfmt;
581
1
  dsfmt_init_gen_rand(&dsfmt, 0);
582
1
  int i;
583
11
  for (i = 0; i < 10; i++)
584
10
    g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
585
11
  for (i = 0; i < 10; i++)
586
10
    a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
587
11
  for (i = 0; i < 10; i++)
588
10
    m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
589
11
  for (i = 0; i < 10; i++)
590
10
    v->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
591
11
  for (i = 0; i < 10; i++)
592
10
    vm->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
593
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v, vm), TENSOR_LIST(b, n, u, um), 0);
594
1
  ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
595
1
  ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
596
1
  ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
597
1
  ccv_nnc_tensor_t* const gv = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
598
1
  ccv_nnc_tensor_t* const gvm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
599
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v, vm), TENSOR_LIST(g16, a16, m16, v16, vm16), 0);
600
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g16, a16, m16, v16, vm16), TENSOR_LIST(gg, ga, gm, gv, gvm), 0);
601
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv, gvm), TENSOR_LIST(ga, gm, gv, gvm), 0);
602
1
  ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
603
1
  ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
604
1
  ccv_nnc_tensor_t* const gut = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
605
1
  ccv_nnc_tensor_t* const gumt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
606
1
  ccv_nnc_tensor_t* const gbt16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
607
1
  ccv_nnc_tensor_t* const gnt16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
608
1
  ccv_nnc_tensor_t* const gut16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
609
1
  ccv_nnc_tensor_t* const gumt16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
610
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm, gv, gvm), TENSOR_LIST(gbt16, gnt16, gut16, gumt16), 0);
611
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gbt16, gnt16, gut16, gumt16), TENSOR_LIST(gbt, gnt, gut, gumt), 0);
612
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
613
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
614
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
615
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gumt->data.f32, um->data.f32, 10, 1e-1, "cpu result should match");
616
1
  ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
617
1
  ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
618
1
  ccv_nnc_tensor_t* const gu = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
619
1
  ccv_nnc_tensor_t* const gum = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
620
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a16, m16, v16, vm16), TENSOR_LIST(ga, gm, gv, gvm), 0);
621
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv, gvm), TENSOR_LIST(gb, gn, gu, gum), 0);
622
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn, gu, gum), TENSOR_LIST(gbt16, gnt16, gut16, gumt16), 0);
623
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gbt16, gnt16, gut16, gumt16), TENSOR_LIST(gbt, gnt, gut, gumt), 0);
624
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
625
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
626
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
627
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gumt->data.f32, um->data.f32, 10, 1e-1, "cpu result should match");
628
1
  ccv_nnc_tensor_free(g);
629
1
  ccv_nnc_tensor_free(a);
630
1
  ccv_nnc_tensor_free(m);
631
1
  ccv_nnc_tensor_free(v);
632
1
  ccv_nnc_tensor_free(vm);
633
1
  ccv_nnc_tensor_free(b);
634
1
  ccv_nnc_tensor_free(n);
635
1
  ccv_nnc_tensor_free(u);
636
1
  ccv_nnc_tensor_free(um);
637
1
  ccv_nnc_tensor_free(g16);
638
1
  ccv_nnc_tensor_free(a16);
639
1
  ccv_nnc_tensor_free(m16);
640
1
  ccv_nnc_tensor_free(v16);
641
1
  ccv_nnc_tensor_free(vm16);
642
1
  ccv_nnc_tensor_free(b16);
643
1
  ccv_nnc_tensor_free(n16);
644
1
  ccv_nnc_tensor_free(u16);
645
1
  ccv_nnc_tensor_free(um16);
646
1
  ccv_nnc_tensor_free(gg);
647
1
  ccv_nnc_tensor_free(ga);
648
1
  ccv_nnc_tensor_free(gm);
649
1
  ccv_nnc_tensor_free(gv);
650
1
  ccv_nnc_tensor_free(gvm);
651
1
  ccv_nnc_tensor_free(gb);
652
1
  ccv_nnc_tensor_free(gn);
653
1
  ccv_nnc_tensor_free(gu);
654
1
  ccv_nnc_tensor_free(gum);
655
1
  ccv_nnc_tensor_free(gbt);
656
1
  ccv_nnc_tensor_free(gnt);
657
1
  ccv_nnc_tensor_free(gut);
658
1
  ccv_nnc_tensor_free(gumt);
659
1
  ccv_nnc_tensor_free(gbt16);
660
1
  ccv_nnc_tensor_free(gnt16);
661
1
  ccv_nnc_tensor_free(gut16);
662
1
  ccv_nnc_tensor_free(gumt16);
663
1
}
664
665
TEST_CASE("adam amsgrad in mixed precision")
666
1
{
667
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_ADAM_FORWARD, CCV_NNC_BACKEND_GPU_REF));
668
1
  ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
669
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
670
1
  ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
671
1
  ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
672
1
  ccv_nnc_tensor_t* const vm = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
673
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
674
1
  ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
675
1
  ccv_nnc_tensor_t* const u = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
676
1
  ccv_nnc_tensor_t* const um = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
677
1
  ccv_nnc_tensor_t* const g16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
678
1
  dsfmt_t dsfmt;
679
1
  dsfmt_init_gen_rand(&dsfmt, 0);
680
1
  int i;
681
11
  for (i = 0; i < 10; i++)
682
10
    g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
683
11
  for (i = 0; i < 10; i++)
684
10
    a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
685
11
  for (i = 0; i < 10; i++)
686
10
    m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
687
11
  for (i = 0; i < 10; i++)
688
10
    v->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
689
11
  for (i = 0; i < 10; i++)
690
10
    vm->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
691
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v, vm), TENSOR_LIST(b, n, u, um), 0);
692
1
  ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
693
1
  ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
694
1
  ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
695
1
  ccv_nnc_tensor_t* const gv = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
696
1
  ccv_nnc_tensor_t* const gvm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
697
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g), TENSOR_LIST(g16), 0);
698
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g16, a, m, v, vm), TENSOR_LIST(gg, ga, gm, gv, gvm), 0);
699
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv, gvm), TENSOR_LIST(ga, gm, gv, gvm), 0);
700
1
  ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
701
1
  ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
702
1
  ccv_nnc_tensor_t* const gut = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
703
1
  ccv_nnc_tensor_t* const gumt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
704
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm, gv, gvm), TENSOR_LIST(gbt, gnt, gut, gumt), 0);
705
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
706
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
707
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
708
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gumt->data.f32, um->data.f32, 10, 1e-3, "cpu result should match");
709
1
  ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
710
1
  ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
711
1
  ccv_nnc_tensor_t* const gu = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
712
1
  ccv_nnc_tensor_t* const gum = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
713
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v, vm), TENSOR_LIST(ga, gm, gv, gvm), 0);
714
1
  ccv_nnc_cmd_exec(CMD_ADAM_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv, gvm), TENSOR_LIST(gb, gn, gu, gum), 0);
715
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn, gu, gum), TENSOR_LIST(gbt, gnt, gut, gumt), 0);
716
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
717
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
718
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
719
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gumt->data.f32, um->data.f32, 10, 1e-3, "cpu result should match");
720
1
  ccv_nnc_tensor_free(g);
721
1
  ccv_nnc_tensor_free(a);
722
1
  ccv_nnc_tensor_free(m);
723
1
  ccv_nnc_tensor_free(v);
724
1
  ccv_nnc_tensor_free(vm);
725
1
  ccv_nnc_tensor_free(b);
726
1
  ccv_nnc_tensor_free(n);
727
1
  ccv_nnc_tensor_free(u);
728
1
  ccv_nnc_tensor_free(um);
729
1
  ccv_nnc_tensor_free(g16);
730
1
  ccv_nnc_tensor_free(gg);
731
1
  ccv_nnc_tensor_free(ga);
732
1
  ccv_nnc_tensor_free(gm);
733
1
  ccv_nnc_tensor_free(gv);
734
1
  ccv_nnc_tensor_free(gvm);
735
1
  ccv_nnc_tensor_free(gb);
736
1
  ccv_nnc_tensor_free(gn);
737
1
  ccv_nnc_tensor_free(gu);
738
1
  ccv_nnc_tensor_free(gum);
739
1
  ccv_nnc_tensor_free(gbt);
740
1
  ccv_nnc_tensor_free(gnt);
741
1
  ccv_nnc_tensor_free(gut);
742
1
  ccv_nnc_tensor_free(gumt);
743
1
}
744
745
TEST_CASE("adamw amsgrad in float")
746
1
{
747
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_ADAMW_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_ADAMW_FORWARD, CCV_NNC_BACKEND_MPS));
748
1
  ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
749
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
750
1
  ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
751
1
  ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
752
1
  ccv_nnc_tensor_t* const vm = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
753
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
754
1
  ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
755
1
  ccv_nnc_tensor_t* const u = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
756
1
  ccv_nnc_tensor_t* const um = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
757
1
  dsfmt_t dsfmt;
758
1
  dsfmt_init_gen_rand(&dsfmt, 0);
759
1
  int i;
760
11
  for (i = 0; i < 10; i++)
761
10
    g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
762
11
  for (i = 0; i < 10; i++)
763
10
    a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
764
11
  for (i = 0; i < 10; i++)
765
10
    m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
766
11
  for (i = 0; i < 10; i++)
767
10
    v->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
768
11
  for (i = 0; i < 10; i++)
769
10
    vm->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
770
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v, vm), TENSOR_LIST(b, n, u, um), 0);
771
1
  ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
772
1
  ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
773
1
  ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
774
1
  ccv_nnc_tensor_t* const gv = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
775
1
  ccv_nnc_tensor_t* const gvm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
776
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v, vm), TENSOR_LIST(gg, ga, gm, gv, gvm), 0);
777
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv, gvm), TENSOR_LIST(ga, gm, gv, gvm), 0);
778
1
  ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
779
1
  ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
780
1
  ccv_nnc_tensor_t* const gut = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
781
1
  ccv_nnc_tensor_t* const gumt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
782
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm, gv, gvm), TENSOR_LIST(gbt, gnt, gut, gumt), 0);
783
1
  REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
784
1
  REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
785
1
  REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
786
1
  REQUIRE_TENSOR_EQ(gumt, um, "cpu result should match");
787
1
  ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
788
1
  ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
789
1
  ccv_nnc_tensor_t* const gu = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
790
1
  ccv_nnc_tensor_t* const gum = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
791
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v, vm), TENSOR_LIST(ga, gm, gv, gvm), 0);
792
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv, gvm), TENSOR_LIST(gb, gn, gu, gum), 0);
793
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn, gu, gum), TENSOR_LIST(gbt, gnt, gut, gumt), 0);
794
1
  REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
795
1
  REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
796
1
  REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
797
1
  REQUIRE_TENSOR_EQ(gumt, um, "cpu result should match");
798
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v, vm), TENSOR_LIST(ga, gm, gv, gvm), 0);
799
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv, gvm), TENSOR_LIST(gb, gm, gv, gvm), 0);
800
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gm, gv, gvm), TENSOR_LIST(gbt, gnt, gut, gumt), 0);
801
1
  REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
802
1
  REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
803
1
  REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
804
1
  REQUIRE_TENSOR_EQ(gumt, um, "cpu result should match");
805
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v, vm), TENSOR_LIST(ga, gm, gv, gvm), 0);
806
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv, gvm), TENSOR_LIST(ga, gn, gu, gum), 0);
807
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gn, gu, gum), TENSOR_LIST(gbt, gnt, gut, gumt), 0);
808
1
  REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
809
1
  REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
810
1
  REQUIRE_TENSOR_EQ(gut, u, "cpu result should match");
811
1
  REQUIRE_TENSOR_EQ(gumt, um, "cpu result should match");
812
1
  ccv_nnc_tensor_free(g);
813
1
  ccv_nnc_tensor_free(a);
814
1
  ccv_nnc_tensor_free(m);
815
1
  ccv_nnc_tensor_free(v);
816
1
  ccv_nnc_tensor_free(vm);
817
1
  ccv_nnc_tensor_free(b);
818
1
  ccv_nnc_tensor_free(n);
819
1
  ccv_nnc_tensor_free(u);
820
1
  ccv_nnc_tensor_free(um);
821
1
  ccv_nnc_tensor_free(gg);
822
1
  ccv_nnc_tensor_free(ga);
823
1
  ccv_nnc_tensor_free(gm);
824
1
  ccv_nnc_tensor_free(gv);
825
1
  ccv_nnc_tensor_free(gvm);
826
1
  ccv_nnc_tensor_free(gb);
827
1
  ccv_nnc_tensor_free(gn);
828
1
  ccv_nnc_tensor_free(gu);
829
1
  ccv_nnc_tensor_free(gum);
830
1
  ccv_nnc_tensor_free(gbt);
831
1
  ccv_nnc_tensor_free(gnt);
832
1
  ccv_nnc_tensor_free(gut);
833
1
  ccv_nnc_tensor_free(gumt);
834
1
}
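
Editor's note: the FP32 test above exercises ccv's GPU/MPS AdamW kernel against the CPU reference outputs (b, n, u, um) under several aliasing patterns (moments updated in place, parameter updated in place, and fully out-of-place). As a reading aid only, the sketch below restates the per-element AdamW + AMSGrad step being verified, assuming the standard formulation and the parameter order (step, rate, beta1, beta2, decay, epsilon, amsgrad) suggested by CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1); it is a hypothetical illustration, not ccv's kernel, and details such as the exact placement of epsilon or the point at which the AMSGrad maximum is taken may differ.

#include <math.h>

/* Hypothetical reference: one AdamW step with AMSGrad, element by element.
 * Inputs mirror the test tensors: g (gradient), a (parameter), m/v (moments),
 * vm (running max of the second moment); outputs b, n, u, um are their updates. */
void adamw_amsgrad_step(const float* g, const float* a, const float* m,
	const float* v, const float* vm, float* b, float* n, float* u, float* um,
	int count, int step, float rate, float beta1, float beta2, float decay,
	float epsilon)
{
	const float bias1 = 1.f - powf(beta1, step); /* bias correction, step >= 1 */
	const float bias2 = 1.f - powf(beta2, step);
	int i;
	for (i = 0; i < count; i++)
	{
		n[i] = beta1 * m[i] + (1.f - beta1) * g[i];        /* first moment */
		u[i] = beta2 * v[i] + (1.f - beta2) * g[i] * g[i]; /* second moment */
		um[i] = fmaxf(vm[i], u[i]);                        /* AMSGrad running max */
		/* decoupled weight decay, then the Adam update from bias-corrected moments */
		b[i] = a[i] * (1.f - rate * decay)
			- rate * (n[i] / bias1) / (sqrtf(um[i] / bias2) + epsilon);
	}
}
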
835
836
TEST_CASE("adamw amsgrad in half precision")
837
1
{
838
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_ADAMW_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_ADAMW_FORWARD, CCV_NNC_BACKEND_MPS));
839
1
  ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
840
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
841
1
  ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
842
1
  ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
843
1
  ccv_nnc_tensor_t* const vm = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
844
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
845
1
  ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
846
1
  ccv_nnc_tensor_t* const u = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
847
1
  ccv_nnc_tensor_t* const um = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
848
1
  ccv_nnc_tensor_t* const g16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
849
1
  ccv_nnc_tensor_t* const a16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
850
1
  ccv_nnc_tensor_t* const m16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
851
1
  ccv_nnc_tensor_t* const v16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
852
1
  ccv_nnc_tensor_t* const vm16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
853
1
  ccv_nnc_tensor_t* const b16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
854
1
  ccv_nnc_tensor_t* const n16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
855
1
  ccv_nnc_tensor_t* const u16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
856
1
  ccv_nnc_tensor_t* const um16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
857
1
  dsfmt_t dsfmt;
858
1
  dsfmt_init_gen_rand(&dsfmt, 0);
859
1
  int i;
860
11
  for (i = 0; i < 10; i++)
861
10
    g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
862
11
  for (i = 0; i < 10; i++)
863
10
    a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
864
11
  for (i = 0; i < 10; i++)
865
10
    m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
866
11
  for (i = 0; i < 10; i++)
867
10
    v->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
868
11
  for (i = 0; i < 10; i++)
869
10
    vm->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
870
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v, vm), TENSOR_LIST(b, n, u, um), 0);
871
1
  ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
872
1
  ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
873
1
  ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
874
1
  ccv_nnc_tensor_t* const gv = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
875
1
  ccv_nnc_tensor_t* const gvm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
876
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v, vm), TENSOR_LIST(g16, a16, m16, v16, vm16), 0);
877
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g16, a16, m16, v16, vm16), TENSOR_LIST(gg, ga, gm, gv, gvm), 0);
878
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv, gvm), TENSOR_LIST(ga, gm, gv, gvm), 0);
879
1
  ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
880
1
  ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
881
1
  ccv_nnc_tensor_t* const gut = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
882
1
  ccv_nnc_tensor_t* const gumt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
883
1
  ccv_nnc_tensor_t* const gbt16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
884
1
  ccv_nnc_tensor_t* const gnt16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
885
1
  ccv_nnc_tensor_t* const gut16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
886
1
  ccv_nnc_tensor_t* const gumt16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
887
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm, gv, gvm), TENSOR_LIST(gbt16, gnt16, gut16, gumt16), 0);
888
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gbt16, gnt16, gut16, gumt16), TENSOR_LIST(gbt, gnt, gut, gumt), 0);
889
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
890
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
891
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
892
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gumt->data.f32, um->data.f32, 10, 1e-1, "cpu result should match");
893
1
  ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
894
1
  ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
895
1
  ccv_nnc_tensor_t* const gu = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
896
1
  ccv_nnc_tensor_t* const gum = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
897
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a16, m16, v16, vm16), TENSOR_LIST(ga, gm, gv, gvm), 0);
898
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv, gvm), TENSOR_LIST(gb, gn, gu, gum), 0);
899
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn, gu, gum), TENSOR_LIST(gbt16, gnt16, gut16, gumt16), 0);
900
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gbt16, gnt16, gut16, gumt16), TENSOR_LIST(gbt, gnt, gut, gumt), 0);
901
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
902
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
903
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
904
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gumt->data.f32, um->data.f32, 10, 1e-1, "cpu result should match");
905
1
  ccv_nnc_tensor_free(g);
906
1
  ccv_nnc_tensor_free(a);
907
1
  ccv_nnc_tensor_free(m);
908
1
  ccv_nnc_tensor_free(v);
909
1
  ccv_nnc_tensor_free(vm);
910
1
  ccv_nnc_tensor_free(b);
911
1
  ccv_nnc_tensor_free(n);
912
1
  ccv_nnc_tensor_free(u);
913
1
  ccv_nnc_tensor_free(um);
914
1
  ccv_nnc_tensor_free(g16);
915
1
  ccv_nnc_tensor_free(a16);
916
1
  ccv_nnc_tensor_free(m16);
917
1
  ccv_nnc_tensor_free(v16);
918
1
  ccv_nnc_tensor_free(vm16);
919
1
  ccv_nnc_tensor_free(b16);
920
1
  ccv_nnc_tensor_free(n16);
921
1
  ccv_nnc_tensor_free(u16);
922
1
  ccv_nnc_tensor_free(um16);
923
1
  ccv_nnc_tensor_free(gg);
924
1
  ccv_nnc_tensor_free(ga);
925
1
  ccv_nnc_tensor_free(gm);
926
1
  ccv_nnc_tensor_free(gv);
927
1
  ccv_nnc_tensor_free(gvm);
928
1
  ccv_nnc_tensor_free(gb);
929
1
  ccv_nnc_tensor_free(gn);
930
1
  ccv_nnc_tensor_free(gu);
931
1
  ccv_nnc_tensor_free(gum);
932
1
  ccv_nnc_tensor_free(gbt);
933
1
  ccv_nnc_tensor_free(gnt);
934
1
  ccv_nnc_tensor_free(gut);
935
1
  ccv_nnc_tensor_free(gumt);
936
1
  ccv_nnc_tensor_free(gbt16);
937
1
  ccv_nnc_tensor_free(gnt16);
938
1
  ccv_nnc_tensor_free(gut16);
939
1
  ccv_nnc_tensor_free(gumt16);
940
1
}
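
Editor's note: the half-precision test keeps the FP32 CPU run as ground truth and swaps exact comparison for REQUIRE_ARRAY_EQ_WITH_TOLERANCE, using 1e-3 for b, n, u and a looser 1e-1 for the AMSGrad maximum um. The throwaway program below (not part of the test file) is a back-of-the-envelope check that 1e-3 is about one FP16 ULP for values near 1.0, which is the scale of the random inputs here.

#include <stdio.h>

int main(void)
{
	/* binary16 has a 10-bit mantissa: one ULP at 1.0 is 2^-10 (~9.8e-4), and a
	 * single round trip through FP16 perturbs a value near 1.0 by at most half that. */
	const double fp16_ulp = 1.0 / (1 << 10);
	printf("fp16 ULP at 1.0 ~ %.3g; 1e-3 tolerance ~ %.2f ULP\n",
		fp16_ulp, 1e-3 / fp16_ulp);
	return 0;
}
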
941
942
TEST_CASE("adamw amsgrad in mixed precision")
943
1
{
944
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_ADAMW_FORWARD, CCV_NNC_BACKEND_GPU_REF));
945
1
  ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
946
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
947
1
  ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
948
1
  ccv_nnc_tensor_t* const v = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
949
1
  ccv_nnc_tensor_t* const vm = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
950
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
951
1
  ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
952
1
  ccv_nnc_tensor_t* const u = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
953
1
  ccv_nnc_tensor_t* const um = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
954
1
  ccv_nnc_tensor_t* const g16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
955
1
  dsfmt_t dsfmt;
956
1
  dsfmt_init_gen_rand(&dsfmt, 0);
957
1
  int i;
958
11
  for (i = 0; i < 10; i++)
959
10
    g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
960
11
  for (i = 0; i < 10; i++)
961
10
    a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
962
11
  for (i = 0; i < 10; i++)
963
10
    m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
964
11
  for (i = 0; i < 10; i++)
965
10
    v->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
966
11
  for (i = 0; i < 10; i++)
967
10
    vm->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
968
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m, v, vm), TENSOR_LIST(b, n, u, um), 0);
969
1
  ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
970
1
  ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
971
1
  ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
972
1
  ccv_nnc_tensor_t* const gv = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
973
1
  ccv_nnc_tensor_t* const gvm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
974
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g), TENSOR_LIST(g16), 0);
975
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g16, a, m, v, vm), TENSOR_LIST(gg, ga, gm, gv, gvm), 0);
976
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv, gvm), TENSOR_LIST(ga, gm, gv, gvm), 0);
977
1
  ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
978
1
  ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
979
1
  ccv_nnc_tensor_t* const gut = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
980
1
  ccv_nnc_tensor_t* const gumt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
981
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm, gv, gvm), TENSOR_LIST(gbt, gnt, gut, gumt), 0);
982
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
983
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
984
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
985
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gumt->data.f32, um->data.f32, 10, 1e-3, "cpu result should match");
986
1
  ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
987
1
  ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
988
1
  ccv_nnc_tensor_t* const gu = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
989
1
  ccv_nnc_tensor_t* const gum = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
990
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m, v, vm), TENSOR_LIST(ga, gm, gv, gvm), 0);
991
1
  ccv_nnc_cmd_exec(CMD_ADAMW_FORWARD(1, 0.002, 0.9, 0.98, 0, 1e-9, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm, gv, gvm), TENSOR_LIST(gb, gn, gu, gum), 0);
992
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn, gu, gum), TENSOR_LIST(gbt, gnt, gut, gumt), 0);
993
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
994
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
995
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gut->data.f32, u->data.f32, 10, 1e-3, "cpu result should match");
996
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gumt->data.f32, um->data.f32, 10, 1e-3, "cpu result should match");
997
1
  ccv_nnc_tensor_free(g);
998
1
  ccv_nnc_tensor_free(a);
999
1
  ccv_nnc_tensor_free(m);
1000
1
  ccv_nnc_tensor_free(v);
1001
1
  ccv_nnc_tensor_free(vm);
1002
1
  ccv_nnc_tensor_free(b);
1003
1
  ccv_nnc_tensor_free(n);
1004
1
  ccv_nnc_tensor_free(u);
1005
1
  ccv_nnc_tensor_free(um);
1006
1
  ccv_nnc_tensor_free(g16);
1007
1
  ccv_nnc_tensor_free(gg);
1008
1
  ccv_nnc_tensor_free(ga);
1009
1
  ccv_nnc_tensor_free(gm);
1010
1
  ccv_nnc_tensor_free(gv);
1011
1
  ccv_nnc_tensor_free(gvm);
1012
1
  ccv_nnc_tensor_free(gb);
1013
1
  ccv_nnc_tensor_free(gn);
1014
1
  ccv_nnc_tensor_free(gu);
1015
1
  ccv_nnc_tensor_free(gum);
1016
1
  ccv_nnc_tensor_free(gbt);
1017
1
  ccv_nnc_tensor_free(gnt);
1018
1
  ccv_nnc_tensor_free(gut);
1019
1
  ccv_nnc_tensor_free(gumt);
1020
1
}
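
Editor's note: in the mixed-precision test only the gradient tensor is stored in FP16; a, m, v and vm stay in FP32, so all four outputs keep the tight 1e-3 tolerance. The rough, hypothetical estimate below (not part of the test file) shows why a single FP16 rounding of the gradient stays well inside that bound once it is scaled by (1 - beta1) and (1 - beta2) in the moment updates.

#include <stdio.h>

int main(void)
{
	/* Only the gradient is FP16 here; a, m, v, vm remain FP32. A perturbation dg
	 * from the FP16 round trip enters the compared outputs as (1 - beta1) * dg in
	 * the first moment and roughly 2 * g * (1 - beta2) * dg in the second. */
	const double beta1 = 0.9, beta2 = 0.98;
	const double dg = 1.0 / (1 << 11); /* conservative bound on FP16 rounding for values in (0, 1] */
	printf("first-moment perturbation  ~ %.2g\n", (1.0 - beta1) * dg);
	printf("second-moment perturbation ~ %.2g (for g ~ 1)\n", 2.0 * (1.0 - beta2) * dg);
	return 0;
}
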
1021
1022
#include "case_main.h"