Coverage Report

Created: 2025-04-03 22:59

/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/sgd.tests.c
Line | Count | Source
   1 |       | #include "case.h"
   2 |       | #include "ccv_case.h"
   3 |       | #include "ccv_nnc_case.h"
   4 |       | #include <ccv.h>
   5 |       | #include <nnc/ccv_nnc.h>
   6 |       | #include <nnc/ccv_nnc_easy.h>
   7 |       | #include <3rdparty/dsfmt/dSFMT.h>
   8 |       |
   9 |       | TEST_SETUP()
  10 |       | {
  11 |       |   ccv_nnc_init();
  12 |       | }
  13 |       |
  14 |       | TEST_CASE("SGD in float")
  15 |     1 | {
  16 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SGD_FORWARD, CCV_NNC_BACKEND_GPU_REF));
  17 |     1 |   ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  18 |     1 |   ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  19 |     1 |   ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  20 |     1 |   ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  21 |     1 |   ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  22 |     1 |   dsfmt_t dsfmt;
  23 |     1 |   dsfmt_init_gen_rand(&dsfmt, 0);
  24 |     1 |   int i;
  25 |    11 |   for (i = 0; i < 10; i++)
  26 |    10 |     g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  27 |    11 |   for (i = 0; i < 10; i++)
  28 |    10 |     a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  29 |    11 |   for (i = 0; i < 10; i++)
  30 |    10 |     m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  31 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(0, 0.9, 0.5, 0.999, 0.9, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m), TENSOR_LIST(b, n), 0);
  32 |     1 |   ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
  33 |     1 |   ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
  34 |     1 |   ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
  35 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m), TENSOR_LIST(gg, ga, gm), 0);
  36 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(0, 0.9, 0.5, 0.999, 0.9, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm), TENSOR_LIST(ga, gm), 0);
  37 |     1 |   ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  38 |     1 |   ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  39 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm), TENSOR_LIST(gbt, gnt), 0);
  40 |     1 |   REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
  41 |     1 |   REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
  42 |     1 |   ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
  43 |     1 |   ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
  44 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m), TENSOR_LIST(ga, gm), 0);
  45 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(0, 0.9, 0.5, 0.999, 0.9, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm), TENSOR_LIST(gb, gn), 0);
  46 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn), TENSOR_LIST(gbt, gnt), 0);
  47 |     1 |   REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
  48 |     1 |   REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
  49 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m), TENSOR_LIST(ga, gm), 0);
  50 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(0, 0.9, 0.5, 0.999, 0.9, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm), TENSOR_LIST(gb, gm), 0);
  51 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gm), TENSOR_LIST(gbt, gnt), 0);
  52 |     1 |   REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
  53 |     1 |   REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
  54 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m), TENSOR_LIST(ga, gm), 0);
  55 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(0, 0.9, 0.5, 0.999, 0.9, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm), TENSOR_LIST(ga, gn), 0);
  56 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gn), TENSOR_LIST(gbt, gnt), 0);
  57 |     1 |   REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
  58 |     1 |   REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
  59 |     1 |   ccv_nnc_tensor_free(g);
  60 |     1 |   ccv_nnc_tensor_free(a);
  61 |     1 |   ccv_nnc_tensor_free(m);
  62 |     1 |   ccv_nnc_tensor_free(b);
  63 |     1 |   ccv_nnc_tensor_free(n);
  64 |     1 |   ccv_nnc_tensor_free(gg);
  65 |     1 |   ccv_nnc_tensor_free(ga);
  66 |     1 |   ccv_nnc_tensor_free(gm);
  67 |     1 |   ccv_nnc_tensor_free(gb);
  68 |     1 |   ccv_nnc_tensor_free(gn);
  69 |     1 |   ccv_nnc_tensor_free(gbt);
  70 |     1 |   ccv_nnc_tensor_free(gnt);
  71 |     1 | }
  72 |       |
  73 |       | TEST_CASE("SGD in half precision")
  74 |     1 | {
  75 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SGD_FORWARD, CCV_NNC_BACKEND_GPU_REF));
  76 |     1 |   ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  77 |     1 |   ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  78 |     1 |   ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  79 |     1 |   ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  80 |     1 |   ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
  81 |     1 |   ccv_nnc_tensor_t* const g16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
  82 |     1 |   ccv_nnc_tensor_t* const a16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
  83 |     1 |   ccv_nnc_tensor_t* const m16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
  84 |     1 |   ccv_nnc_tensor_t* const b16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
  85 |     1 |   ccv_nnc_tensor_t* const n16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
  86 |     1 |   dsfmt_t dsfmt;
  87 |     1 |   dsfmt_init_gen_rand(&dsfmt, 0);
  88 |     1 |   int i;
  89 |    11 |   for (i = 0; i < 10; i++)
  90 |    10 |     g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  91 |    11 |   for (i = 0; i < 10; i++)
  92 |    10 |     a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  93 |    11 |   for (i = 0; i < 10; i++)
  94 |    10 |     m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  95 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(0, 0.9, 0.5, 0.999, 0.9, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m), TENSOR_LIST(b, n), 0);
  96 |     1 |   ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
  97 |     1 |   ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
  98 |     1 |   ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
  99 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m), TENSOR_LIST(g16, a16, m16), 0);
 100 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g16, a16, m16), TENSOR_LIST(gg, ga, gm), 0);
 101 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(0, 0.9, 0.5, 0.999, 0.9, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm), TENSOR_LIST(ga, gm), 0);
 102 |     1 |   ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 103 |     1 |   ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 104 |     1 |   ccv_nnc_tensor_t* const gbt16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 105 |     1 |   ccv_nnc_tensor_t* const gnt16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 106 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm), TENSOR_LIST(gbt16, gnt16), 0);
 107 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gbt16, gnt16), TENSOR_LIST(gbt, gnt), 0);
 108 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
 109 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
 110 |     1 |   ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
 111 |     1 |   ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
 112 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a16, m16), TENSOR_LIST(ga, gm), 0);
 113 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(0, 0.9, 0.5, 0.999, 0.9, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm), TENSOR_LIST(gb, gn), 0);
 114 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn), TENSOR_LIST(gbt16, gnt16), 0);
 115 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gbt16, gnt16), TENSOR_LIST(gbt, gnt), 0);
 116 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
 117 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
 118 |     1 |   ccv_nnc_tensor_free(g);
 119 |     1 |   ccv_nnc_tensor_free(a);
 120 |     1 |   ccv_nnc_tensor_free(m);
 121 |     1 |   ccv_nnc_tensor_free(b);
 122 |     1 |   ccv_nnc_tensor_free(n);
 123 |     1 |   ccv_nnc_tensor_free(g16);
 124 |     1 |   ccv_nnc_tensor_free(a16);
 125 |     1 |   ccv_nnc_tensor_free(m16);
 126 |     1 |   ccv_nnc_tensor_free(b16);
 127 |     1 |   ccv_nnc_tensor_free(n16);
 128 |     1 |   ccv_nnc_tensor_free(gg);
 129 |     1 |   ccv_nnc_tensor_free(ga);
 130 |     1 |   ccv_nnc_tensor_free(gm);
 131 |     1 |   ccv_nnc_tensor_free(gb);
 132 |     1 |   ccv_nnc_tensor_free(gn);
 133 |     1 |   ccv_nnc_tensor_free(gbt);
 134 |     1 |   ccv_nnc_tensor_free(gnt);
 135 |     1 |   ccv_nnc_tensor_free(gbt16);
 136 |     1 |   ccv_nnc_tensor_free(gnt16);
 137 |     1 | }
 138 |       |
 139 |       | TEST_CASE("SGD in mixed precision")
 140 |     1 | {
 141 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SGD_FORWARD, CCV_NNC_BACKEND_GPU_REF));
 142 |     1 |   ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 143 |     1 |   ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 144 |     1 |   ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 145 |     1 |   ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 146 |     1 |   ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 147 |     1 |   ccv_nnc_tensor_t* const g16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 148 |     1 |   dsfmt_t dsfmt;
 149 |     1 |   dsfmt_init_gen_rand(&dsfmt, 0);
 150 |     1 |   int i;
 151 |    11 |   for (i = 0; i < 10; i++)
 152 |    10 |     g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 153 |    11 |   for (i = 0; i < 10; i++)
 154 |    10 |     a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 155 |    11 |   for (i = 0; i < 10; i++)
 156 |    10 |     m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 157 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(0, 0.9, 0.5, 0.999, 0.9, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m), TENSOR_LIST(b, n), 0);
 158 |     1 |   ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
 159 |     1 |   ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 160 |     1 |   ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 161 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g), TENSOR_LIST(g16), 0);
 162 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g16, a, m), TENSOR_LIST(gg, ga, gm), 0);
 163 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(0, 0.9, 0.5, 0.999, 0.9, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm), TENSOR_LIST(ga, gm), 0);
 164 |     1 |   ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 165 |     1 |   ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 166 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm), TENSOR_LIST(gbt, gnt), 0);
 167 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
 168 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
 169 |     1 |   ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 170 |     1 |   ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 171 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m), TENSOR_LIST(ga, gm), 0);
 172 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(0, 0.9, 0.5, 0.999, 0.9, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm), TENSOR_LIST(gb, gn), 0);
 173 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn), TENSOR_LIST(gbt, gnt), 0);
 174 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
 175 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
 176 |     1 |   ccv_nnc_tensor_free(g);
 177 |     1 |   ccv_nnc_tensor_free(a);
 178 |     1 |   ccv_nnc_tensor_free(m);
 179 |     1 |   ccv_nnc_tensor_free(b);
 180 |     1 |   ccv_nnc_tensor_free(n);
 181 |     1 |   ccv_nnc_tensor_free(g16);
 182 |     1 |   ccv_nnc_tensor_free(gg);
 183 |     1 |   ccv_nnc_tensor_free(ga);
 184 |     1 |   ccv_nnc_tensor_free(gm);
 185 |     1 |   ccv_nnc_tensor_free(gb);
 186 |     1 |   ccv_nnc_tensor_free(gn);
 187 |     1 |   ccv_nnc_tensor_free(gbt);
 188 |     1 |   ccv_nnc_tensor_free(gnt);
 189 |     1 | }
 190 |       |
 191 |       | TEST_CASE("Nesterov SGD in float")
 192 |     1 | {
 193 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SGD_FORWARD, CCV_NNC_BACKEND_GPU_REF));
 194 |     1 |   ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 195 |     1 |   ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 196 |     1 |   ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 197 |     1 |   ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 198 |     1 |   ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 199 |     1 |   dsfmt_t dsfmt;
 200 |     1 |   dsfmt_init_gen_rand(&dsfmt, 0);
 201 |     1 |   int i;
 202 |    11 |   for (i = 0; i < 10; i++)
 203 |    10 |     g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 204 |    11 |   for (i = 0; i < 10; i++)
 205 |    10 |     a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 206 |    11 |   for (i = 0; i < 10; i++)
 207 |    10 |     m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 208 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(1, 0.9, 0.5, 0.999, 0.9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m), TENSOR_LIST(b, n), 0);
 209 |     1 |   ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 210 |     1 |   ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 211 |     1 |   ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 212 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m), TENSOR_LIST(gg, ga, gm), 0);
 213 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(1, 0.9, 0.5, 0.999, 0.9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm), TENSOR_LIST(ga, gm), 0);
 214 |     1 |   ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 215 |     1 |   ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 216 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm), TENSOR_LIST(gbt, gnt), 0);
 217 |     1 |   REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
 218 |     1 |   REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
 219 |     1 |   ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 220 |     1 |   ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 221 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m), TENSOR_LIST(ga, gm), 0);
 222 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(1, 0.9, 0.5, 0.999, 0.9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm), TENSOR_LIST(gb, gn), 0);
 223 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn), TENSOR_LIST(gbt, gnt), 0);
 224 |     1 |   REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
 225 |     1 |   REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
 226 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m), TENSOR_LIST(ga, gm), 0);
 227 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(1, 0.9, 0.5, 0.999, 0.9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm), TENSOR_LIST(gb, gm), 0);
 228 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gm), TENSOR_LIST(gbt, gnt), 0);
 229 |     1 |   REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
 230 |     1 |   REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
 231 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m), TENSOR_LIST(ga, gm), 0);
 232 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(1, 0.9, 0.5, 0.999, 0.9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm), TENSOR_LIST(ga, gn), 0);
 233 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gn), TENSOR_LIST(gbt, gnt), 0);
 234 |     1 |   REQUIRE_TENSOR_EQ(gbt, b, "cpu result should match");
 235 |     1 |   REQUIRE_TENSOR_EQ(gnt, n, "cpu result should match");
 236 |     1 |   ccv_nnc_tensor_free(g);
 237 |     1 |   ccv_nnc_tensor_free(a);
 238 |     1 |   ccv_nnc_tensor_free(m);
 239 |     1 |   ccv_nnc_tensor_free(b);
 240 |     1 |   ccv_nnc_tensor_free(n);
 241 |     1 |   ccv_nnc_tensor_free(gg);
 242 |     1 |   ccv_nnc_tensor_free(ga);
 243 |     1 |   ccv_nnc_tensor_free(gm);
 244 |     1 |   ccv_nnc_tensor_free(gb);
 245 |     1 |   ccv_nnc_tensor_free(gn);
 246 |     1 |   ccv_nnc_tensor_free(gbt);
 247 |     1 |   ccv_nnc_tensor_free(gnt);
 248 |     1 | }
 249 |       |
 250 |       | TEST_CASE("Nesterov SGD in half precision")
 251 |     1 | {
 252 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SGD_FORWARD, CCV_NNC_BACKEND_GPU_REF));
 253 |     1 |   ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 254 |     1 |   ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 255 |     1 |   ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 256 |     1 |   ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 257 |     1 |   ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 258 |     1 |   ccv_nnc_tensor_t* const g16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 259 |     1 |   ccv_nnc_tensor_t* const a16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 260 |     1 |   ccv_nnc_tensor_t* const m16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 261 |     1 |   ccv_nnc_tensor_t* const b16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 262 |     1 |   ccv_nnc_tensor_t* const n16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 263 |     1 |   dsfmt_t dsfmt;
 264 |     1 |   dsfmt_init_gen_rand(&dsfmt, 0);
 265 |     1 |   int i;
 266 |    11 |   for (i = 0; i < 10; i++)
 267 |    10 |     g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 268 |    11 |   for (i = 0; i < 10; i++)
 269 |    10 |     a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 270 |    11 |   for (i = 0; i < 10; i++)
 271 |    10 |     m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 272 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(1, 0.9, 0.5, 0.999, 0.9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m), TENSOR_LIST(b, n), 0);
 273 |     1 |   ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
 274 |     1 |   ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
 275 |     1 |   ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
 276 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m), TENSOR_LIST(g16, a16, m16), 0);
 277 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g16, a16, m16), TENSOR_LIST(gg, ga, gm), 0);
 278 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(1, 0.9, 0.5, 0.999, 0.9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm), TENSOR_LIST(ga, gm), 0);
 279 |     1 |   ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 280 |     1 |   ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 281 |     1 |   ccv_nnc_tensor_t* const gbt16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 282 |     1 |   ccv_nnc_tensor_t* const gnt16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 283 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm), TENSOR_LIST(gbt16, gnt16), 0);
 284 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gbt16, gnt16), TENSOR_LIST(gbt, gnt), 0);
 285 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
 286 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
 287 |     1 |   ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
 288 |     1 |   ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
 289 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a16, m16), TENSOR_LIST(ga, gm), 0);
 290 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(1, 0.9, 0.5, 0.999, 0.9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm), TENSOR_LIST(gb, gn), 0);
 291 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn), TENSOR_LIST(gbt16, gnt16), 0);
 292 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gbt16, gnt16), TENSOR_LIST(gbt, gnt), 0);
 293 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
 294 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
 295 |     1 |   ccv_nnc_tensor_free(g);
 296 |     1 |   ccv_nnc_tensor_free(a);
 297 |     1 |   ccv_nnc_tensor_free(m);
 298 |     1 |   ccv_nnc_tensor_free(b);
 299 |     1 |   ccv_nnc_tensor_free(n);
 300 |     1 |   ccv_nnc_tensor_free(g16);
 301 |     1 |   ccv_nnc_tensor_free(a16);
 302 |     1 |   ccv_nnc_tensor_free(m16);
 303 |     1 |   ccv_nnc_tensor_free(b16);
 304 |     1 |   ccv_nnc_tensor_free(n16);
 305 |     1 |   ccv_nnc_tensor_free(gg);
 306 |     1 |   ccv_nnc_tensor_free(ga);
 307 |     1 |   ccv_nnc_tensor_free(gm);
 308 |     1 |   ccv_nnc_tensor_free(gb);
 309 |     1 |   ccv_nnc_tensor_free(gn);
 310 |     1 |   ccv_nnc_tensor_free(gbt);
 311 |     1 |   ccv_nnc_tensor_free(gnt);
 312 |     1 |   ccv_nnc_tensor_free(gbt16);
 313 |     1 |   ccv_nnc_tensor_free(gnt16);
 314 |     1 | }
 315 |       |
 316 |       | TEST_CASE("Nesterov SGD in mixed precision")
 317 |     1 | {
 318 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_SGD_FORWARD, CCV_NNC_BACKEND_GPU_REF));
 319 |     1 |   ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 320 |     1 |   ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 321 |     1 |   ccv_nnc_tensor_t* const m = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 322 |     1 |   ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 323 |     1 |   ccv_nnc_tensor_t* const n = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 324 |     1 |   ccv_nnc_tensor_t* const g16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
 325 |     1 |   dsfmt_t dsfmt;
 326 |     1 |   dsfmt_init_gen_rand(&dsfmt, 0);
 327 |     1 |   int i;
 328 |    11 |   for (i = 0; i < 10; i++)
 329 |    10 |     g->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 330 |    11 |   for (i = 0; i < 10; i++)
 331 |    10 |     a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 332 |    11 |   for (i = 0; i < 10; i++)
 333 |    10 |     m->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 334 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(1, 0.9, 0.5, 0.999, 0.9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, m), TENSOR_LIST(b, n), 0);
 335 |     1 |   ccv_nnc_tensor_t* const gg = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
 336 |     1 |   ccv_nnc_tensor_t* const ga = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 337 |     1 |   ccv_nnc_tensor_t* const gm = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 338 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g), TENSOR_LIST(g16), 0);
 339 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g16, a, m), TENSOR_LIST(gg, ga, gm), 0);
 340 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(1, 0.9, 0.5, 0.999, 0.9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm), TENSOR_LIST(ga, gm), 0);
 341 |     1 |   ccv_nnc_tensor_t* const gbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 342 |     1 |   ccv_nnc_tensor_t* const gnt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
 343 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ga, gm), TENSOR_LIST(gbt, gnt), 0);
 344 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
 345 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
 346 |     1 |   ccv_nnc_tensor_t* const gb = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 347 |     1 |   ccv_nnc_tensor_t* const gn = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
 348 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, m), TENSOR_LIST(ga, gm), 0);
 349 |     1 |   ccv_nnc_cmd_exec(CMD_SGD_FORWARD(1, 0.9, 0.5, 0.999, 0.9, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(gg, ga, gm), TENSOR_LIST(gb, gn), 0);
 350 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(gb, gn), TENSOR_LIST(gbt, gnt), 0);
 351 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gbt->data.f32, b->data.f32, 10, 1e-3, "cpu result should match");
 352 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, gnt->data.f32, n->data.f32, 10, 1e-3, "cpu result should match");
 353 |     1 |   ccv_nnc_tensor_free(g);
 354 |     1 |   ccv_nnc_tensor_free(a);
 355 |     1 |   ccv_nnc_tensor_free(m);
 356 |     1 |   ccv_nnc_tensor_free(b);
 357 |     1 |   ccv_nnc_tensor_free(n);
 358 |     1 |   ccv_nnc_tensor_free(g16);
 359 |     1 |   ccv_nnc_tensor_free(gg);
 360 |     1 |   ccv_nnc_tensor_free(ga);
 361 |     1 |   ccv_nnc_tensor_free(gm);
 362 |     1 |   ccv_nnc_tensor_free(gb);
 363 |     1 |   ccv_nnc_tensor_free(gn);
 364 |     1 |   ccv_nnc_tensor_free(gbt);
 365 |     1 |   ccv_nnc_tensor_free(gnt);
 366 |     1 | }
 367 |       |
 368 |       | #include "case_main.h"