Coverage Report

Created: 2025-02-24 17:43

/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/gelu.tests.c
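
Ten GELU test cases are instrumented below, covering the forward pass and the gradient, in float and half precision, for both the default variant (CMD_GELU_FORWARD(0)) and the fast variant (CMD_GELU_FORWARD(1)). Reading the counts: every non-MPS case ran once end to end, while the two MPS-only cases ("mps gelu gradient in float" and "mps gelu gradient in half precision") show zero counts past their GUARD_ELSE_RETURN lines, which suggests the MPS backend was not available on this runner. No counts were recorded for the first case, "gelu in float".
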
Line | Count | Source
   1 |       | #include "case.h"
   2 |       | #include "ccv_case.h"
   3 |       | #include "ccv_nnc_case.h"
   4 |       | #include <ccv.h>
   5 |       | #include <nnc/ccv_nnc.h>
   6 |       | #include <nnc/ccv_nnc_easy.h>
   7 |       | #include <3rdparty/dsfmt/dSFMT.h>
   8 |       |
   9 |       | TEST_SETUP()
  10 |       | {
  11 |       |   ccv_nnc_init();
  12 |       | }
  13 |       |
  14 |       | TEST_CASE("gelu in float")
  15 |       | {
  16 |       |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_MPS));
  17 |       |   ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
  18 |       |   ccv_nnc_tensor_symbol_t a = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 20, 10), "a");
  19 |       |   ccv_nnc_tensor_symbol_t b = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 20, 10), "b");
  20 |       |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GELU_FORWARD(0), TENSOR_SYMBOL_LIST(a), TENSOR_SYMBOL_LIST(b), "gelu");
  21 |       |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
  22 |       |   SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
  23 |       |   ccv_nnc_graph_t* graph = 0;
  24 |       |   ccv_nnc_tensor_arena_t* tensor_arena = 0;
  25 |       |   ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0;
  26 |       |   ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena);
  27 |       |   GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
  28 |       |   ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 20, 10), 0);
  29 |       |   dsfmt_t dsfmt;
  30 |       |   dsfmt_init_gen_rand(&dsfmt, 0);
  31 |       |   int i;
  32 |       |   for (i = 0; i < 20 * 10; i++)
  33 |       |     x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  34 |       |   ccv_nnc_tensor_t* const a_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, a);
  35 |       |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(a_tensor), 0);
  36 |       |   ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0);
  37 |       |   ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 20, 10), 0);
  38 |       |   ccv_nnc_tensor_t* const b_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, b);
  39 |       |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b_tensor), TENSOR_LIST(y_tensor), 0);
  40 |       |   ccv_nnc_tensor_t* const ty = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 20, 10), 0);
  41 |       |   ccv_nnc_cmd_exec(CMD_GELU_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(ty), 0);
  42 |       |   REQUIRE_TENSOR_EQ(ty, y_tensor, "gelu from cudnn should match from CPU");
  43 |       |   ccv_nnc_tensor_free(x_tensor);
  44 |       |   ccv_nnc_tensor_free(y_tensor);
  45 |       |   ccv_nnc_tensor_free(ty);
  46 |       |   ccv_nnc_graph_free(graph);
  47 |       |   ccv_nnc_tensor_arena_free(tensor_arena);
  48 |       |   ccv_nnc_graph_exec_arena_free(graph_exec_arena);
  49 |       |   ccv_nnc_symbolic_graph_free(symbolic_graph);
  50 |       | }
  51 |       |
  52 |       | TEST_CASE("gelu in half precision")
  53 |     1 | {
  54 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_MPS));
  55 |     1 |   ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
  56 |     1 |   ccv_nnc_tensor_symbol_t a = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 16F, 20, 10), "a");
  57 |     1 |   ccv_nnc_tensor_symbol_t b = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 16F, 20, 10), "b");
  58 |     1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GELU_FORWARD(0), TENSOR_SYMBOL_LIST(a), TENSOR_SYMBOL_LIST(b), "gelu");
  59 |     1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
  60 |     1 |   SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
  61 |     1 |   ccv_nnc_graph_t* graph = 0;
  62 |     1 |   ccv_nnc_tensor_arena_t* tensor_arena = 0;
  63 |     1 |   ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0;
  64 |     1 |   ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena);
  65 |     1 |   GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
  66 |     1 |   ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 20, 10), 0);
  67 |     1 |   dsfmt_t dsfmt;
  68 |     1 |   dsfmt_init_gen_rand(&dsfmt, 0);
  69 |     1 |   int i;
  70 |   201 |   for (i = 0; i < 20 * 10; i++)
  71 |   200 |     x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  72 |     1 |   ccv_nnc_tensor_t* const a_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, a);
  73 |     1 |   ccv_nnc_tensor_t* const x16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(16F, 20, 10), 0);
  74 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(x16_tensor), 0);
  75 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x16_tensor), TENSOR_LIST(a_tensor), 0);
  76 |     1 |   ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0);
  77 |     1 |   ccv_nnc_tensor_t* const y16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(16F, 20, 10), 0);
  78 |     1 |   ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 20, 10), 0);
  79 |     1 |   ccv_nnc_tensor_t* const b_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, b);
  80 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b_tensor), TENSOR_LIST(y16_tensor), 0);
  81 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(y16_tensor), TENSOR_LIST(y_tensor), 0);
  82 |     1 |   ccv_nnc_tensor_t* const ty = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 20, 10), 0);
  83 |     1 |   ccv_nnc_cmd_exec(CMD_GELU_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(ty), 0);
  84 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, ty->data.f32, y_tensor->data.f32, 20 * 10, 1e-3, "gelu from cudnn should match from CPU");
  85 |     1 |   ccv_nnc_tensor_free(x_tensor);
  86 |     1 |   ccv_nnc_tensor_free(x16_tensor);
  87 |     1 |   ccv_nnc_tensor_free(y16_tensor);
  88 |     1 |   ccv_nnc_tensor_free(y_tensor);
  89 |     1 |   ccv_nnc_tensor_free(ty);
  90 |     1 |   ccv_nnc_graph_free(graph);
  91 |     1 |   ccv_nnc_tensor_arena_free(tensor_arena);
  92 |     1 |   ccv_nnc_graph_exec_arena_free(graph_exec_arena);
  93 |     1 |   ccv_nnc_symbolic_graph_free(symbolic_graph);
  94 |     1 | }
  95 |       |
  96 |       | TEST_CASE("gelu gradient in float")
  97 |     1 | {
  98 |     1 |   GUARD_ELSE_RETURN((ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
  99 |     1 |     ccv_nnc_cmd_ok(CCV_NNC_GELU_BACKWARD, CCV_NNC_BACKEND_GPU_REF)) ||
 100 |     1 |     (ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_MPS) &&
 101 |     1 |     ccv_nnc_cmd_ok(CCV_NNC_GELU_BACKWARD, CCV_NNC_BACKEND_MPS)));
 102 |     1 |   ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
 103 |     1 |   ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 10, 100), "x");
 104 |     1 |   ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 10, 100), "y");
 105 |     1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GELU_FORWARD(0), TENSOR_SYMBOL_LIST(x), TENSOR_SYMBOL_LIST(y), "gelu");
 106 |     1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 107 |     1 |   ccv_nnc_symbolic_graph_backward(symbolic_graph, TENSOR_SYMBOL_LIST(y), TENSOR_SYMBOL_LIST(x), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph));
 108 |     1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 109 |     1 |   SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
 110 |     1 |   ccv_nnc_tensor_symbol_t dy = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, y);
 111 |     1 |   ccv_nnc_tensor_symbol_t dx = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, x);
 112 |     1 |   ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 113 |     1 |   dsfmt_t dsfmt;
 114 |     1 |   dsfmt_init_gen_rand(&dsfmt, 0);
 115 |     1 |   int i;
 116 | 1.00k |   for (i = 0; i < 10 * 100; i++)
 117 | 1.00k |     x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 118 |     1 |   ccv_nnc_tensor_t* const dy_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 119 | 1.00k |   for (i = 0; i < 10 * 100; i++)
 120 | 1.00k |     dy_tensor->data.f32[i] = 0;
 121 |    11 |   for (i = 0; i < 10; i++)
 122 |    10 |     dy_tensor->data.f32[i * 100 + i] = 1;
 123 |     1 |   ccv_nnc_tensor_t* const dyt = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
 124 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor), TENSOR_LIST(dyt), 0);
 125 |     1 |   ccv_nnc_graph_t* graph = 0;
 126 |     1 |   ccv_nnc_tensor_arena_t* tensor_arena = 0;
 127 |     1 |   ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0;
 128 |     1 |   ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, TENSOR_BIND_MAP(KV(dy, dyt)), TENSOR_SYMBOL_LIST(y), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena);
 129 |     1 |   GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
 130 |     1 |   ccv_nnc_tensor_t* const xt = ccv_nnc_tensor_from_symbol(tensor_arena, x);
 131 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(xt), 0);
 132 |     1 |   ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0);
 133 |     1 |   ccv_nnc_tensor_t* const dx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 134 |     1 |   ccv_nnc_tensor_t* const dxt = ccv_nnc_tensor_from_symbol(tensor_arena, dx);
 135 |     1 |   ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 136 |     1 |   ccv_nnc_tensor_t* const yt = ccv_nnc_tensor_from_symbol(tensor_arena, y);
 137 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dxt), TENSOR_LIST(dx_tensor), 0);
 138 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(yt), TENSOR_LIST(y_tensor), 0);
 139 |     1 |   ccv_nnc_tensor_t* const ty_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 140 |     1 |   ccv_nnc_cmd_exec(CMD_GELU_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(ty_tensor), 0);
 141 |     1 |   REQUIRE_TENSOR_EQ(ty_tensor, y_tensor, "forward pass should match");
 142 |     1 |   ccv_nnc_tensor_t* const tdx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 143 |     1 |   ccv_nnc_cmd_exec(CMD_GELU_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor, x_tensor, 0), TENSOR_LIST(tdx_tensor), 0);
 144 |     1 |   REQUIRE_TENSOR_EQ(tdx_tensor, dx_tensor, "backward pass should match");
 145 |     1 |   ccv_nnc_tensor_free(x_tensor);
 146 |     1 |   ccv_nnc_tensor_free(y_tensor);
 147 |     1 |   ccv_nnc_tensor_free(dx_tensor);
 148 |     1 |   ccv_nnc_tensor_free(dy_tensor);
 149 |     1 |   ccv_nnc_tensor_free(ty_tensor);
 150 |     1 |   ccv_nnc_tensor_free(tdx_tensor);
 151 |     1 |   ccv_nnc_tensor_free(dyt);
 152 |     1 |   ccv_nnc_graph_free(graph);
 153 |     1 |   ccv_nnc_tensor_arena_free(tensor_arena);
 154 |     1 |   ccv_nnc_graph_exec_arena_free(graph_exec_arena);
 155 |     1 |   ccv_nnc_symbolic_graph_free(symbolic_graph);
 156 |     1 | }
 157 |       |
 158 |       | TEST_CASE("mps gelu gradient in float")
 159 |     1 | {
 160 |     1 |   GUARD_ELSE_RETURN((ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_MPS) &&
 161 |     1 |     ccv_nnc_cmd_ok(CCV_NNC_GELU_BACKWARD, CCV_NNC_BACKEND_MPS)));
 162 |     0 |   ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
 163 |     0 |   ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 10, 100), "x");
 164 |     0 |   ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 10, 100), "y");
 165 |     0 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GELU_FORWARD(0), TENSOR_SYMBOL_LIST(x), TENSOR_SYMBOL_LIST(y), "gelu");
 166 |     0 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 167 |     0 |   ccv_nnc_symbolic_graph_backward(symbolic_graph, TENSOR_SYMBOL_LIST(y), TENSOR_SYMBOL_LIST(x), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph));
 168 |     0 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 169 |     0 |   SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
 170 |     0 |   ccv_nnc_tensor_symbol_t dy = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, y);
 171 |     0 |   ccv_nnc_tensor_symbol_t dx = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, x);
 172 |     0 |   ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 173 |     0 |   dsfmt_t dsfmt;
 174 |     0 |   dsfmt_init_gen_rand(&dsfmt, 0);
 175 |     0 |   int i;
 176 |     0 |   for (i = 0; i < 10 * 100; i++)
 177 |     0 |     x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 178 |     0 |   ccv_nnc_tensor_t* const dy_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 179 |     0 |   for (i = 0; i < 10 * 100; i++)
 180 |     0 |     dy_tensor->data.f32[i] = 0;
 181 |     0 |   for (i = 0; i < 10; i++)
 182 |     0 |     dy_tensor->data.f32[i * 100 + i] = 1;
 183 |     0 |   ccv_nnc_tensor_t* const dyt = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
 184 |     0 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor), TENSOR_LIST(dyt), 0);
 185 |     0 |   ccv_nnc_graph_t* graph = 0;
 186 |     0 |   ccv_nnc_tensor_arena_t* tensor_arena = 0;
 187 |     0 |   ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0;
 188 |     0 |   ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, TENSOR_BIND_MAP(KV(dy, dyt)), TENSOR_SYMBOL_LIST(y), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena);
 189 |     0 |   GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
 190 |     0 |   ccv_nnc_tensor_t* const xt = ccv_nnc_tensor_from_symbol(tensor_arena, x);
 191 |     0 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(xt), 0);
 192 |     0 |   ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0);
 193 |     0 |   ccv_nnc_tensor_t* const dx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 194 |     0 |   ccv_nnc_tensor_t* const dxt = ccv_nnc_tensor_from_symbol(tensor_arena, dx);
 195 |     0 |   ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 196 |     0 |   ccv_nnc_tensor_t* const yt = ccv_nnc_tensor_from_symbol(tensor_arena, y);
 197 |     0 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dxt), TENSOR_LIST(dx_tensor), 0);
 198 |     0 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(yt), TENSOR_LIST(y_tensor), 0);
 199 |     0 |   ccv_nnc_tensor_t* const ty_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 200 |     0 |   ccv_nnc_cmd_exec(CMD_GELU_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(ty_tensor), 0);
 201 |     0 |   REQUIRE_TENSOR_EQ(ty_tensor, y_tensor, "forward pass should match");
 202 |     0 |   ccv_nnc_tensor_t* const tdx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 203 |     0 |   ccv_nnc_cmd_exec(CMD_GELU_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor, x_tensor, 0), TENSOR_LIST(tdx_tensor), 0);
 204 |     0 |   REQUIRE_TENSOR_EQ(tdx_tensor, dx_tensor, "backward pass should match");
 205 |     0 |   ccv_nnc_tensor_free(x_tensor);
 206 |     0 |   ccv_nnc_tensor_free(y_tensor);
 207 |     0 |   ccv_nnc_tensor_free(dx_tensor);
 208 |     0 |   ccv_nnc_tensor_free(dy_tensor);
 209 |     0 |   ccv_nnc_tensor_free(ty_tensor);
 210 |     0 |   ccv_nnc_tensor_free(tdx_tensor);
 211 |     0 |   ccv_nnc_tensor_free(dyt);
 212 |     0 |   ccv_nnc_graph_free(graph);
 213 |     0 |   ccv_nnc_tensor_arena_free(tensor_arena);
 214 |     0 |   ccv_nnc_graph_exec_arena_free(graph_exec_arena);
 215 |     0 |   ccv_nnc_symbolic_graph_free(symbolic_graph);
 216 |     0 | }
 217 |       |
 218 |       | TEST_CASE("gelu gradient in half precision")
 219 |     1 | {
 220 |     1 |   GUARD_ELSE_RETURN((ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
 221 |     1 |     ccv_nnc_cmd_ok(CCV_NNC_GELU_BACKWARD, CCV_NNC_BACKEND_GPU_REF)) ||
 222 |     1 |     (ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_MPS) &&
 223 |     1 |     ccv_nnc_cmd_ok(CCV_NNC_GELU_BACKWARD, CCV_NNC_BACKEND_MPS)));
 224 |     1 |   ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
 225 |     1 |   ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 16F, 10, 100), "x");
 226 |     1 |   ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 16F, 10, 100), "y");
 227 |     1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GELU_FORWARD(0), TENSOR_SYMBOL_LIST(x), TENSOR_SYMBOL_LIST(y), "gelu");
 228 |     1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 229 |     1 |   ccv_nnc_symbolic_graph_backward(symbolic_graph, TENSOR_SYMBOL_LIST(y), TENSOR_SYMBOL_LIST(x), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph));
 230 |     1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 231 |     1 |   SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
 232 |     1 |   ccv_nnc_tensor_symbol_t dy = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, y);
 233 |     1 |   ccv_nnc_tensor_symbol_t dx = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, x);
 234 |     1 |   ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 235 |     1 |   dsfmt_t dsfmt;
 236 |     1 |   dsfmt_init_gen_rand(&dsfmt, 0);
 237 |     1 |   int i;
 238 | 1.00k |   for (i = 0; i < 10 * 100; i++)
 239 | 1.00k |     x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 240 |     1 |   ccv_nnc_tensor_t* const dy_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 241 | 1.00k |   for (i = 0; i < 10 * 100; i++)
 242 | 1.00k |     dy_tensor->data.f32[i] = 0;
 243 |    11 |   for (i = 0; i < 10; i++)
 244 |    10 |     dy_tensor->data.f32[i * 100 + i] = 1;
 245 |     1 |   ccv_nnc_tensor_t* const dy16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 100), 0);
 246 |     1 |   ccv_nnc_tensor_t* const dyt = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10, 100), 0);
 247 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor), TENSOR_LIST(dy16_tensor), 0);
 248 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dy16_tensor), TENSOR_LIST(dyt), 0);
 249 |     1 |   ccv_nnc_graph_t* graph = 0;
 250 |     1 |   ccv_nnc_tensor_arena_t* tensor_arena = 0;
 251 |     1 |   ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0;
 252 |     1 |   ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, TENSOR_BIND_MAP(KV(dy, dyt)), TENSOR_SYMBOL_LIST(y), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena);
 253 |     1 |   GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
 254 |     1 |   ccv_nnc_tensor_t* const xt = ccv_nnc_tensor_from_symbol(tensor_arena, x);
 255 |     1 |   ccv_nnc_tensor_t* const x16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 100), 0);
 256 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(x16_tensor), 0);
 257 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x16_tensor), TENSOR_LIST(xt), 0);
 258 |     1 |   ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0);
 259 |     1 |   ccv_nnc_tensor_t* const dx16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 100), 0);
 260 |     1 |   ccv_nnc_tensor_t* const dx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 261 |     1 |   ccv_nnc_tensor_t* const dxt = ccv_nnc_tensor_from_symbol(tensor_arena, dx);
 262 |     1 |   ccv_nnc_tensor_t* const y16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 100), 0);
 263 |     1 |   ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 264 |     1 |   ccv_nnc_tensor_t* const yt = ccv_nnc_tensor_from_symbol(tensor_arena, y);
 265 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dxt), TENSOR_LIST(dx16_tensor), 0);
 266 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dx16_tensor), TENSOR_LIST(dx_tensor), 0);
 267 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(yt), TENSOR_LIST(y16_tensor), 0);
 268 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(y16_tensor), TENSOR_LIST(y_tensor), 0);
 269 |     1 |   ccv_nnc_tensor_t* const ty_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 270 |     1 |   ccv_nnc_cmd_exec(CMD_GELU_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(ty_tensor), 0);
 271 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, ty_tensor->data.f32, y_tensor->data.f32, 10 * 100, 1e-3, "forward pass should match");
 272 |     1 |   ccv_nnc_tensor_t* const tdx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 273 |     1 |   ccv_nnc_cmd_exec(CMD_GELU_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor, x_tensor, 0), TENSOR_LIST(tdx_tensor), 0);
 274 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, tdx_tensor->data.f32, dx_tensor->data.f32, 10 * 100, 1e-3, "backward pass should match");
 275 |     1 |   ccv_nnc_tensor_free(x_tensor);
 276 |     1 |   ccv_nnc_tensor_free(x16_tensor);
 277 |     1 |   ccv_nnc_tensor_free(y_tensor);
 278 |     1 |   ccv_nnc_tensor_free(y16_tensor);
 279 |     1 |   ccv_nnc_tensor_free(dx_tensor);
 280 |     1 |   ccv_nnc_tensor_free(dx16_tensor);
 281 |     1 |   ccv_nnc_tensor_free(dy_tensor);
 282 |     1 |   ccv_nnc_tensor_free(dy16_tensor);
 283 |     1 |   ccv_nnc_tensor_free(ty_tensor);
 284 |     1 |   ccv_nnc_tensor_free(tdx_tensor);
 285 |     1 |   ccv_nnc_tensor_free(dyt);
 286 |     1 |   ccv_nnc_graph_free(graph);
 287 |     1 |   ccv_nnc_tensor_arena_free(tensor_arena);
 288 |     1 |   ccv_nnc_graph_exec_arena_free(graph_exec_arena);
 289 |     1 |   ccv_nnc_symbolic_graph_free(symbolic_graph);
 290 |     1 | }
 291 |       |
 292 |       | TEST_CASE("mps gelu gradient in half precision")
 293 |     1 | {
 294 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_MPS) &&
 295 |     1 |     ccv_nnc_cmd_ok(CCV_NNC_GELU_BACKWARD, CCV_NNC_BACKEND_MPS));
 296 |     0 |   ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
 297 |     0 |   ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 16F, 10, 100), "x");
 298 |     0 |   ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 16F, 10, 100), "y");
 299 |     0 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GELU_FORWARD(0), TENSOR_SYMBOL_LIST(x), TENSOR_SYMBOL_LIST(y), "gelu");
 300 |     0 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 301 |     0 |   ccv_nnc_symbolic_graph_backward(symbolic_graph, TENSOR_SYMBOL_LIST(y), TENSOR_SYMBOL_LIST(x), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph));
 302 |     0 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 303 |     0 |   SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
 304 |     0 |   ccv_nnc_tensor_symbol_t dy = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, y);
 305 |     0 |   ccv_nnc_tensor_symbol_t dx = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, x);
 306 |     0 |   ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 307 |     0 |   dsfmt_t dsfmt;
 308 |     0 |   dsfmt_init_gen_rand(&dsfmt, 0);
 309 |     0 |   int i;
 310 |     0 |   for (i = 0; i < 10 * 100; i++)
 311 |     0 |     x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 312 |     0 |   ccv_nnc_tensor_t* const dy_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 313 |     0 |   for (i = 0; i < 10 * 100; i++)
 314 |     0 |     dy_tensor->data.f32[i] = 0;
 315 |     0 |   for (i = 0; i < 10; i++)
 316 |     0 |     dy_tensor->data.f32[i * 100 + i] = 1;
 317 |     0 |   ccv_nnc_tensor_t* const dy16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 100), 0);
 318 |     0 |   ccv_nnc_tensor_t* const dyt = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10, 100), 0);
 319 |     0 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor), TENSOR_LIST(dy16_tensor), 0);
 320 |     0 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dy16_tensor), TENSOR_LIST(dyt), 0);
 321 |     0 |   ccv_nnc_graph_t* graph = 0;
 322 |     0 |   ccv_nnc_tensor_arena_t* tensor_arena = 0;
 323 |     0 |   ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0;
 324 |     0 |   ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, TENSOR_BIND_MAP(KV(dy, dyt)), TENSOR_SYMBOL_LIST(y), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena);
 325 |     0 |   GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
 326 |     0 |   ccv_nnc_tensor_t* const xt = ccv_nnc_tensor_from_symbol(tensor_arena, x);
 327 |     0 |   ccv_nnc_tensor_t* const x16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 100), 0);
 328 |     0 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(x16_tensor), 0);
 329 |     0 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x16_tensor), TENSOR_LIST(xt), 0);
 330 |     0 |   ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0);
 331 |     0 |   ccv_nnc_tensor_t* const dx16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 100), 0);
 332 |     0 |   ccv_nnc_tensor_t* const dx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 333 |     0 |   ccv_nnc_tensor_t* const dxt = ccv_nnc_tensor_from_symbol(tensor_arena, dx);
 334 |     0 |   ccv_nnc_tensor_t* const y16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 100), 0);
 335 |     0 |   ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 336 |     0 |   ccv_nnc_tensor_t* const yt = ccv_nnc_tensor_from_symbol(tensor_arena, y);
 337 |     0 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dxt), TENSOR_LIST(dx16_tensor), 0);
 338 |     0 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dx16_tensor), TENSOR_LIST(dx_tensor), 0);
 339 |     0 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(yt), TENSOR_LIST(y16_tensor), 0);
 340 |     0 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(y16_tensor), TENSOR_LIST(y_tensor), 0);
 341 |     0 |   ccv_nnc_tensor_t* const ty_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 342 |     0 |   ccv_nnc_cmd_exec(CMD_GELU_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(ty_tensor), 0);
 343 |     0 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, ty_tensor->data.f32, y_tensor->data.f32, 10 * 100, 1e-3, "forward pass should match");
 344 |     0 |   ccv_nnc_tensor_t* const tdx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 345 |     0 |   ccv_nnc_cmd_exec(CMD_GELU_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor, x_tensor, 0), TENSOR_LIST(tdx_tensor), 0);
 346 |     0 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, tdx_tensor->data.f32, dx_tensor->data.f32, 10 * 100, 1e-3, "backward pass should match");
 347 |     0 |   ccv_nnc_tensor_free(x_tensor);
 348 |     0 |   ccv_nnc_tensor_free(x16_tensor);
 349 |     0 |   ccv_nnc_tensor_free(y_tensor);
 350 |     0 |   ccv_nnc_tensor_free(y16_tensor);
 351 |     0 |   ccv_nnc_tensor_free(dx_tensor);
 352 |     0 |   ccv_nnc_tensor_free(dx16_tensor);
 353 |     0 |   ccv_nnc_tensor_free(dy_tensor);
 354 |     0 |   ccv_nnc_tensor_free(dy16_tensor);
 355 |     0 |   ccv_nnc_tensor_free(ty_tensor);
 356 |     0 |   ccv_nnc_tensor_free(tdx_tensor);
 357 |     0 |   ccv_nnc_tensor_free(dyt);
 358 |     0 |   ccv_nnc_graph_free(graph);
 359 |     0 |   ccv_nnc_tensor_arena_free(tensor_arena);
 360 |     0 |   ccv_nnc_graph_exec_arena_free(graph_exec_arena);
 361 |     0 |   ccv_nnc_symbolic_graph_free(symbolic_graph);
 362 |     0 | }
 363 |       |
 364 |       | TEST_CASE("fast gelu in float")
 365 |     1 | {
 366 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_MPS));
 367 |     1 |   ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
 368 |     1 |   ccv_nnc_tensor_symbol_t a = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 20, 10), "a");
 369 |     1 |   ccv_nnc_tensor_symbol_t b = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 20, 10), "b");
 370 |     1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GELU_FORWARD(1), TENSOR_SYMBOL_LIST(a), TENSOR_SYMBOL_LIST(b), "gelu");
 371 |     1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 372 |     1 |   SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
 373 |     1 |   ccv_nnc_graph_t* graph = 0;
 374 |     1 |   ccv_nnc_tensor_arena_t* tensor_arena = 0;
 375 |     1 |   ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0;
 376 |     1 |   ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena);
 377 |     1 |   GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
 378 |     1 |   ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 20, 10), 0);
 379 |     1 |   dsfmt_t dsfmt;
 380 |     1 |   dsfmt_init_gen_rand(&dsfmt, 0);
 381 |     1 |   int i;
 382 |   201 |   for (i = 0; i < 20 * 10; i++)
 383 |   200 |     x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 384 |     1 |   ccv_nnc_tensor_t* const a_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, a);
 385 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(a_tensor), 0);
 386 |     1 |   ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0);
 387 |     1 |   ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 20, 10), 0);
 388 |     1 |   ccv_nnc_tensor_t* const b_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, b);
 389 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b_tensor), TENSOR_LIST(y_tensor), 0);
 390 |     1 |   ccv_nnc_tensor_t* const ty = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 20, 10), 0);
 391 |     1 |   ccv_nnc_cmd_exec(CMD_GELU_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(ty), 0);
 392 |     1 |   REQUIRE_TENSOR_EQ(ty, y_tensor, "gelu from cudnn should match from CPU");
 393 |     1 |   ccv_nnc_tensor_free(x_tensor);
 394 |     1 |   ccv_nnc_tensor_free(y_tensor);
 395 |     1 |   ccv_nnc_tensor_free(ty);
 396 |     1 |   ccv_nnc_graph_free(graph);
 397 |     1 |   ccv_nnc_tensor_arena_free(tensor_arena);
 398 |     1 |   ccv_nnc_graph_exec_arena_free(graph_exec_arena);
 399 |     1 |   ccv_nnc_symbolic_graph_free(symbolic_graph);
 400 |     1 | }
 401 |       |
 402 |       | TEST_CASE("fast gelu in half precision")
 403 |     1 | {
 404 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_MPS));
 405 |     1 |   ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
 406 |     1 |   ccv_nnc_tensor_symbol_t a = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 16F, 20, 10), "a");
 407 |     1 |   ccv_nnc_tensor_symbol_t b = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 16F, 20, 10), "b");
 408 |     1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GELU_FORWARD(1), TENSOR_SYMBOL_LIST(a), TENSOR_SYMBOL_LIST(b), "gelu");
 409 |     1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 410 |     1 |   SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
 411 |     1 |   ccv_nnc_graph_t* graph = 0;
 412 |     1 |   ccv_nnc_tensor_arena_t* tensor_arena = 0;
 413 |     1 |   ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0;
 414 |     1 |   ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena);
 415 |     1 |   GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
 416 |     1 |   ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 20, 10), 0);
 417 |     1 |   dsfmt_t dsfmt;
 418 |     1 |   dsfmt_init_gen_rand(&dsfmt, 0);
 419 |     1 |   int i;
 420 |   201 |   for (i = 0; i < 20 * 10; i++)
 421 |   200 |     x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 422 |     1 |   ccv_nnc_tensor_t* const a_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, a);
 423 |     1 |   ccv_nnc_tensor_t* const x16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(16F, 20, 10), 0);
 424 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(x16_tensor), 0);
 425 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x16_tensor), TENSOR_LIST(a_tensor), 0);
 426 |     1 |   ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0);
 427 |     1 |   ccv_nnc_tensor_t* const y16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(16F, 20, 10), 0);
 428 |     1 |   ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 20, 10), 0);
 429 |     1 |   ccv_nnc_tensor_t* const b_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, b);
 430 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b_tensor), TENSOR_LIST(y16_tensor), 0);
 431 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(y16_tensor), TENSOR_LIST(y_tensor), 0);
 432 |     1 |   ccv_nnc_tensor_t* const ty = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 20, 10), 0);
 433 |     1 |   ccv_nnc_cmd_exec(CMD_GELU_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(ty), 0);
 434 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, ty->data.f32, y_tensor->data.f32, 20 * 10, 1e-3, "gelu from cudnn should match from CPU");
 435 |     1 |   ccv_nnc_tensor_free(x_tensor);
 436 |     1 |   ccv_nnc_tensor_free(x16_tensor);
 437 |     1 |   ccv_nnc_tensor_free(y16_tensor);
 438 |     1 |   ccv_nnc_tensor_free(y_tensor);
 439 |     1 |   ccv_nnc_tensor_free(ty);
 440 |     1 |   ccv_nnc_graph_free(graph);
 441 |     1 |   ccv_nnc_tensor_arena_free(tensor_arena);
 442 |     1 |   ccv_nnc_graph_exec_arena_free(graph_exec_arena);
 443 |     1 |   ccv_nnc_symbolic_graph_free(symbolic_graph);
 444 |     1 | }
 445 |       |
 446 |       | TEST_CASE("fast gelu gradient in float")
 447 |     1 | {
 448 |     1 |   GUARD_ELSE_RETURN((ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
 449 |     1 |     ccv_nnc_cmd_ok(CCV_NNC_GELU_BACKWARD, CCV_NNC_BACKEND_GPU_REF)) ||
 450 |     1 |     (ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_MPS) &&
 451 |     1 |     ccv_nnc_cmd_ok(CCV_NNC_GELU_BACKWARD, CCV_NNC_BACKEND_MPS)));
 452 |     1 |   ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
 453 |     1 |   ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 10, 100), "x");
 454 |     1 |   ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 10, 100), "y");
 455 |     1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GELU_FORWARD(1), TENSOR_SYMBOL_LIST(x), TENSOR_SYMBOL_LIST(y), "gelu");
 456 |     1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 457 |     1 |   ccv_nnc_symbolic_graph_backward(symbolic_graph, TENSOR_SYMBOL_LIST(y), TENSOR_SYMBOL_LIST(x), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph));
 458 |     1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 459 |     1 |   SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
 460 |     1 |   ccv_nnc_tensor_symbol_t dy = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, y);
 461 |     1 |   ccv_nnc_tensor_symbol_t dx = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, x);
 462 |     1 |   ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 463 |     1 |   dsfmt_t dsfmt;
 464 |     1 |   dsfmt_init_gen_rand(&dsfmt, 0);
 465 |     1 |   int i;
 466 | 1.00k |   for (i = 0; i < 10 * 100; i++)
 467 | 1.00k |     x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 468 |     1 |   ccv_nnc_tensor_t* const dy_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 469 | 1.00k |   for (i = 0; i < 10 * 100; i++)
 470 | 1.00k |     dy_tensor->data.f32[i] = 0;
 471 |    11 |   for (i = 0; i < 10; i++)
 472 |    10 |     dy_tensor->data.f32[i * 100 + i] = 1;
 473 |     1 |   ccv_nnc_tensor_t* const dyt = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
 474 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor), TENSOR_LIST(dyt), 0);
 475 |     1 |   ccv_nnc_graph_t* graph = 0;
 476 |     1 |   ccv_nnc_tensor_arena_t* tensor_arena = 0;
 477 |     1 |   ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0;
 478 |     1 |   ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, TENSOR_BIND_MAP(KV(dy, dyt)), TENSOR_SYMBOL_LIST(y), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena);
 479 |     1 |   GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
 480 |     1 |   ccv_nnc_tensor_t* const xt = ccv_nnc_tensor_from_symbol(tensor_arena, x);
 481 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(xt), 0);
 482 |     1 |   ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0);
 483 |     1 |   ccv_nnc_tensor_t* const dx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 484 |     1 |   ccv_nnc_tensor_t* const dxt = ccv_nnc_tensor_from_symbol(tensor_arena, dx);
 485 |     1 |   ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 486 |     1 |   ccv_nnc_tensor_t* const yt = ccv_nnc_tensor_from_symbol(tensor_arena, y);
 487 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dxt), TENSOR_LIST(dx_tensor), 0);
 488 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(yt), TENSOR_LIST(y_tensor), 0);
 489 |     1 |   ccv_nnc_tensor_t* const ty_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 490 |     1 |   ccv_nnc_cmd_exec(CMD_GELU_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(ty_tensor), 0);
 491 |     1 |   REQUIRE_TENSOR_EQ(ty_tensor, y_tensor, "forward pass should match");
 492 |     1 |   ccv_nnc_tensor_t* const tdx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 493 |     1 |   ccv_nnc_cmd_exec(CMD_GELU_BACKWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor, x_tensor, 0), TENSOR_LIST(tdx_tensor), 0);
 494 |     1 |   REQUIRE_TENSOR_EQ(tdx_tensor, dx_tensor, "backward pass should match");
 495 |     1 |   ccv_nnc_tensor_free(x_tensor);
 496 |     1 |   ccv_nnc_tensor_free(y_tensor);
 497 |     1 |   ccv_nnc_tensor_free(dx_tensor);
 498 |     1 |   ccv_nnc_tensor_free(dy_tensor);
 499 |     1 |   ccv_nnc_tensor_free(ty_tensor);
 500 |     1 |   ccv_nnc_tensor_free(tdx_tensor);
 501 |     1 |   ccv_nnc_tensor_free(dyt);
 502 |     1 |   ccv_nnc_graph_free(graph);
 503 |     1 |   ccv_nnc_tensor_arena_free(tensor_arena);
 504 |     1 |   ccv_nnc_graph_exec_arena_free(graph_exec_arena);
 505 |     1 |   ccv_nnc_symbolic_graph_free(symbolic_graph);
 506 |     1 | }
 507 |       |
 508 |       | TEST_CASE("fast gelu gradient in half precision")
 509 |     1 | {
 510 |     1 |   GUARD_ELSE_RETURN((ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
 511 |     1 |     ccv_nnc_cmd_ok(CCV_NNC_GELU_BACKWARD, CCV_NNC_BACKEND_GPU_REF)) ||
 512 |     1 |     (ccv_nnc_cmd_ok(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_MPS) &&
 513 |     1 |     ccv_nnc_cmd_ok(CCV_NNC_GELU_BACKWARD, CCV_NNC_BACKEND_MPS)));
 514 |       |
 515 |     1 |   ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
 516 |     1 |   ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 16F, 10, 100), "x");
 517 |     1 |   ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 16F, 10, 100), "y");
 518 |     1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GELU_FORWARD(1), TENSOR_SYMBOL_LIST(x), TENSOR_SYMBOL_LIST(y), "gelu");
 519 |     1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 520 |     1 |   ccv_nnc_symbolic_graph_backward(symbolic_graph, TENSOR_SYMBOL_LIST(y), TENSOR_SYMBOL_LIST(x), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph));
 521 |     1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 522 |     1 |   SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
 523 |     1 |   ccv_nnc_tensor_symbol_t dy = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, y);
 524 |     1 |   ccv_nnc_tensor_symbol_t dx = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, x);
 525 |     1 |   ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 526 |     1 |   dsfmt_t dsfmt;
 527 |     1 |   dsfmt_init_gen_rand(&dsfmt, 0);
 528 |     1 |   int i;
 529 | 1.00k |   for (i = 0; i < 10 * 100; i++)
 530 | 1.00k |     x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 531 |     1 |   ccv_nnc_tensor_t* const dy_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 532 | 1.00k |   for (i = 0; i < 10 * 100; i++)
 533 | 1.00k |     dy_tensor->data.f32[i] = 0;
 534 |    11 |   for (i = 0; i < 10; i++)
 535 |    10 |     dy_tensor->data.f32[i * 100 + i] = 1;
 536 |     1 |   ccv_nnc_tensor_t* const dy16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 100), 0);
 537 |     1 |   ccv_nnc_tensor_t* const dyt = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10, 100), 0);
 538 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor), TENSOR_LIST(dy16_tensor), 0);
 539 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dy16_tensor), TENSOR_LIST(dyt), 0);
 540 |     1 |   ccv_nnc_graph_t* graph = 0;
 541 |     1 |   ccv_nnc_tensor_arena_t* tensor_arena = 0;
 542 |     1 |   ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0;
 543 |     1 |   ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, TENSOR_BIND_MAP(KV(dy, dyt)), TENSOR_SYMBOL_LIST(y), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena);
 544 |     1 |   GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
 545 |     1 |   ccv_nnc_tensor_t* const xt = ccv_nnc_tensor_from_symbol(tensor_arena, x);
 546 |     1 |   ccv_nnc_tensor_t* const x16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 100), 0);
 547 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(x16_tensor), 0);
 548 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(x16_tensor), TENSOR_LIST(xt), 0);
 549 |     1 |   ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0);
 550 |     1 |   ccv_nnc_tensor_t* const dx16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 100), 0);
 551 |     1 |   ccv_nnc_tensor_t* const dx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 552 |     1 |   ccv_nnc_tensor_t* const dxt = ccv_nnc_tensor_from_symbol(tensor_arena, dx);
 553 |     1 |   ccv_nnc_tensor_t* const y16_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 100), 0);
 554 |     1 |   ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 555 |     1 |   ccv_nnc_tensor_t* const yt = ccv_nnc_tensor_from_symbol(tensor_arena, y);
 556 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dxt), TENSOR_LIST(dx16_tensor), 0);
 557 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(dx16_tensor), TENSOR_LIST(dx_tensor), 0);
 558 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(yt), TENSOR_LIST(y16_tensor), 0);
 559 |     1 |   ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(y16_tensor), TENSOR_LIST(y_tensor), 0);
 560 |     1 |   ccv_nnc_tensor_t* const ty_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 561 |     1 |   ccv_nnc_cmd_exec(CMD_GELU_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(x_tensor), TENSOR_LIST(ty_tensor), 0);
 562 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, ty_tensor->data.f32, y_tensor->data.f32, 10 * 100, 1e-3, "forward pass should match");
 563 |     1 |   ccv_nnc_tensor_t* const tdx_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
 564 |     1 |   ccv_nnc_cmd_exec(CMD_GELU_BACKWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(dy_tensor, x_tensor, 0), TENSOR_LIST(tdx_tensor), 0);
 565 |     1 |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, tdx_tensor->data.f32, dx_tensor->data.f32, 10 * 100, 1e-3, "backward pass should match");
 566 |     1 |   ccv_nnc_tensor_free(x_tensor);
 567 |     1 |   ccv_nnc_tensor_free(x16_tensor);
 568 |     1 |   ccv_nnc_tensor_free(y_tensor);
 569 |     1 |   ccv_nnc_tensor_free(y16_tensor);
 570 |     1 |   ccv_nnc_tensor_free(dx_tensor);
 571 |     1 |   ccv_nnc_tensor_free(dx16_tensor);
 572 |     1 |   ccv_nnc_tensor_free(dy_tensor);
 573 |     1 |   ccv_nnc_tensor_free(dy16_tensor);
 574 |     1 |   ccv_nnc_tensor_free(ty_tensor);
 575 |     1 |   ccv_nnc_tensor_free(tdx_tensor);
 576 |     1 |   ccv_nnc_tensor_free(dyt);
 577 |     1 |   ccv_nnc_graph_free(graph);
 578 |     1 |   ccv_nnc_tensor_arena_free(tensor_arena);
 579 |     1 |   ccv_nnc_graph_exec_arena_free(graph_exec_arena);
 580 |     1 |   ccv_nnc_symbolic_graph_free(symbolic_graph);
 581 |     1 | }
 582 |       |
 583 |       | #include "case_main.h"