/home/liu/actions-runner/_work/ccv/ccv/test/unit/nnc/rmsnorm.tests.c
Line | Count | Source |
1 | | #include "case.h" |
2 | | #include "ccv_case.h" |
3 | | #include "ccv_nnc_case.h" |
4 | | #include <ccv.h> |
5 | | #include <nnc/ccv_nnc.h> |
6 | | #include <nnc/ccv_nnc_easy.h> |
7 | | #include "3rdparty/dsfmt/dSFMT.h" |
8 | | |
9 | | TEST_SETUP() |
10 | | { |
11 | | ccv_nnc_init(); |
12 | | } |
13 | | |
14 | | TEST_CASE("implement rmsnorm with other symbolic graph") |
15 | 1 | { |
16 | 1 | ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new(); |
17 | 1 | ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "x"); |
18 | 1 | ccv_nnc_tensor_symbol_t sqr = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "sqr"); |
19 | 1 | ccv_nnc_tensor_symbol_t varsum = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "varsum"); |
20 | 1 | ccv_nnc_tensor_symbol_t var = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "var"); |
21 | 1 | ccv_nnc_tensor_symbol_t logvar = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "logvar"); |
22 | 1 | ccv_nnc_tensor_symbol_t logvar_2 = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "logvar"); |
23 | 1 | ccv_nnc_tensor_symbol_t std = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "std"); |
24 | 1 | ccv_nnc_tensor_symbol_t inv_std = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "inv_std"); |
25 | 1 | ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "y"); |
26 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWPROD_FORWARD(), TENSOR_SYMBOL_LIST(x, x), TENSOR_SYMBOL_LIST(sqr), "sqr"); |
27 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_REDUCE_SUM_FORWARD(1, 2, 3), TENSOR_SYMBOL_LIST(sqr), TENSOR_SYMBOL_LIST(varsum), "varsum"); |
28 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SCALAR_MUL_FORWARD(1.0 / (4 * 4 * 10)), TENSOR_SYMBOL_LIST(varsum), TENSOR_SYMBOL_LIST(var), "var"); |
29 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWLOG_FORWARD(), TENSOR_SYMBOL_LIST(var), TENSOR_SYMBOL_LIST(logvar), "log(var)"); |
30 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SCALAR_MUL_FORWARD(0.5), TENSOR_SYMBOL_LIST(logvar), TENSOR_SYMBOL_LIST(logvar_2), "log(var)/2"); |
31 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWEXP_FORWARD(), TENSOR_SYMBOL_LIST(logvar_2), TENSOR_SYMBOL_LIST(std), "std"); |
32 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWDIV_FORWARD(), TENSOR_SYMBOL_LIST(NO_TENSOR_SYMBOL, std), TENSOR_SYMBOL_LIST(inv_std), "1/std"); |
33 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_MUL_FORWARD(1), TENSOR_SYMBOL_LIST(x, inv_std), TENSOR_SYMBOL_LIST(y), "y"); |
34 | 1 | ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
35 | 1 | SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH); |
36 | 1 | ccv_nnc_graph_t* graph = 0; |
37 | 1 | ccv_nnc_tensor_arena_t* tensor_arena = 0; |
38 | 1 | ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0; |
39 | 1 | ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena); |
40 | 1 | GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH); |
41 | 1 | ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, x); |
42 | 1 | dsfmt_t dsfmt; |
43 | 1 | int i; |
44 | 1 | dsfmt_init_gen_rand(&dsfmt, 1); |
45 | 1.28k | for (i = 0; i < 8 * 4 * 4 * 10; i++1.28k ) |
46 | 1.28k | x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt); |
47 | 1 | ccv_nnc_symbolic_graph_t* const rmsnorm_symbolic_graph = ccv_nnc_symbolic_graph_new(); |
48 | 1 | ccv_nnc_tensor_symbol_t bx = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "x"); |
49 | 1 | ccv_nnc_tensor_symbol_t by = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "y"); |
50 | 1 | ccv_nnc_tensor_symbol_t scale = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 1, 4, 4, 10), "scale"); |
51 | 1 | ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "saved_inv_std"); |
52 | 1 | ccv_nnc_graph_exec_symbol_new(rmsnorm_symbolic_graph, CMD_SET_FORWARD(1), 0, 0, TENSOR_SYMBOL_LIST(scale), "set_scale"); |
53 | 1 | ccv_nnc_graph_exec_symbol_new(rmsnorm_symbolic_graph, CMD_RMSNORM_FORWARD(0, 1, 1, 2, 3), TENSOR_SYMBOL_LIST(bx, scale), TENSOR_SYMBOL_LIST(by, saved_inv_std), "rmsnorm"); |
54 | 1 | ccv_nnc_graph_exec_symbol_autogen(rmsnorm_symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
55 | 1 | ccv_nnc_graph_t* rmsnorm_graph = 0; |
56 | 1 | ccv_nnc_tensor_arena_t* rmsnorm_tensor_arena = 0; |
57 | 1 | ccv_nnc_graph_exec_arena_t* rmsnorm_graph_exec_arena = 0; |
58 | 1 | ccv_nnc_symbolic_graph_compile(rmsnorm_symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(rmsnorm_symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(rmsnorm_symbolic_graph), &rmsnorm_graph, &rmsnorm_tensor_arena, &rmsnorm_graph_exec_arena); |
59 | 1 | ccv_nnc_tensor_t* const bx_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, bx); |
60 | 1 | memcpy(bx_tensor->data.f32, x_tensor->data.f32, sizeof(float) * 8 * 4 * 4 * 10); |
61 | 1 | ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0); |
62 | 1 | ccv_nnc_graph_run(rmsnorm_graph, 0, TRAVERSE_FULL, 0, 0); |
63 | 1 | ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, y); |
64 | 1 | ccv_nnc_tensor_t* const by_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, by); |
65 | 1 | REQUIRE_TENSOR_EQ(y_tensor, by_tensor, "graph computed result should match rmsnorm op result"); |
66 | 1 | ccv_nnc_symbolic_graph_free(symbolic_graph); |
67 | 1 | ccv_nnc_tensor_arena_free(tensor_arena); |
68 | 1 | ccv_nnc_graph_exec_arena_free(graph_exec_arena); |
69 | 1 | ccv_nnc_graph_free(graph); |
70 | 1 | ccv_nnc_symbolic_graph_free(rmsnorm_symbolic_graph); |
71 | 1 | ccv_nnc_tensor_arena_free(rmsnorm_tensor_arena); |
72 | 1 | ccv_nnc_graph_exec_arena_free(rmsnorm_graph_exec_arena); |
73 | 1 | ccv_nnc_graph_free(rmsnorm_graph); |
74 | 1 | } |
75 | | |
76 | | TEST_CASE("compare rmsnorm gradient with other symbolic graph") |
77 | 1 | { |
78 | 1 | ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new(); |
79 | 1 | ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "x"); |
80 | 1 | ccv_nnc_tensor_symbol_t sqr = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "sqr"); |
81 | 1 | ccv_nnc_tensor_symbol_t varsum = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "varsum"); |
82 | 1 | ccv_nnc_tensor_symbol_t var = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "var"); |
83 | 1 | ccv_nnc_tensor_symbol_t logvar = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "logvar"); |
84 | 1 | ccv_nnc_tensor_symbol_t logvar_2 = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "logvar"); |
85 | 1 | ccv_nnc_tensor_symbol_t std = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "std"); |
86 | 1 | ccv_nnc_tensor_symbol_t inv_std = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "inv_std"); |
87 | 1 | ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "y"); |
88 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWPROD_FORWARD(), TENSOR_SYMBOL_LIST(x, x), TENSOR_SYMBOL_LIST(sqr), "sqr"); |
89 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_REDUCE_SUM_FORWARD(1, 2, 3), TENSOR_SYMBOL_LIST(sqr), TENSOR_SYMBOL_LIST(varsum), "varsum"); |
90 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SCALAR_MUL_FORWARD(1.0 / (4 * 4 * 10)), TENSOR_SYMBOL_LIST(varsum), TENSOR_SYMBOL_LIST(var), "var"); |
91 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWLOG_FORWARD(), TENSOR_SYMBOL_LIST(var), TENSOR_SYMBOL_LIST(logvar), "log(var)"); |
92 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SCALAR_MUL_FORWARD(0.5), TENSOR_SYMBOL_LIST(logvar), TENSOR_SYMBOL_LIST(logvar_2), "log(var)/2"); |
93 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWEXP_FORWARD(), TENSOR_SYMBOL_LIST(logvar_2), TENSOR_SYMBOL_LIST(std), "std"); |
94 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWDIV_FORWARD(), TENSOR_SYMBOL_LIST(NO_TENSOR_SYMBOL, std), TENSOR_SYMBOL_LIST(inv_std), "1/std"); |
95 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_MUL_FORWARD(1), TENSOR_SYMBOL_LIST(x, inv_std), TENSOR_SYMBOL_LIST(y), "y"); |
96 | 1 | ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
97 | 1 | ccv_nnc_symbolic_graph_backward(symbolic_graph, TENSOR_SYMBOL_LIST(y), TENSOR_SYMBOL_LIST(x), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph)); |
98 | 1 | ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
99 | 1 | ccv_nnc_tensor_symbol_t dy = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, y); |
100 | 1 | ccv_nnc_tensor_symbol_t dx = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, x); |
101 | 1 | SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH); |
102 | 1 | ccv_nnc_graph_t* graph = 0; |
103 | 1 | ccv_nnc_tensor_arena_t* tensor_arena = 0; |
104 | 1 | ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0; |
105 | 1 | ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena); |
106 | 1 | GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH); |
107 | 1 | ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, x); |
108 | 1 | dsfmt_t dsfmt; |
109 | 1 | int i; |
110 | 1 | dsfmt_init_gen_rand(&dsfmt, 1); |
111 | 1.28k | for (i = 0; i < 8 * 4 * 4 * 10; i++1.28k ) |
112 | 1.28k | x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt); |
113 | 1 | ccv_nnc_symbolic_graph_t* const rmsnorm_symbolic_graph = ccv_nnc_symbolic_graph_new(); |
114 | 1 | ccv_nnc_tensor_symbol_t bx = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "x"); |
115 | 1 | ccv_nnc_tensor_symbol_t by = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "y"); |
116 | 1 | ccv_nnc_tensor_symbol_t scale = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 1, 4, 4, 10), "scale"); |
117 | 1 | ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "saved_inv_std"); |
118 | 1 | ccv_nnc_graph_exec_symbol_new(rmsnorm_symbolic_graph, CMD_SET_FORWARD(1), 0, 0, TENSOR_SYMBOL_LIST(scale), "set_scale"); |
119 | 1 | ccv_nnc_graph_exec_symbol_new(rmsnorm_symbolic_graph, CMD_RMSNORM_FORWARD(0, 1, 1, 2, 3), TENSOR_SYMBOL_LIST(bx, scale), TENSOR_SYMBOL_LIST(by, saved_inv_std), "rmsnorm"); |
120 | 1 | ccv_nnc_graph_exec_symbol_autogen(rmsnorm_symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
121 | 1 | ccv_nnc_symbolic_graph_backward(rmsnorm_symbolic_graph, TENSOR_SYMBOL_LIST(by), TENSOR_SYMBOL_LIST(bx, scale), SYMBOLIC_GRAPH_SOURCES(rmsnorm_symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(rmsnorm_symbolic_graph)); |
122 | 1 | ccv_nnc_graph_exec_symbol_autogen(rmsnorm_symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
123 | 1 | ccv_nnc_tensor_symbol_t dby = ccv_nnc_tensor_symbol_for_backward(rmsnorm_symbolic_graph, by); |
124 | 1 | ccv_nnc_tensor_symbol_t dbx = ccv_nnc_tensor_symbol_for_backward(rmsnorm_symbolic_graph, bx); |
125 | 1 | ccv_nnc_graph_t* rmsnorm_graph = 0; |
126 | 1 | ccv_nnc_tensor_arena_t* rmsnorm_tensor_arena = 0; |
127 | 1 | ccv_nnc_graph_exec_arena_t* rmsnorm_graph_exec_arena = 0; |
128 | 1 | ccv_nnc_symbolic_graph_compile(rmsnorm_symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(rmsnorm_symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(rmsnorm_symbolic_graph), &rmsnorm_graph, &rmsnorm_tensor_arena, &rmsnorm_graph_exec_arena); |
129 | 1 | ccv_nnc_tensor_t* const bx_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, bx); |
130 | 1 | ccv_nnc_tensor_t* const dy_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, dy); |
131 | 1 | ccv_nnc_tensor_t* const dby_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, dby); |
132 | 1.28k | for (i = 0; i < 8 * 4 * 4 * 10; i++1.28k ) |
133 | 1.28k | dby_tensor->data.f32[i] = dy_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1; |
134 | 1 | memcpy(bx_tensor->data.f32, x_tensor->data.f32, sizeof(float) * 8 * 4 * 4 * 10); |
135 | 1 | ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0); |
136 | 1 | ccv_nnc_graph_run(rmsnorm_graph, 0, TRAVERSE_FULL, 0, 0); |
137 | 1 | ccv_nnc_tensor_t* const dx_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, dx); |
138 | 1 | ccv_nnc_tensor_t* const dbx_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, dbx); |
139 | 1 | REQUIRE_TENSOR_EQ(dx_tensor, dbx_tensor, "graph computed result should match rmsnorm op result"); |
140 | 1 | ccv_nnc_symbolic_graph_free(symbolic_graph); |
141 | 1 | ccv_nnc_tensor_arena_free(tensor_arena); |
142 | 1 | ccv_nnc_graph_exec_arena_free(graph_exec_arena); |
143 | 1 | ccv_nnc_graph_free(graph); |
144 | 1 | ccv_nnc_symbolic_graph_free(rmsnorm_symbolic_graph); |
145 | 1 | ccv_nnc_tensor_arena_free(rmsnorm_tensor_arena); |
146 | 1 | ccv_nnc_graph_exec_arena_free(rmsnorm_graph_exec_arena); |
147 | 1 | ccv_nnc_graph_free(rmsnorm_graph); |
148 | 1 | } |
149 | | |
150 | | TEST_CASE("implement rmsnorm with other symbolic graph without scale") |
151 | 1 | { |
152 | 1 | ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new(); |
153 | 1 | ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "x"); |
154 | 1 | ccv_nnc_tensor_symbol_t sqr = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "sqr"); |
155 | 1 | ccv_nnc_tensor_symbol_t varsum = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "varsum"); |
156 | 1 | ccv_nnc_tensor_symbol_t var = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "var"); |
157 | 1 | ccv_nnc_tensor_symbol_t logvar = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "logvar"); |
158 | 1 | ccv_nnc_tensor_symbol_t logvar_2 = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "logvar"); |
159 | 1 | ccv_nnc_tensor_symbol_t std = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "std"); |
160 | 1 | ccv_nnc_tensor_symbol_t inv_std = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "inv_std"); |
161 | 1 | ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "y"); |
162 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWPROD_FORWARD(), TENSOR_SYMBOL_LIST(x, x), TENSOR_SYMBOL_LIST(sqr), "sqr"); |
163 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_REDUCE_SUM_FORWARD(1, 2, 3), TENSOR_SYMBOL_LIST(sqr), TENSOR_SYMBOL_LIST(varsum), "varsum"); |
164 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SCALAR_MUL_FORWARD(1.0 / (4 * 4 * 10)), TENSOR_SYMBOL_LIST(varsum), TENSOR_SYMBOL_LIST(var), "var"); |
165 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWLOG_FORWARD(), TENSOR_SYMBOL_LIST(var), TENSOR_SYMBOL_LIST(logvar), "log(var)"); |
166 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SCALAR_MUL_FORWARD(0.5), TENSOR_SYMBOL_LIST(logvar), TENSOR_SYMBOL_LIST(logvar_2), "log(var)/2"); |
167 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWEXP_FORWARD(), TENSOR_SYMBOL_LIST(logvar_2), TENSOR_SYMBOL_LIST(std), "std"); |
168 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWDIV_FORWARD(), TENSOR_SYMBOL_LIST(NO_TENSOR_SYMBOL, std), TENSOR_SYMBOL_LIST(inv_std), "1/std"); |
169 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_MUL_FORWARD(1), TENSOR_SYMBOL_LIST(x, inv_std), TENSOR_SYMBOL_LIST(y), "y"); |
170 | 1 | ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
171 | 1 | SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH); |
172 | 1 | ccv_nnc_graph_t* graph = 0; |
173 | 1 | ccv_nnc_tensor_arena_t* tensor_arena = 0; |
174 | 1 | ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0; |
175 | 1 | ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena); |
176 | 1 | GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH); |
177 | 1 | ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, x); |
178 | 1 | dsfmt_t dsfmt; |
179 | 1 | int i; |
180 | 1 | dsfmt_init_gen_rand(&dsfmt, 1); |
181 | 1.28k | for (i = 0; i < 8 * 4 * 4 * 10; i++1.28k ) |
182 | 1.28k | x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt); |
183 | 1 | ccv_nnc_symbolic_graph_t* const rmsnorm_symbolic_graph = ccv_nnc_symbolic_graph_new(); |
184 | 1 | ccv_nnc_tensor_symbol_t bx = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "x"); |
185 | 1 | ccv_nnc_tensor_symbol_t by = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "y"); |
186 | 1 | ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "saved_inv_std"); |
187 | 1 | ccv_nnc_graph_exec_symbol_new(rmsnorm_symbolic_graph, CMD_RMSNORM_FORWARD(0, 0, 1, 2, 3), TENSOR_SYMBOL_LIST(bx), TENSOR_SYMBOL_LIST(by, saved_inv_std), "rmsnorm"); |
188 | 1 | ccv_nnc_graph_exec_symbol_autogen(rmsnorm_symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
189 | 1 | ccv_nnc_graph_t* rmsnorm_graph = 0; |
190 | 1 | ccv_nnc_tensor_arena_t* rmsnorm_tensor_arena = 0; |
191 | 1 | ccv_nnc_graph_exec_arena_t* rmsnorm_graph_exec_arena = 0; |
192 | 1 | ccv_nnc_symbolic_graph_compile(rmsnorm_symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(rmsnorm_symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(rmsnorm_symbolic_graph), &rmsnorm_graph, &rmsnorm_tensor_arena, &rmsnorm_graph_exec_arena); |
193 | 1 | ccv_nnc_tensor_t* const bx_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, bx); |
194 | 1 | memcpy(bx_tensor->data.f32, x_tensor->data.f32, sizeof(float) * 8 * 4 * 4 * 10); |
195 | 1 | ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0); |
196 | 1 | ccv_nnc_graph_run(rmsnorm_graph, 0, TRAVERSE_FULL, 0, 0); |
197 | 1 | ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, y); |
198 | 1 | ccv_nnc_tensor_t* const by_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, by); |
199 | 1 | REQUIRE_TENSOR_EQ(y_tensor, by_tensor, "graph computed result should match rmsnorm op result"); |
200 | 1 | ccv_nnc_symbolic_graph_free(symbolic_graph); |
201 | 1 | ccv_nnc_tensor_arena_free(tensor_arena); |
202 | 1 | ccv_nnc_graph_exec_arena_free(graph_exec_arena); |
203 | 1 | ccv_nnc_graph_free(graph); |
204 | 1 | ccv_nnc_symbolic_graph_free(rmsnorm_symbolic_graph); |
205 | 1 | ccv_nnc_tensor_arena_free(rmsnorm_tensor_arena); |
206 | 1 | ccv_nnc_graph_exec_arena_free(rmsnorm_graph_exec_arena); |
207 | 1 | ccv_nnc_graph_free(rmsnorm_graph); |
208 | 1 | } |
209 | | |
210 | | TEST_CASE("compare rmsnorm gradient with other symbolic graph without scale") |
211 | 1 | { |
212 | 1 | ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new(); |
213 | 1 | ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "x"); |
214 | 1 | ccv_nnc_tensor_symbol_t sqr = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "sqr"); |
215 | 1 | ccv_nnc_tensor_symbol_t varsum = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "varsum"); |
216 | 1 | ccv_nnc_tensor_symbol_t var = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "var"); |
217 | 1 | ccv_nnc_tensor_symbol_t logvar = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "logvar"); |
218 | 1 | ccv_nnc_tensor_symbol_t logvar_2 = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "logvar"); |
219 | 1 | ccv_nnc_tensor_symbol_t std = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "std"); |
220 | 1 | ccv_nnc_tensor_symbol_t inv_std = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "inv_std"); |
221 | 1 | ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "y"); |
222 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWPROD_FORWARD(), TENSOR_SYMBOL_LIST(x, x), TENSOR_SYMBOL_LIST(sqr), "sqr"); |
223 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_REDUCE_SUM_FORWARD(1, 2, 3), TENSOR_SYMBOL_LIST(sqr), TENSOR_SYMBOL_LIST(varsum), "varsum"); |
224 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SCALAR_MUL_FORWARD(1.0 / (4 * 4 * 10)), TENSOR_SYMBOL_LIST(varsum), TENSOR_SYMBOL_LIST(var), "var"); |
225 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWLOG_FORWARD(), TENSOR_SYMBOL_LIST(var), TENSOR_SYMBOL_LIST(logvar), "log(var)"); |
226 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SCALAR_MUL_FORWARD(0.5), TENSOR_SYMBOL_LIST(logvar), TENSOR_SYMBOL_LIST(logvar_2), "log(var)/2"); |
227 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWEXP_FORWARD(), TENSOR_SYMBOL_LIST(logvar_2), TENSOR_SYMBOL_LIST(std), "std"); |
228 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWDIV_FORWARD(), TENSOR_SYMBOL_LIST(NO_TENSOR_SYMBOL, std), TENSOR_SYMBOL_LIST(inv_std), "1/std"); |
229 | 1 | ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_MUL_FORWARD(1), TENSOR_SYMBOL_LIST(x, inv_std), TENSOR_SYMBOL_LIST(y), "y"); |
230 | 1 | ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
231 | 1 | ccv_nnc_symbolic_graph_backward(symbolic_graph, TENSOR_SYMBOL_LIST(y), TENSOR_SYMBOL_LIST(x), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph)); |
232 | 1 | ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
233 | 1 | ccv_nnc_tensor_symbol_t dy = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, y); |
234 | 1 | ccv_nnc_tensor_symbol_t dx = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, x); |
235 | 1 | SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH); |
236 | 1 | ccv_nnc_graph_t* graph = 0; |
237 | 1 | ccv_nnc_tensor_arena_t* tensor_arena = 0; |
238 | 1 | ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0; |
239 | 1 | ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena); |
240 | 1 | GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH); |
241 | 1 | ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, x); |
242 | 1 | dsfmt_t dsfmt; |
243 | 1 | int i; |
244 | 1 | dsfmt_init_gen_rand(&dsfmt, 1); |
245 | 1.28k | for (i = 0; i < 8 * 4 * 4 * 10; i++1.28k ) |
246 | 1.28k | x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt); |
247 | 1 | ccv_nnc_symbolic_graph_t* const rmsnorm_symbolic_graph = ccv_nnc_symbolic_graph_new(); |
248 | 1 | ccv_nnc_tensor_symbol_t bx = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "x"); |
249 | 1 | ccv_nnc_tensor_symbol_t by = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "y"); |
250 | 1 | ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "saved_inv_std"); |
251 | 1 | ccv_nnc_graph_exec_symbol_new(rmsnorm_symbolic_graph, CMD_RMSNORM_FORWARD(0, 0, 1, 2, 3), TENSOR_SYMBOL_LIST(bx), TENSOR_SYMBOL_LIST(by, saved_inv_std), "rmsnorm"); |
252 | 1 | ccv_nnc_graph_exec_symbol_autogen(rmsnorm_symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
253 | 1 | ccv_nnc_symbolic_graph_backward(rmsnorm_symbolic_graph, TENSOR_SYMBOL_LIST(by), TENSOR_SYMBOL_LIST(bx), SYMBOLIC_GRAPH_SOURCES(rmsnorm_symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(rmsnorm_symbolic_graph)); |
254 | 1 | ccv_nnc_graph_exec_symbol_autogen(rmsnorm_symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS); |
255 | 1 | ccv_nnc_tensor_symbol_t dby = ccv_nnc_tensor_symbol_for_backward(rmsnorm_symbolic_graph, by); |
256 | 1 | ccv_nnc_tensor_symbol_t dbx = ccv_nnc_tensor_symbol_for_backward(rmsnorm_symbolic_graph, bx); |
257 | 1 | ccv_nnc_graph_t* rmsnorm_graph = 0; |
258 | 1 | ccv_nnc_tensor_arena_t* rmsnorm_tensor_arena = 0; |
259 | 1 | ccv_nnc_graph_exec_arena_t* rmsnorm_graph_exec_arena = 0; |
260 | 1 | ccv_nnc_symbolic_graph_compile(rmsnorm_symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(rmsnorm_symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(rmsnorm_symbolic_graph), &rmsnorm_graph, &rmsnorm_tensor_arena, &rmsnorm_graph_exec_arena); |
261 | 1 | ccv_nnc_tensor_t* const bx_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, bx); |
262 | 1 | ccv_nnc_tensor_t* const dy_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, dy); |
263 | 1 | ccv_nnc_tensor_t* const dby_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, dby); |
264 | 1.28k | for (i = 0; i < 8 * 4 * 4 * 10; i++1.28k ) |
265 | 1.28k | dby_tensor->data.f32[i] = dy_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1; |
266 | 1 | memcpy(bx_tensor->data.f32, x_tensor->data.f32, sizeof(float) * 8 * 4 * 4 * 10); |
267 | 1 | ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0); |
268 | 1 | ccv_nnc_graph_run(rmsnorm_graph, 0, TRAVERSE_FULL, 0, 0); |
269 | 1 | ccv_nnc_tensor_t* const dx_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, dx); |
270 | 1 | ccv_nnc_tensor_t* const dbx_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, dbx); |
271 | 1 | REQUIRE_TENSOR_EQ(dx_tensor, dbx_tensor, "graph computed result should match rmsnorm op result"); |
272 | 1 | ccv_nnc_symbolic_graph_free(symbolic_graph); |
273 | 1 | ccv_nnc_tensor_arena_free(tensor_arena); |
274 | 1 | ccv_nnc_graph_exec_arena_free(graph_exec_arena); |
275 | 1 | ccv_nnc_graph_free(graph); |
276 | 1 | ccv_nnc_symbolic_graph_free(rmsnorm_symbolic_graph); |
277 | 1 | ccv_nnc_tensor_arena_free(rmsnorm_tensor_arena); |
278 | 1 | ccv_nnc_graph_exec_arena_free(rmsnorm_graph_exec_arena); |
279 | 1 | ccv_nnc_graph_free(rmsnorm_graph); |
280 | 1 | } |
281 | | |
282 | | #include "case_main.h" |