Coverage Report

Created: 2026-04-18 18:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/liu/actions-runner/_work/ccv/ccv/test/unit/nnc/rmsnorm.tests.c
Line
Count
Source
1
#include "case.h"
2
#include "ccv_case.h"
3
#include "ccv_nnc_case.h"
4
#include <ccv.h>
5
#include <nnc/ccv_nnc.h>
6
#include <nnc/ccv_nnc_easy.h>
7
#include "3rdparty/dsfmt/dSFMT.h"
8
9
TEST_SETUP()
10
{
11
  ccv_nnc_init();
12
}
13
14
TEST_CASE("implement rmsnorm with other symbolic graph")
15
1
{
16
1
  ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
17
1
  ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "x");
18
1
  ccv_nnc_tensor_symbol_t sqr = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "sqr");
19
1
  ccv_nnc_tensor_symbol_t varsum = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "varsum");
20
1
  ccv_nnc_tensor_symbol_t var = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "var");
21
1
  ccv_nnc_tensor_symbol_t logvar = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "logvar");
22
1
  ccv_nnc_tensor_symbol_t logvar_2 = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "logvar");
23
1
  ccv_nnc_tensor_symbol_t std = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "std");
24
1
  ccv_nnc_tensor_symbol_t inv_std = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "inv_std");
25
1
  ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "y");
26
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWPROD_FORWARD(), TENSOR_SYMBOL_LIST(x, x), TENSOR_SYMBOL_LIST(sqr), "sqr");
27
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_REDUCE_SUM_FORWARD(1, 2, 3), TENSOR_SYMBOL_LIST(sqr), TENSOR_SYMBOL_LIST(varsum), "varsum");
28
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SCALAR_MUL_FORWARD(1.0 / (4 * 4 * 10)), TENSOR_SYMBOL_LIST(varsum), TENSOR_SYMBOL_LIST(var), "var");
29
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWLOG_FORWARD(), TENSOR_SYMBOL_LIST(var), TENSOR_SYMBOL_LIST(logvar), "log(var)");
30
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SCALAR_MUL_FORWARD(0.5), TENSOR_SYMBOL_LIST(logvar), TENSOR_SYMBOL_LIST(logvar_2), "log(var)/2");
31
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWEXP_FORWARD(), TENSOR_SYMBOL_LIST(logvar_2), TENSOR_SYMBOL_LIST(std), "std");
32
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWDIV_FORWARD(), TENSOR_SYMBOL_LIST(NO_TENSOR_SYMBOL, std), TENSOR_SYMBOL_LIST(inv_std), "1/std");
33
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_MUL_FORWARD(1), TENSOR_SYMBOL_LIST(x, inv_std), TENSOR_SYMBOL_LIST(y), "y");
34
1
  ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
35
1
  SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
36
1
  ccv_nnc_graph_t* graph = 0;
37
1
  ccv_nnc_tensor_arena_t* tensor_arena = 0;
38
1
  ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0;
39
1
  ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena);
40
1
  GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
41
1
  ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, x);
42
1
  dsfmt_t dsfmt;
43
1
  int i;
44
1
  dsfmt_init_gen_rand(&dsfmt, 1);
45
1.28k
  for (i = 0; i < 8 * 4 * 4 * 10; i++)
46
1.28k
    x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
47
1
  ccv_nnc_symbolic_graph_t* const rmsnorm_symbolic_graph = ccv_nnc_symbolic_graph_new();
48
1
  ccv_nnc_tensor_symbol_t bx = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "x");
49
1
  ccv_nnc_tensor_symbol_t by = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "y");
50
1
  ccv_nnc_tensor_symbol_t scale = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 1, 4, 4, 10), "scale");
51
1
  ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "saved_inv_std");
52
1
  ccv_nnc_graph_exec_symbol_new(rmsnorm_symbolic_graph, CMD_SET_FORWARD(1), 0, 0, TENSOR_SYMBOL_LIST(scale), "set_scale");
53
1
  ccv_nnc_graph_exec_symbol_new(rmsnorm_symbolic_graph, CMD_RMSNORM_FORWARD(0, 1, 1, 2, 3), TENSOR_SYMBOL_LIST(bx, scale), TENSOR_SYMBOL_LIST(by, saved_inv_std), "rmsnorm");
54
1
  ccv_nnc_graph_exec_symbol_autogen(rmsnorm_symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
55
1
  ccv_nnc_graph_t* rmsnorm_graph = 0;
56
1
  ccv_nnc_tensor_arena_t* rmsnorm_tensor_arena = 0;
57
1
  ccv_nnc_graph_exec_arena_t* rmsnorm_graph_exec_arena = 0;
58
1
  ccv_nnc_symbolic_graph_compile(rmsnorm_symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(rmsnorm_symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(rmsnorm_symbolic_graph), &rmsnorm_graph, &rmsnorm_tensor_arena, &rmsnorm_graph_exec_arena);
59
1
  ccv_nnc_tensor_t* const bx_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, bx);
60
1
  memcpy(bx_tensor->data.f32, x_tensor->data.f32, sizeof(float) * 8 * 4 * 4 * 10);
61
1
  ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0);
62
1
  ccv_nnc_graph_run(rmsnorm_graph, 0, TRAVERSE_FULL, 0, 0);
63
1
  ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, y);
64
1
  ccv_nnc_tensor_t* const by_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, by);
65
1
  REQUIRE_TENSOR_EQ(y_tensor, by_tensor, "graph computed result should match rmsnorm op result");
66
1
  ccv_nnc_symbolic_graph_free(symbolic_graph);
67
1
  ccv_nnc_tensor_arena_free(tensor_arena);
68
1
  ccv_nnc_graph_exec_arena_free(graph_exec_arena);
69
1
  ccv_nnc_graph_free(graph);
70
1
  ccv_nnc_symbolic_graph_free(rmsnorm_symbolic_graph);
71
1
  ccv_nnc_tensor_arena_free(rmsnorm_tensor_arena);
72
1
  ccv_nnc_graph_exec_arena_free(rmsnorm_graph_exec_arena);
73
1
  ccv_nnc_graph_free(rmsnorm_graph);
74
1
}
75
76
TEST_CASE("compare rmsnorm gradient with other symbolic graph")
77
1
{
78
1
  ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
79
1
  ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "x");
80
1
  ccv_nnc_tensor_symbol_t sqr = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "sqr");
81
1
  ccv_nnc_tensor_symbol_t varsum = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "varsum");
82
1
  ccv_nnc_tensor_symbol_t var = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "var");
83
1
  ccv_nnc_tensor_symbol_t logvar = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "logvar");
84
1
  ccv_nnc_tensor_symbol_t logvar_2 = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "logvar");
85
1
  ccv_nnc_tensor_symbol_t std = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "std");
86
1
  ccv_nnc_tensor_symbol_t inv_std = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "inv_std");
87
1
  ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "y");
88
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWPROD_FORWARD(), TENSOR_SYMBOL_LIST(x, x), TENSOR_SYMBOL_LIST(sqr), "sqr");
89
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_REDUCE_SUM_FORWARD(1, 2, 3), TENSOR_SYMBOL_LIST(sqr), TENSOR_SYMBOL_LIST(varsum), "varsum");
90
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SCALAR_MUL_FORWARD(1.0 / (4 * 4 * 10)), TENSOR_SYMBOL_LIST(varsum), TENSOR_SYMBOL_LIST(var), "var");
91
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWLOG_FORWARD(), TENSOR_SYMBOL_LIST(var), TENSOR_SYMBOL_LIST(logvar), "log(var)");
92
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SCALAR_MUL_FORWARD(0.5), TENSOR_SYMBOL_LIST(logvar), TENSOR_SYMBOL_LIST(logvar_2), "log(var)/2");
93
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWEXP_FORWARD(), TENSOR_SYMBOL_LIST(logvar_2), TENSOR_SYMBOL_LIST(std), "std");
94
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWDIV_FORWARD(), TENSOR_SYMBOL_LIST(NO_TENSOR_SYMBOL, std), TENSOR_SYMBOL_LIST(inv_std), "1/std");
95
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_MUL_FORWARD(1), TENSOR_SYMBOL_LIST(x, inv_std), TENSOR_SYMBOL_LIST(y), "y");
96
1
  ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
97
1
  ccv_nnc_symbolic_graph_backward(symbolic_graph, TENSOR_SYMBOL_LIST(y), TENSOR_SYMBOL_LIST(x), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph));
98
1
  ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
99
1
  ccv_nnc_tensor_symbol_t dy = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, y);
100
1
  ccv_nnc_tensor_symbol_t dx = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, x);
101
1
  SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
102
1
  ccv_nnc_graph_t* graph = 0;
103
1
  ccv_nnc_tensor_arena_t* tensor_arena = 0;
104
1
  ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0;
105
1
  ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena);
106
1
  GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
107
1
  ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, x);
108
1
  dsfmt_t dsfmt;
109
1
  int i;
110
1
  dsfmt_init_gen_rand(&dsfmt, 1);
111
1.28k
  for (i = 0; i < 8 * 4 * 4 * 10; i++)
112
1.28k
    x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
113
1
  ccv_nnc_symbolic_graph_t* const rmsnorm_symbolic_graph = ccv_nnc_symbolic_graph_new();
114
1
  ccv_nnc_tensor_symbol_t bx = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "x");
115
1
  ccv_nnc_tensor_symbol_t by = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "y");
116
1
  ccv_nnc_tensor_symbol_t scale = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 1, 4, 4, 10), "scale");
117
1
  ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "saved_inv_std");
118
1
  ccv_nnc_graph_exec_symbol_new(rmsnorm_symbolic_graph, CMD_SET_FORWARD(1), 0, 0, TENSOR_SYMBOL_LIST(scale), "set_scale");
119
1
  ccv_nnc_graph_exec_symbol_new(rmsnorm_symbolic_graph, CMD_RMSNORM_FORWARD(0, 1, 1, 2, 3), TENSOR_SYMBOL_LIST(bx, scale), TENSOR_SYMBOL_LIST(by, saved_inv_std), "rmsnorm");
120
1
  ccv_nnc_graph_exec_symbol_autogen(rmsnorm_symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
121
1
  ccv_nnc_symbolic_graph_backward(rmsnorm_symbolic_graph, TENSOR_SYMBOL_LIST(by), TENSOR_SYMBOL_LIST(bx, scale), SYMBOLIC_GRAPH_SOURCES(rmsnorm_symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(rmsnorm_symbolic_graph));
122
1
  ccv_nnc_graph_exec_symbol_autogen(rmsnorm_symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
123
1
  ccv_nnc_tensor_symbol_t dby = ccv_nnc_tensor_symbol_for_backward(rmsnorm_symbolic_graph, by);
124
1
  ccv_nnc_tensor_symbol_t dbx = ccv_nnc_tensor_symbol_for_backward(rmsnorm_symbolic_graph, bx);
125
1
  ccv_nnc_graph_t* rmsnorm_graph = 0;
126
1
  ccv_nnc_tensor_arena_t* rmsnorm_tensor_arena = 0;
127
1
  ccv_nnc_graph_exec_arena_t* rmsnorm_graph_exec_arena = 0;
128
1
  ccv_nnc_symbolic_graph_compile(rmsnorm_symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(rmsnorm_symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(rmsnorm_symbolic_graph), &rmsnorm_graph, &rmsnorm_tensor_arena, &rmsnorm_graph_exec_arena);
129
1
  ccv_nnc_tensor_t* const bx_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, bx);
130
1
  ccv_nnc_tensor_t* const dy_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, dy);
131
1
  ccv_nnc_tensor_t* const dby_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, dby);
132
1.28k
  for (i = 0; i < 8 * 4 * 4 * 10; i++)
133
1.28k
    dby_tensor->data.f32[i] = dy_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
134
1
  memcpy(bx_tensor->data.f32, x_tensor->data.f32, sizeof(float) * 8 * 4 * 4 * 10);
135
1
  ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0);
136
1
  ccv_nnc_graph_run(rmsnorm_graph, 0, TRAVERSE_FULL, 0, 0);
137
1
  ccv_nnc_tensor_t* const dx_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, dx);
138
1
  ccv_nnc_tensor_t* const dbx_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, dbx);
139
1
  REQUIRE_TENSOR_EQ(dx_tensor, dbx_tensor, "graph computed result should match rmsnorm op result");
140
1
  ccv_nnc_symbolic_graph_free(symbolic_graph);
141
1
  ccv_nnc_tensor_arena_free(tensor_arena);
142
1
  ccv_nnc_graph_exec_arena_free(graph_exec_arena);
143
1
  ccv_nnc_graph_free(graph);
144
1
  ccv_nnc_symbolic_graph_free(rmsnorm_symbolic_graph);
145
1
  ccv_nnc_tensor_arena_free(rmsnorm_tensor_arena);
146
1
  ccv_nnc_graph_exec_arena_free(rmsnorm_graph_exec_arena);
147
1
  ccv_nnc_graph_free(rmsnorm_graph);
148
1
}
149
150
TEST_CASE("implement rmsnorm with other symbolic graph without scale")
151
1
{
152
1
  ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
153
1
  ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "x");
154
1
  ccv_nnc_tensor_symbol_t sqr = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "sqr");
155
1
  ccv_nnc_tensor_symbol_t varsum = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "varsum");
156
1
  ccv_nnc_tensor_symbol_t var = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "var");
157
1
  ccv_nnc_tensor_symbol_t logvar = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "logvar");
158
1
  ccv_nnc_tensor_symbol_t logvar_2 = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "logvar");
159
1
  ccv_nnc_tensor_symbol_t std = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "std");
160
1
  ccv_nnc_tensor_symbol_t inv_std = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "inv_std");
161
1
  ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "y");
162
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWPROD_FORWARD(), TENSOR_SYMBOL_LIST(x, x), TENSOR_SYMBOL_LIST(sqr), "sqr");
163
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_REDUCE_SUM_FORWARD(1, 2, 3), TENSOR_SYMBOL_LIST(sqr), TENSOR_SYMBOL_LIST(varsum), "varsum");
164
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SCALAR_MUL_FORWARD(1.0 / (4 * 4 * 10)), TENSOR_SYMBOL_LIST(varsum), TENSOR_SYMBOL_LIST(var), "var");
165
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWLOG_FORWARD(), TENSOR_SYMBOL_LIST(var), TENSOR_SYMBOL_LIST(logvar), "log(var)");
166
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SCALAR_MUL_FORWARD(0.5), TENSOR_SYMBOL_LIST(logvar), TENSOR_SYMBOL_LIST(logvar_2), "log(var)/2");
167
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWEXP_FORWARD(), TENSOR_SYMBOL_LIST(logvar_2), TENSOR_SYMBOL_LIST(std), "std");
168
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWDIV_FORWARD(), TENSOR_SYMBOL_LIST(NO_TENSOR_SYMBOL, std), TENSOR_SYMBOL_LIST(inv_std), "1/std");
169
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_MUL_FORWARD(1), TENSOR_SYMBOL_LIST(x, inv_std), TENSOR_SYMBOL_LIST(y), "y");
170
1
  ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
171
1
  SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
172
1
  ccv_nnc_graph_t* graph = 0;
173
1
  ccv_nnc_tensor_arena_t* tensor_arena = 0;
174
1
  ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0;
175
1
  ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena);
176
1
  GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
177
1
  ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, x);
178
1
  dsfmt_t dsfmt;
179
1
  int i;
180
1
  dsfmt_init_gen_rand(&dsfmt, 1);
181
1.28k
  for (i = 0; i < 8 * 4 * 4 * 10; i++)
182
1.28k
    x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
183
1
  ccv_nnc_symbolic_graph_t* const rmsnorm_symbolic_graph = ccv_nnc_symbolic_graph_new();
184
1
  ccv_nnc_tensor_symbol_t bx = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "x");
185
1
  ccv_nnc_tensor_symbol_t by = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "y");
186
1
  ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "saved_inv_std");
187
1
  ccv_nnc_graph_exec_symbol_new(rmsnorm_symbolic_graph, CMD_RMSNORM_FORWARD(0, 0, 1, 2, 3), TENSOR_SYMBOL_LIST(bx), TENSOR_SYMBOL_LIST(by, saved_inv_std), "rmsnorm");
188
1
  ccv_nnc_graph_exec_symbol_autogen(rmsnorm_symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
189
1
  ccv_nnc_graph_t* rmsnorm_graph = 0;
190
1
  ccv_nnc_tensor_arena_t* rmsnorm_tensor_arena = 0;
191
1
  ccv_nnc_graph_exec_arena_t* rmsnorm_graph_exec_arena = 0;
192
1
  ccv_nnc_symbolic_graph_compile(rmsnorm_symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(rmsnorm_symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(rmsnorm_symbolic_graph), &rmsnorm_graph, &rmsnorm_tensor_arena, &rmsnorm_graph_exec_arena);
193
1
  ccv_nnc_tensor_t* const bx_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, bx);
194
1
  memcpy(bx_tensor->data.f32, x_tensor->data.f32, sizeof(float) * 8 * 4 * 4 * 10);
195
1
  ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0);
196
1
  ccv_nnc_graph_run(rmsnorm_graph, 0, TRAVERSE_FULL, 0, 0);
197
1
  ccv_nnc_tensor_t* const y_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, y);
198
1
  ccv_nnc_tensor_t* const by_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, by);
199
1
  REQUIRE_TENSOR_EQ(y_tensor, by_tensor, "graph computed result should match rmsnorm op result");
200
1
  ccv_nnc_symbolic_graph_free(symbolic_graph);
201
1
  ccv_nnc_tensor_arena_free(tensor_arena);
202
1
  ccv_nnc_graph_exec_arena_free(graph_exec_arena);
203
1
  ccv_nnc_graph_free(graph);
204
1
  ccv_nnc_symbolic_graph_free(rmsnorm_symbolic_graph);
205
1
  ccv_nnc_tensor_arena_free(rmsnorm_tensor_arena);
206
1
  ccv_nnc_graph_exec_arena_free(rmsnorm_graph_exec_arena);
207
1
  ccv_nnc_graph_free(rmsnorm_graph);
208
1
}
209
210
TEST_CASE("compare rmsnorm gradient with other symbolic graph without scale")
211
1
{
212
1
  ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
213
1
  ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "x");
214
1
  ccv_nnc_tensor_symbol_t sqr = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "sqr");
215
1
  ccv_nnc_tensor_symbol_t varsum = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "varsum");
216
1
  ccv_nnc_tensor_symbol_t var = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "var");
217
1
  ccv_nnc_tensor_symbol_t logvar = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "logvar");
218
1
  ccv_nnc_tensor_symbol_t logvar_2 = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "logvar");
219
1
  ccv_nnc_tensor_symbol_t std = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "std");
220
1
  ccv_nnc_tensor_symbol_t inv_std = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "inv_std");
221
1
  ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "y");
222
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWPROD_FORWARD(), TENSOR_SYMBOL_LIST(x, x), TENSOR_SYMBOL_LIST(sqr), "sqr");
223
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_REDUCE_SUM_FORWARD(1, 2, 3), TENSOR_SYMBOL_LIST(sqr), TENSOR_SYMBOL_LIST(varsum), "varsum");
224
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SCALAR_MUL_FORWARD(1.0 / (4 * 4 * 10)), TENSOR_SYMBOL_LIST(varsum), TENSOR_SYMBOL_LIST(var), "var");
225
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWLOG_FORWARD(), TENSOR_SYMBOL_LIST(var), TENSOR_SYMBOL_LIST(logvar), "log(var)");
226
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SCALAR_MUL_FORWARD(0.5), TENSOR_SYMBOL_LIST(logvar), TENSOR_SYMBOL_LIST(logvar_2), "log(var)/2");
227
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWEXP_FORWARD(), TENSOR_SYMBOL_LIST(logvar_2), TENSOR_SYMBOL_LIST(std), "std");
228
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_EWDIV_FORWARD(), TENSOR_SYMBOL_LIST(NO_TENSOR_SYMBOL, std), TENSOR_SYMBOL_LIST(inv_std), "1/std");
229
1
  ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_MUL_FORWARD(1), TENSOR_SYMBOL_LIST(x, inv_std), TENSOR_SYMBOL_LIST(y), "y");
230
1
  ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
231
1
  ccv_nnc_symbolic_graph_backward(symbolic_graph, TENSOR_SYMBOL_LIST(y), TENSOR_SYMBOL_LIST(x), SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph));
232
1
  ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
233
1
  ccv_nnc_tensor_symbol_t dy = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, y);
234
1
  ccv_nnc_tensor_symbol_t dx = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, x);
235
1
  SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
236
1
  ccv_nnc_graph_t* graph = 0;
237
1
  ccv_nnc_tensor_arena_t* tensor_arena = 0;
238
1
  ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0;
239
1
  ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), &graph, &tensor_arena, &graph_exec_arena);
240
1
  GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
241
1
  ccv_nnc_tensor_t* const x_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, x);
242
1
  dsfmt_t dsfmt;
243
1
  int i;
244
1
  dsfmt_init_gen_rand(&dsfmt, 1);
245
1.28k
  for (i = 0; i < 8 * 4 * 4 * 10; i++)
246
1.28k
    x_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
247
1
  ccv_nnc_symbolic_graph_t* const rmsnorm_symbolic_graph = ccv_nnc_symbolic_graph_new();
248
1
  ccv_nnc_tensor_symbol_t bx = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "x");
249
1
  ccv_nnc_tensor_symbol_t by = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 4, 4, 10), "y");
250
1
  ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(rmsnorm_symbolic_graph, CPU_TENSOR_NHWC(32F, 8, 1, 1, 1), "saved_inv_std");
251
1
  ccv_nnc_graph_exec_symbol_new(rmsnorm_symbolic_graph, CMD_RMSNORM_FORWARD(0, 0, 1, 2, 3), TENSOR_SYMBOL_LIST(bx), TENSOR_SYMBOL_LIST(by, saved_inv_std), "rmsnorm");
252
1
  ccv_nnc_graph_exec_symbol_autogen(rmsnorm_symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
253
1
  ccv_nnc_symbolic_graph_backward(rmsnorm_symbolic_graph, TENSOR_SYMBOL_LIST(by), TENSOR_SYMBOL_LIST(bx), SYMBOLIC_GRAPH_SOURCES(rmsnorm_symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(rmsnorm_symbolic_graph));
254
1
  ccv_nnc_graph_exec_symbol_autogen(rmsnorm_symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
255
1
  ccv_nnc_tensor_symbol_t dby = ccv_nnc_tensor_symbol_for_backward(rmsnorm_symbolic_graph, by);
256
1
  ccv_nnc_tensor_symbol_t dbx = ccv_nnc_tensor_symbol_for_backward(rmsnorm_symbolic_graph, bx);
257
1
  ccv_nnc_graph_t* rmsnorm_graph = 0;
258
1
  ccv_nnc_tensor_arena_t* rmsnorm_tensor_arena = 0;
259
1
  ccv_nnc_graph_exec_arena_t* rmsnorm_graph_exec_arena = 0;
260
1
  ccv_nnc_symbolic_graph_compile(rmsnorm_symbolic_graph, ccv_nnc_default_compile_params, 0, 0, 0, 0, SYMBOLIC_GRAPH_SOURCES(rmsnorm_symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(rmsnorm_symbolic_graph), &rmsnorm_graph, &rmsnorm_tensor_arena, &rmsnorm_graph_exec_arena);
261
1
  ccv_nnc_tensor_t* const bx_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, bx);
262
1
  ccv_nnc_tensor_t* const dy_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, dy);
263
1
  ccv_nnc_tensor_t* const dby_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, dby);
264
1.28k
  for (i = 0; i < 8 * 4 * 4 * 10; i++)
265
1.28k
    dby_tensor->data.f32[i] = dy_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
266
1
  memcpy(bx_tensor->data.f32, x_tensor->data.f32, sizeof(float) * 8 * 4 * 4 * 10);
267
1
  ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0);
268
1
  ccv_nnc_graph_run(rmsnorm_graph, 0, TRAVERSE_FULL, 0, 0);
269
1
  ccv_nnc_tensor_t* const dx_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, dx);
270
1
  ccv_nnc_tensor_t* const dbx_tensor = ccv_nnc_tensor_from_symbol(rmsnorm_tensor_arena, dbx);
271
1
  REQUIRE_TENSOR_EQ(dx_tensor, dbx_tensor, "graph computed result should match rmsnorm op result");
272
1
  ccv_nnc_symbolic_graph_free(symbolic_graph);
273
1
  ccv_nnc_tensor_arena_free(tensor_arena);
274
1
  ccv_nnc_graph_exec_arena_free(graph_exec_arena);
275
1
  ccv_nnc_graph_free(graph);
276
1
  ccv_nnc_symbolic_graph_free(rmsnorm_symbolic_graph);
277
1
  ccv_nnc_tensor_arena_free(rmsnorm_tensor_arena);
278
1
  ccv_nnc_graph_exec_arena_free(rmsnorm_graph_exec_arena);
279
1
  ccv_nnc_graph_free(rmsnorm_graph);
280
1
}
281
282
#include "case_main.h"