Coverage Report

Created: 2019-07-03 22:50

/home/liu/buildslave/linux-x64-runtests/build/test/int/nnc/schedule.tests.c
Line | Count | Source
  1 |    | #include "case.h"
  2 |    | #include "ccv_case.h"
  3 |    | #include "ccv_nnc_case.h"
  4 |    | #include <ccv.h>
  5 |    | #include <nnc/ccv_nnc.h>
  6 |    | #include <nnc/ccv_nnc_easy.h>
  7 |    | #include <3rdparty/dsfmt/dSFMT.h>
  8 |    |
  9 |    | TEST_SETUP()
 10 |    | {
 11 |    |   ccv_nnc_init();
 12 |    | }
 13 |    |
 14 |    | TEST_CASE("schedule GPU work on one stream")
 15 |  1 | {
 16 |  1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GEMM_FORWARD, CCV_NNC_BACKEND_GPU_CUBLAS));
 17 |  1 |   ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
 18 |  1 |   ccv_nnc_tensor_symbol_t const a = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "a");
 19 |  1 |   ccv_nnc_tensor_symbol_t const w = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "w");
 20 |  1 |   ccv_nnc_tensor_symbol_t const bias = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1), "bias");
 21 |  1 |   ccv_nnc_tensor_symbol_t const b = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 1), "b");
 22 |  1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GEMM_FORWARD(1), TENSOR_SYMBOL_LIST(a, w, bias), TENSOR_SYMBOL_LIST(b), "mul");
 23 |  1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 24 |  1 |   ccv_nnc_graph_t* graph;
 25 |  1 |   ccv_nnc_tensor_arena_t* tensor_arena;
 26 |  1 |   ccv_nnc_graph_exec_arena_t* graph_exec_arena;
 27 |  1 |   ccv_nnc_symbolic_graph_compile(symbolic_graph,
 28 |  1 |     0, 0,
 29 |  1 |     TENSOR_SYMBOL_LIST(b),
 30 |  1 |     SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph),
 31 |  1 |     &graph, &tensor_arena, &graph_exec_arena);
 32 |  1 |   SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
 33 |  1 |   ccv_nnc_graph_static_schedule(graph, CCV_STREAM_CONTEXT_GPU);
 34 |  1 |   GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
 35 |  1 |   ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2), 0);
 36 |  1 |   ccv_nnc_tensor_t* const hw = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2), 0);
 37 |  1 |   ccv_nnc_tensor_t* const hbias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
 38 |  1 |   ccv_nnc_tensor_pin_memory(ha);
 39 |  1 |   ccv_nnc_tensor_pin_memory(hw);
 40 |  1 |   ccv_nnc_tensor_pin_memory(hbias);
 41 |  1 |   ha->data.f32[0] = 1.4;
 42 |  1 |   ha->data.f32[1] = 0.2;
 43 |  1 |   hw->data.f32[0] = 2;
 44 |  1 |   hw->data.f32[1] = 11;
 45 |  1 |   hbias->data.f32[0] = 0;
 46 |  1 |   ccv_nnc_tensor_t* const a_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, a);
 47 |  1 |   ccv_nnc_tensor_t* const w_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, w);
 48 |  1 |   ccv_nnc_tensor_t* const bias_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, bias);
 49 |  1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hw, hbias), TENSOR_LIST(a_tensor, w_tensor, bias_tensor), 0);
 50 |  1 |   ccv_nnc_stream_context_t* const stream_context = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
 51 |  1 |   ccv_nnc_graph_run(graph, 0, stream_context, 0, TRAVERSE_FULL);
 52 |  1 |   ccv_nnc_stream_context_wait(stream_context);
 53 |  1 |   ccv_nnc_stream_context_free(stream_context);
 54 |  1 |   ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 1), 0);
 55 |  1 |   ccv_nnc_tensor_pin_memory(hb);
 56 |  1 |   ccv_nnc_tensor_t* const b_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, b);
 57 |  1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b_tensor), TENSOR_LIST(hb), 0);
 58 |  1 |   REQUIRE_EQ_WITH_TOLERANCE(hb->data.f32[0], 1.4 * 2 + 0.2 * 11, 1e-5, "should match simple algebra");
 59 |  1 |   ccv_nnc_tensor_free(ha);
 60 |  1 |   ccv_nnc_tensor_free(hw);
 61 |  1 |   ccv_nnc_tensor_free(hbias);
 62 |  1 |   ccv_nnc_tensor_free(hb);
 63 |  1 |   ccv_nnc_symbolic_graph_free(symbolic_graph);
 64 |  1 |   ccv_nnc_graph_free(graph);
 65 |  1 |   ccv_nnc_tensor_arena_free(tensor_arena);
 66 |  1 |   ccv_nnc_graph_exec_arena_free(graph_exec_arena);
 67 |  1 | }
 68 |    |
 69 |    | TEST_CASE("schedule GPU work on multiple streams")
 70 |  1 | {
 71 |  1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GEMM_FORWARD, CCV_NNC_BACKEND_GPU_CUBLAS));
 72 |  1 |   ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
 73 |  1 |   ccv_nnc_tensor_symbol_t const a = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "a");
 74 |  1 |   ccv_nnc_tensor_symbol_t const w1 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "w1");
 75 |  1 |   ccv_nnc_tensor_symbol_t const bias1 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1), "bias1");
 76 |  1 |   ccv_nnc_tensor_symbol_t const b1 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 1), "b1");
 77 |  1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GEMM_FORWARD(1), TENSOR_SYMBOL_LIST(a, w1, bias1), TENSOR_SYMBOL_LIST(b1), "mul1");
 78 |  1 |   ccv_nnc_tensor_symbol_t const w2 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "w2");
 79 |  1 |   ccv_nnc_tensor_symbol_t const bias2 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1), "bias2");
 80 |  1 |   ccv_nnc_tensor_symbol_t const b2 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 1), "b2");
 81 |  1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GEMM_FORWARD(1), TENSOR_SYMBOL_LIST(a, w2, bias2), TENSOR_SYMBOL_LIST(b2), "mul2");
 82 |  1 |   ccv_nnc_tensor_symbol_t const w3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "w3");
 83 |  1 |   ccv_nnc_tensor_symbol_t const bias3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1), "bias3");
 84 |  1 |   ccv_nnc_tensor_symbol_t const b3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 1), "b3");
 85 |  1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GEMM_FORWARD(1), TENSOR_SYMBOL_LIST(a, w3, bias3), TENSOR_SYMBOL_LIST(b3), "mul3");
 86 |  1 |   ccv_nnc_tensor_symbol_t const biasc = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1), "biasc");
 87 |  1 |   ccv_nnc_tensor_symbol_t const c = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 1), "c");
 88 |  1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GEMM_FORWARD(1), TENSOR_SYMBOL_LIST(b1, b2, biasc), TENSOR_SYMBOL_LIST(c), "mulc");
 89 |  1 |   ccv_nnc_tensor_symbol_t const biasd = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1), "biasd");
 90 |  1 |   ccv_nnc_tensor_symbol_t const d = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 1), "d");
 91 |  1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GEMM_FORWARD(1), TENSOR_SYMBOL_LIST(c, b3, biasd), TENSOR_SYMBOL_LIST(d), "muld");
 92 |  1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 93 |  1 |   ccv_nnc_graph_t* graph;
 94 |  1 |   ccv_nnc_tensor_arena_t* tensor_arena;
 95 |  1 |   ccv_nnc_graph_exec_arena_t* graph_exec_arena;
 96 |  1 |   ccv_nnc_symbolic_graph_compile(symbolic_graph,
 97 |  1 |     0, 0,
 98 |  1 |     TENSOR_SYMBOL_LIST(d),
 99 |  1 |     SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph),
100 |  1 |     &graph, &tensor_arena, &graph_exec_arena);
101 |  1 |   SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
102 |  1 |   ccv_nnc_graph_static_schedule(graph, CCV_STREAM_CONTEXT_GPU);
103 |  1 |   GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
104 |  1 |   ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2), 0);
105 |  1 |   ccv_nnc_tensor_t* const hw1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2), 0);
106 |  1 |   ccv_nnc_tensor_t* const hbias1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
107 |  1 |   ccv_nnc_tensor_pin_memory(ha);
108 |  1 |   ccv_nnc_tensor_pin_memory(hw1);
109 |  1 |   ccv_nnc_tensor_pin_memory(hbias1);
110 |  1 |   ha->data.f32[0] = 1.4;
111 |  1 |   ha->data.f32[1] = 0.2;
112 |  1 |   hw1->data.f32[0] = 2;
113 |  1 |   hw1->data.f32[1] = 11;
114 |  1 |   hbias1->data.f32[0] = 0;
115 |  1 |   ccv_nnc_tensor_t* const hw2 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2), 0);
116 |  1 |   ccv_nnc_tensor_t* const hbias2 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
117 |  1 |   ccv_nnc_tensor_pin_memory(hw2);
118 |  1 |   ccv_nnc_tensor_pin_memory(hbias2);
119 |  1 |   hw2->data.f32[0] = 1;
120 |  1 |   hw2->data.f32[1] = 2.2;
121 |  1 |   hbias2->data.f32[0] = 1;
122 |  1 |   ccv_nnc_tensor_t* const hw3 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2), 0);
123 |  1 |   ccv_nnc_tensor_t* const hbias3 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
124 |  1 |   ccv_nnc_tensor_pin_memory(hw3);
125 |  1 |   ccv_nnc_tensor_pin_memory(hbias3);
126 |  1 |   hw3->data.f32[0] = 0.5;
127 |  1 |   hw3->data.f32[1] = 1.5;
128 |  1 |   hbias3->data.f32[0] = 0.5;
129 |  1 |   ccv_nnc_tensor_t* const hbiasc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
130 |  1 |   ccv_nnc_tensor_pin_memory(hbiasc);
131 |  1 |   hbiasc->data.f32[0] = 0.2;
132 |  1 |   ccv_nnc_tensor_t* const hbiasd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
133 |  1 |   ccv_nnc_tensor_pin_memory(hbiasd);
134 |  1 |   hbiasd->data.f32[0] = 0.3;
135 |  1 |   ccv_nnc_tensor_t* const a_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, a);
136 |  1 |   ccv_nnc_tensor_t* const w1_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, w1);
137 |  1 |   ccv_nnc_tensor_t* const bias1_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, bias1);
138 |  1 |   ccv_nnc_tensor_t* const w2_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, w2);
139 |  1 |   ccv_nnc_tensor_t* const bias2_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, bias2);
140 |  1 |   ccv_nnc_tensor_t* const w3_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, w3);
141 |  1 |   ccv_nnc_tensor_t* const bias3_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, bias3);
142 |  1 |   ccv_nnc_tensor_t* const biasc_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, biasc);
143 |  1 |   ccv_nnc_tensor_t* const biasd_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, biasd);
144 |  1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hw1, hbias1, hw2, hbias2, hw3, hbias3, hbiasc, hbiasd), TENSOR_LIST(a_tensor, w1_tensor, bias1_tensor, w2_tensor, bias2_tensor, w3_tensor, bias3_tensor, biasc_tensor, biasd_tensor), 0);
145 |  1 |   ccv_nnc_stream_context_t* const stream_context = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
146 |  1 |   ccv_nnc_graph_run(graph, 0, stream_context, 0, TRAVERSE_FULL);
147 |  1 |   ccv_nnc_stream_context_wait(stream_context);
148 |  1 |   ccv_nnc_stream_context_free(stream_context);
149 |  1 |   ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 1), 0);
150 |  1 |   ccv_nnc_tensor_pin_memory(hd);
151 |  1 |   ccv_nnc_tensor_t* const d_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, d);
152 |  1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d_tensor), TENSOR_LIST(hd), 0);
153 |  1 |   const float b1v = 1.4 * 2 + 0.2 * 11;
154 |  1 |   const float b2v = 1.4 * 1 + 0.2 * 2.2 + 1;
155 |  1 |   const float b3v = 1.4 * 0.5 + 0.2 * 1.5 + 0.5;
156 |  1 |   const float cv = b1v * b2v + 0.2;
157 |  1 |   const float dv = cv * b3v + 0.3;
158 |  1 |   REQUIRE_EQ_WITH_TOLERANCE(hd->data.f32[0], dv, 1e-5, "should match simple algebra");
159 |  1 |   ccv_nnc_tensor_free(ha);
160 |  1 |   ccv_nnc_tensor_free(hw1);
161 |  1 |   ccv_nnc_tensor_free(hbias1);
162 |  1 |   ccv_nnc_tensor_free(hw2);
163 |  1 |   ccv_nnc_tensor_free(hbias2);
164 |  1 |   ccv_nnc_tensor_free(hw3);
165 |  1 |   ccv_nnc_tensor_free(hbias3);
166 |  1 |   ccv_nnc_tensor_free(hbiasc);
167 |  1 |   ccv_nnc_tensor_free(hbiasd);
168 |  1 |   ccv_nnc_tensor_free(hd);
169 |  1 |   ccv_nnc_symbolic_graph_free(symbolic_graph);
170 |  1 |   ccv_nnc_graph_free(graph);
171 |  1 |   ccv_nnc_tensor_arena_free(tensor_arena);
172 |  1 |   ccv_nnc_graph_exec_arena_free(graph_exec_arena);
173 |  1 | }
174 |    |
175 |    | static int while_5(ccv_nnc_tensor_t* const* const inputs, const int input_size, const void* const data)
176 | 18 | {
177 | 18 |   return inputs[0]->data.i64[0] < 5;
178 | 18 | }
179 |    |
180 |    | TEST_CASE("schedule GPU work with while loop")
181 |  1 | {
182 |  1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GEMM_FORWARD, CCV_NNC_BACKEND_GPU_CUBLAS));
183 |  1 |   ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
184 |  1 |   ccv_nnc_symbolic_graph_t* const while_graph = ccv_nnc_symbolic_graph_new();
185 |  1 |   ccv_nnc_symbolic_graph_while(symbolic_graph, CCV_NNC_GRAPH_FORWARD, while_graph, "while 1..5");
186 |  1 |   ccv_nnc_tensor_symbol_t const a = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "a");
187 |  1 |   ccv_nnc_tensor_symbol_t const w1 = ccv_nnc_tensor_symbol_new(while_graph, GPU_TENSOR_NHWC(000, 32F, 2, 2), "w1");
188 |  1 |   ccv_nnc_tensor_symbol_t const bias1 = ccv_nnc_tensor_symbol_new(while_graph, GPU_TENSOR_NHWC(000, 32F, 2), "bias1");
189 |  1 |   ccv_nnc_tensor_symbol_t const b1 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "b1");
190 |  1 |   ccv_nnc_graph_exec_symbol_t const noop = ccv_nnc_graph_exec_symbol_new(while_graph, CMD_NOOP(), 0, 0, 0, 0, "noop");
191 |  1 |   ccv_nnc_graph_exec_symbol_t const mul1 = ccv_nnc_graph_exec_symbol_new(while_graph, CMD_GEMM_FORWARD(2), TENSOR_SYMBOL_LIST(a, w1, bias1), TENSOR_SYMBOL_LIST(b1), "mul1");
192 |  1 |   ccv_nnc_graph_exec_symbol_concat(while_graph, noop, mul1);
193 |  1 |   ccv_nnc_symbolic_graph_set_while_expr(while_graph, while_5, 0, TENSOR_SYMBOL_LIST(ccv_nnc_tensor_symbol_for_while_count(while_graph)), GRAPH_EXEC_SYMBOL_LIST(noop));
194 |  1 |   ccv_nnc_symbolic_graph_set_carry_overs(while_graph, TENSOR_SYMBOL_MAP(KV(b1, a)));
195 |  1 |   ccv_nnc_symbolic_graph_set_sources(while_graph, GRAPH_EXEC_SYMBOL_LIST(noop));
196 |  1 |   ccv_nnc_symbolic_graph_set_destinations(while_graph, GRAPH_EXEC_SYMBOL_LIST(mul1));
197 |  1 |   ccv_nnc_tensor_symbol_t const w2 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "w2");
198 |  1 |   ccv_nnc_tensor_symbol_t const bias2 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1), "bias2");
199 |  1 |   ccv_nnc_tensor_symbol_t const b2 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 1), "b2");
200 |  1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GEMM_FORWARD(1), TENSOR_SYMBOL_LIST(a, w2, bias2), TENSOR_SYMBOL_LIST(b2), "mul2");
201 |  1 |   ccv_nnc_tensor_symbol_t const w3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "w3");
202 |  1 |   ccv_nnc_tensor_symbol_t const bias3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1), "bias3");
203 |  1 |   ccv_nnc_tensor_symbol_t const b3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 1), "b3");
204 |  1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GEMM_FORWARD(1), TENSOR_SYMBOL_LIST(b1, w3, bias3), TENSOR_SYMBOL_LIST(b3), "mul3");
205 |  1 |   ccv_nnc_tensor_symbol_t const biasc = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1), "biasc");
206 |  1 |   ccv_nnc_tensor_symbol_t const c = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 1), "c");
207 |  1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GEMM_FORWARD(1), TENSOR_SYMBOL_LIST(b2, b3, biasc), TENSOR_SYMBOL_LIST(c), "mulc");
208 |  1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
209 |  1 |   ccv_nnc_graph_t* graph;
210 |  1 |   ccv_nnc_tensor_arena_t* tensor_arena;
211 |  1 |   ccv_nnc_graph_exec_arena_t* graph_exec_arena;
212 |  1 |   ccv_nnc_symbolic_graph_compile(symbolic_graph,
213 |  1 |     0, 0,
214 |  1 |     TENSOR_SYMBOL_LIST(c),
215 |  1 |     SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph),
216 |  1 |     &graph, &tensor_arena, &graph_exec_arena);
217 |  1 |   SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
218 |  1 |   ccv_nnc_graph_static_schedule(graph, CCV_STREAM_CONTEXT_GPU);
219 |  1 |   GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
220 |  1 |   ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2), 0);
221 |  1 |   ccv_nnc_tensor_t* const hw1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
222 |  1 |   ccv_nnc_tensor_t* const hbias1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0);
223 |  1 |   ccv_nnc_tensor_pin_memory(ha);
224 |  1 |   ccv_nnc_tensor_pin_memory(hw1);
225 |  1 |   ccv_nnc_tensor_pin_memory(hbias1);
226 |  1 |   ha->data.f32[0] = 1.4;
227 |  1 |   ha->data.f32[1] = 0.2;
228 |  1 |   hw1->data.f32[0] = 1.1;
229 |  1 |   hw1->data.f32[1] = 2.2;
230 |  1 |   hw1->data.f32[2] = 1;
231 |  1 |   hw1->data.f32[3] = 2;
232 |  1 |   hbias1->data.f32[0] = 0;
233 |  1 |   hbias1->data.f32[1] = 0;
234 |  1 |   ccv_nnc_tensor_t* const hw2 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2), 0);
235 |  1 |   ccv_nnc_tensor_t* const hbias2 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
236 |  1 |   ccv_nnc_tensor_pin_memory(hw2);
237 |  1 |   ccv_nnc_tensor_pin_memory(hbias2);
238 |  1 |   hw2->data.f32[0] = 0.6;
239 |  1 |   hw2->data.f32[1] = 3;
240 |  1 |   hbias2->data.f32[0] = 0.4;
241 |  1 |   ccv_nnc_tensor_t* const hw3 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2), 0);
242 |  1 |   ccv_nnc_tensor_t* const hbias3 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
243 |  1 |   ccv_nnc_tensor_pin_memory(hw3);
244 |  1 |   ccv_nnc_tensor_pin_memory(hbias3);
245 |  1 |   hw3->data.f32[0] = 0.2;
246 |  1 |   hw3->data.f32[1] = 0.3;
247 |  1 |   hbias3->data.f32[0] = 1;
248 |  1 |   ccv_nnc_tensor_t* const hbiasc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
249 |  1 |   ccv_nnc_tensor_pin_memory(hbiasc);
250 |  1 |   hbiasc->data.f32[0] = 0.5;
251 |  1 |   ccv_nnc_tensor_t* const a_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, a);
252 |  1 |   ccv_nnc_tensor_t* const w1_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, w1);
253 |  1 |   ccv_nnc_tensor_t* const bias1_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, bias1);
254 |  1 |   ccv_nnc_tensor_t* const w2_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, w2);
255 |  1 |   ccv_nnc_tensor_t* const bias2_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, bias2);
256 |  1 |   ccv_nnc_tensor_t* const w3_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, w3);
257 |  1 |   ccv_nnc_tensor_t* const bias3_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, bias3);
258 |  1 |   ccv_nnc_tensor_t* const biasc_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, biasc);
259 |  1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hw1, hbias1, hw2, hbias2, hw3, hbias3, hbiasc), TENSOR_LIST(a_tensor, w1_tensor, bias1_tensor, w2_tensor, bias2_tensor, w3_tensor, bias3_tensor, biasc_tensor), 0);
260 |  1 |   ccv_nnc_stream_context_t* const stream_context = ccv_nnc_graph_default_stream(graph);
261 |  1 |   ccv_nnc_graph_run(graph, 0, stream_context, 0, TRAVERSE_FULL);
262 |  1 |   ccv_nnc_stream_context_wait(stream_context);
263 |  1 |   ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 1), 0);
264 |  1 |   ccv_nnc_tensor_pin_memory(hc);
265 |  1 |   ccv_nnc_tensor_t* const c_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, c);
266 |  1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c_tensor), TENSOR_LIST(hc), 0);
267 |  1 |   float av0 = 1.4;
268 |  1 |   float av1 = 0.2;
269 |  1 |   int i;
270 |  6 |   for (i = 0; i < 5; i++)
271 |  5 |   {
272 |  5 |     const float b0 = av0 * 1.1 + av1 * 2.2;
273 |  5 |     const float b1 = av0 * 1 + av1 * 2;
274 |  5 |     av0 = b0;
275 |  5 |     av1 = b1;
276 |  5 |   }
277 |  1 |   const float b2v = 1.4 * 0.6 + 0.2 * 3 + 0.4;
278 |  1 |   const float b3v = av0 * 0.2 + av1 * 0.3 + 1;
279 |  1 |   const float cv = b2v * b3v + 0.5;
280 |  1 |   REQUIRE_EQ_WITH_TOLERANCE(hc->data.f32[0], cv, 1e-2, "should match simple algebra");
281 |  1 |   ccv_nnc_tensor_free(ha);
282 |  1 |   ccv_nnc_tensor_free(hw1);
283 |  1 |   ccv_nnc_tensor_free(hbias1);
284 |  1 |   ccv_nnc_tensor_free(hw2);
285 |  1 |   ccv_nnc_tensor_free(hbias2);
286 |  1 |   ccv_nnc_tensor_free(hw3);
287 |  1 |   ccv_nnc_tensor_free(hbias3);
288 |  1 |   ccv_nnc_tensor_free(hbiasc);
289 |  1 |   ccv_nnc_tensor_free(hc);
290 |  1 |   ccv_nnc_symbolic_graph_free(symbolic_graph);
291 |  1 |   ccv_nnc_graph_free(graph);
292 |  1 |   ccv_nnc_tensor_arena_free(tensor_arena);
293 |  1 |   ccv_nnc_graph_exec_arena_free(graph_exec_arena);
294 |  1 | }
295 |    |
296 |    | static int case_of_0(ccv_nnc_tensor_t* const* const inputs, const int input_size, const void* const data)
297 |  3 | {
298 |  3 |   return 0;
299 |  3 | }
300 |    |
301 |    | TEST_CASE("schedule GPU work with case..of")
302 |  1 | {
303 |  1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GEMM_FORWARD, CCV_NNC_BACKEND_GPU_CUBLAS));
304 |  1 |   ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
305 |  1 |   ccv_nnc_tensor_symbol_t const a = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "a");
306 |  1 |   ccv_nnc_tensor_symbol_t const b = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "b");
307 |  1 |   ccv_nnc_graph_exec_symbol_t const case_of = ccv_nnc_symbolic_graph_case_of_new(symbolic_graph, CCV_NNC_GRAPH_FORWARD, TENSOR_SYMBOL_LIST(a), TENSOR_SYMBOL_MAP(KV(a, b)), "case..of");
308 |  1 |   ccv_nnc_symbolic_graph_set_case_of_expr(symbolic_graph, case_of, case_of_0, 0);
309 |  1 |   ccv_nnc_symbolic_graph_t* const symbolic_graph_0 = ccv_nnc_symbolic_graph_new();
310 |  1 |   ccv_nnc_tensor_symbol_t const b0 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "b0");
311 |  1 |   ccv_nnc_symbolic_graph_set_case_of(symbolic_graph, case_of, symbolic_graph_0, 0, TENSOR_SYMBOL_MAP(KV(b0, b)));
312 |  1 |   ccv_nnc_tensor_symbol_t const w = ccv_nnc_tensor_symbol_new(symbolic_graph_0, GPU_TENSOR_NHWC(000, 32F, 2, 2), "w");
313 |  1 |   ccv_nnc_tensor_symbol_t const bias = ccv_nnc_tensor_symbol_new(symbolic_graph_0, GPU_TENSOR_NHWC(000, 32F, 2), "bias");
314 |  1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph_0, CMD_GEMM_FORWARD(2), TENSOR_SYMBOL_LIST(a, w, bias), TENSOR_SYMBOL_LIST(b0), "mul");
315 |  1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph_0, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
316 |  1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
317 |  1 |   ccv_nnc_graph_t* graph;
318 |  1 |   ccv_nnc_tensor_arena_t* tensor_arena;
319 |  1 |   ccv_nnc_graph_exec_arena_t* graph_exec_arena;
320 |  1 |   ccv_nnc_symbolic_graph_compile(symbolic_graph,
321 |  1 |     0, 0,
322 |  1 |     TENSOR_SYMBOL_LIST(b),
323 |  1 |     SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph),
324 |  1 |     &graph, &tensor_arena, &graph_exec_arena);
325 |  1 |   SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
326 |  1 |   ccv_nnc_graph_static_schedule(graph, CCV_STREAM_CONTEXT_GPU);
327 |  1 |   GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
328 |  1 |   ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2), 0);
329 |  1 |   ccv_nnc_tensor_t* const hw = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
330 |  1 |   ccv_nnc_tensor_t* const hbias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0);
331 |  1 |   ccv_nnc_tensor_pin_memory(ha);
332 |  1 |   ccv_nnc_tensor_pin_memory(hw);
333 |  1 |   ccv_nnc_tensor_pin_memory(hbias);
334 |  1 |   ha->data.f32[0] = 1.4;
335 |  1 |   ha->data.f32[1] = 0.2;
336 |  1 |   hw->data.f32[0] = 2;
337 |  1 |   hw->data.f32[1] = 11;
338 |  1 |   hw->data.f32[2] = 1;
339 |  1 |   hw->data.f32[3] = 2;
340 |  1 |   hbias->data.f32[0] = 0;
341 |  1 |   hbias->data.f32[1] = 0;
342 |  1 |   ccv_nnc_tensor_t* const a_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, a);
343 |  1 |   ccv_nnc_tensor_t* const w_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, w);
344 |  1 |   ccv_nnc_tensor_t* const bias_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, bias);
345 |  1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hw, hbias), TENSOR_LIST(a_tensor, w_tensor, bias_tensor), 0);
346 |  1 |   ccv_nnc_stream_context_t* const stream_context = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
347 |  1 |   ccv_nnc_graph_run(graph, 0, stream_context, 0, TRAVERSE_FULL);
348 |  1 |   ccv_nnc_stream_context_wait(stream_context);
349 |  1 |   ccv_nnc_stream_context_free(stream_context);
350 |  1 |   ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2), 0);
351 |  1 |   ccv_nnc_tensor_pin_memory(hb);
352 |  1 |   ccv_nnc_tensor_t* const b_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, b);
353 |  1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b_tensor), TENSOR_LIST(hb), 0);
354 |  1 |   REQUIRE_EQ_WITH_TOLERANCE(hb->data.f32[0], 1.4 * 2 + 0.2 * 11, 1e-5, "should match simple algebra");
355 |  1 |   REQUIRE_EQ_WITH_TOLERANCE(hb->data.f32[1], 1.4 + 0.2 * 2, 1e-5, "should match simple algebra");
356 |  1 |   ccv_nnc_graph_free(graph);
357 |  1 |   ccv_nnc_symbolic_graph_free(symbolic_graph);
358 |  1 |   ccv_nnc_tensor_arena_free(tensor_arena);
359 |  1 |   ccv_nnc_graph_exec_arena_free(graph_exec_arena);
360 |  1 |   ccv_nnc_tensor_free(ha);
361 |  1 |   ccv_nnc_tensor_free(hw);
362 |  1 |   ccv_nnc_tensor_free(hbias);
363 |  1 |   ccv_nnc_tensor_free(hb);
364 |  1 | }
365 |    |
366 |    | TEST_CASE("schedule GPU work with both while loop and case..of")
367 |  1 | {
368 |  1 |   ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
369 |  1 |   ccv_nnc_symbolic_graph_t* const while_graph = ccv_nnc_symbolic_graph_new();
370 |  1 |   ccv_nnc_symbolic_graph_while(symbolic_graph, CCV_NNC_GRAPH_FORWARD, while_graph, "while 1..5");
371 |  1 |   ccv_nnc_tensor_symbol_t const a = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "a");
372 |  1 |   ccv_nnc_tensor_symbol_t const w1 = ccv_nnc_tensor_symbol_new(while_graph, GPU_TENSOR_NHWC(000, 32F, 2, 2), "w1");
373 |  1 |   ccv_nnc_tensor_symbol_t const bias1 = ccv_nnc_tensor_symbol_new(while_graph, GPU_TENSOR_NHWC(000, 32F, 2), "bias1");
374 |  1 |   ccv_nnc_tensor_symbol_t const b1 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "b1");
375 |  1 |   ccv_nnc_graph_exec_symbol_t const noop = ccv_nnc_graph_exec_symbol_new(while_graph, CMD_NOOP(), 0, 0, 0, 0, "noop");
376 |  1 |   ccv_nnc_graph_exec_symbol_t const mul1 = ccv_nnc_graph_exec_symbol_new(while_graph, CMD_GEMM_FORWARD(2), TENSOR_SYMBOL_LIST(a, w1, bias1), TENSOR_SYMBOL_LIST(b1), "mul1");
377 |  1 |   ccv_nnc_graph_exec_symbol_concat(while_graph, noop, mul1);
378 |  1 |   ccv_nnc_symbolic_graph_set_while_expr(while_graph, while_5, 0, TENSOR_SYMBOL_LIST(ccv_nnc_tensor_symbol_for_while_count(while_graph)), GRAPH_EXEC_SYMBOL_LIST(noop));
379 |  1 |   ccv_nnc_symbolic_graph_set_carry_overs(while_graph, TENSOR_SYMBOL_MAP(KV(b1, a)));
380 |  1 |   ccv_nnc_symbolic_graph_set_sources(while_graph, GRAPH_EXEC_SYMBOL_LIST(noop));
381 |  1 |   ccv_nnc_symbolic_graph_set_destinations(while_graph, GRAPH_EXEC_SYMBOL_LIST(mul1));
382 |  1 |   ccv_nnc_tensor_symbol_t const b2 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "b2");
383 |  1 |   ccv_nnc_graph_exec_symbol_t const case_of = ccv_nnc_symbolic_graph_case_of_new(symbolic_graph, CCV_NNC_GRAPH_FORWARD, TENSOR_SYMBOL_LIST(a), TENSOR_SYMBOL_MAP(KV(a, b2)), "case..of");
384 |  1 |   ccv_nnc_symbolic_graph_set_case_of_expr(symbolic_graph, case_of, case_of_0, 0);
385 |  1 |   ccv_nnc_symbolic_graph_t* const symbolic_graph_0 = ccv_nnc_symbolic_graph_new();
386 |  1 |   ccv_nnc_tensor_symbol_t const b20 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "b20");
387 |  1 |   ccv_nnc_symbolic_graph_set_case_of(symbolic_graph, case_of, symbolic_graph_0, 0, TENSOR_SYMBOL_MAP(KV(b20, b2)));
388 |  1 |   ccv_nnc_tensor_symbol_t const w2 = ccv_nnc_tensor_symbol_new(symbolic_graph_0, GPU_TENSOR_NHWC(000, 32F, 2, 2), "w2");
389 |  1 |   ccv_nnc_tensor_symbol_t const bias2 = ccv_nnc_tensor_symbol_new(symbolic_graph_0, GPU_TENSOR_NHWC(000, 32F, 2), "bias2");
390 |  1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph_0, CMD_GEMM_FORWARD(2), TENSOR_SYMBOL_LIST(a, w2, bias2), TENSOR_SYMBOL_LIST(b20), "mul2");
391 |  1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph_0, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
392 |  1 |   ccv_nnc_tensor_symbol_t const w3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "w3");
393 |  1 |   ccv_nnc_tensor_symbol_t const bias3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1), "bias3");
394 |  1 |   ccv_nnc_tensor_symbol_t const b3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 1), "b3");
395 |  1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GEMM_FORWARD(1), TENSOR_SYMBOL_LIST(b2, w3, bias3), TENSOR_SYMBOL_LIST(b3), "mul3");
396 |  1 |   ccv_nnc_tensor_symbol_t const w4 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 2), "w4");
397 |  1 |   ccv_nnc_tensor_symbol_t const bias4 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1), "bias4");
398 |  1 |   ccv_nnc_tensor_symbol_t const b4 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 1), "b4");
399 |  1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GEMM_FORWARD(1), TENSOR_SYMBOL_LIST(b1, w4, bias4), TENSOR_SYMBOL_LIST(b4), "mul4");
400 |  1 |   ccv_nnc_tensor_symbol_t const biasc = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1), "biasc");
401 |  1 |   ccv_nnc_tensor_symbol_t const c = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NHWC(000, 32F, 1, 1), "c");
402 |  1 |   ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_GEMM_FORWARD(1), TENSOR_SYMBOL_LIST(b3, b4, biasc), TENSOR_SYMBOL_LIST(c), "mulc");
403 |  1 |   ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
404 |  1 |   ccv_nnc_graph_t* graph;
405 |  1 |   ccv_nnc_tensor_arena_t* tensor_arena;
406 |  1 |   ccv_nnc_graph_exec_arena_t* graph_exec_arena;
407 |  1 |   ccv_nnc_symbolic_graph_compile(symbolic_graph,
408 |  1 |     0, 0,
409 |  1 |     TENSOR_SYMBOL_LIST(c),
410 |  1 |     SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph),
411 |  1 |     &graph, &tensor_arena, &graph_exec_arena);
412 |  1 |   SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
413 |  1 |   ccv_nnc_graph_static_schedule(graph, CCV_STREAM_CONTEXT_GPU);
414 |  1 |   GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
415 |  1 |   ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2), 0);
416 |  1 |   ccv_nnc_tensor_t* const hw1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
417 |  1 |   ccv_nnc_tensor_t* const hbias1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0);
418 |  1 |   ccv_nnc_tensor_pin_memory(ha);
419 |  1 |   ccv_nnc_tensor_pin_memory(hw1);
420 |  1 |   ccv_nnc_tensor_pin_memory(hbias1);
421 |  1 |   ha->data.f32[0] = 1.4;
422 |  1 |   ha->data.f32[1] = 0.2;
423 |  1 |   hw1->data.f32[0] = 1.1;
424 |  1 |   hw1->data.f32[1] = 2.2;
425 |  1 |   hw1->data.f32[2] = 1;
426 |  1 |   hw1->data.f32[3] = 2;
427 |  1 |   hbias1->data.f32[0] = 0;
428 |  1 |   hbias1->data.f32[1] = 0;
429 |  1 |   ccv_nnc_tensor_t* const hw2 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
430 |  1 |   ccv_nnc_tensor_t* const hbias2 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0);
431 |  1 |   ccv_nnc_tensor_pin_memory(hw2);
432 |  1 |   ccv_nnc_tensor_pin_memory(hbias2);
433 |  1 |   hw2->data.f32[0] = 0.1;
434 |  1 |   hw2->data.f32[1] = 0.2;
435 |  1 |   hw2->data.f32[2] = 1.2;
436 |  1 |   hw2->data.f32[3] = 1.1;
437 |  1 |   hbias2->data.f32[0] = 1;
438 |  1 |   hbias2->data.f32[1] = 0;
439 |  1 |   ccv_nnc_tensor_t* const hw3 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2), 0);
440 |  1 |   ccv_nnc_tensor_t* const hbias3 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
441 |  1 |   ccv_nnc_tensor_pin_memory(hw3);
442 |  1 |   ccv_nnc_tensor_pin_memory(hbias3);
443 |  1 |   hw3->data.f32[0] = 0.6;
444 |  1 |   hw3->data.f32[1] = 3;
445 |  1 |   hbias3->data.f32[0] = 0.4;
446 |  1 |   ccv_nnc_tensor_t* const hw4 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 2), 0);
447 |  1 |   ccv_nnc_tensor_t* const hbias4 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
448 |  1 |   ccv_nnc_tensor_pin_memory(hw4);
449 |  1 |   ccv_nnc_tensor_pin_memory(hbias4);
450 |  1 |   hw4->data.f32[0] = 0.2;
451 |  1 |   hw4->data.f32[1] = 0.3;
452 |  1 |   hbias4->data.f32[0] = 1;
453 |  1 |   ccv_nnc_tensor_t* const hbiasc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
454 |  1 |   ccv_nnc_tensor_pin_memory(hbiasc);
455 |  1 |   hbiasc->data.f32[0] = 0.5;
456 |  1 |   ccv_nnc_tensor_t* const a_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, a);
457 |  1 |   ccv_nnc_tensor_t* const w1_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, w1);
458 |  1 |   ccv_nnc_tensor_t* const bias1_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, bias1);
459 |  1 |   ccv_nnc_tensor_t* const w2_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, w2);
460 |  1 |   ccv_nnc_tensor_t* const bias2_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, bias2);
461 |  1 |   ccv_nnc_tensor_t* const w3_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, w3);
462 |  1 |   ccv_nnc_tensor_t* const bias3_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, bias3);
463 |  1 |   ccv_nnc_tensor_t* const w4_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, w4);
464 |  1 |   ccv_nnc_tensor_t* const bias4_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, bias4);
465 |  1 |   ccv_nnc_tensor_t* const biasc_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, biasc);
466 |  1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hw1, hbias1, hw2, hbias2, hw3, hbias3, hw4, hbias4, hbiasc), TENSOR_LIST(a_tensor, w1_tensor, bias1_tensor, w2_tensor, bias2_tensor, w3_tensor, bias3_tensor, w4_tensor, bias4_tensor, biasc_tensor), 0);
467 |  1 |   ccv_nnc_stream_context_t* const stream_context = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
468 |  1 |   ccv_nnc_graph_run(graph, 0, stream_context, 0, TRAVERSE_FULL);
469 |  1 |   // Run again on the same graph immediately. It shouldn't mess with the result, still sequential.
470 |  1 |   ccv_nnc_graph_run(graph, 0, stream_context, 0, TRAVERSE_FULL);
471 |  1 |   ccv_nnc_stream_context_wait(stream_context);
472 |  1 |   ccv_nnc_stream_context_free(stream_context);
473 |  1 |   ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 1), 0);
474 |  1 |   ccv_nnc_tensor_pin_memory(hc);
475 |  1 |   ccv_nnc_tensor_t* const c_tensor = ccv_nnc_tensor_from_symbol(tensor_arena, c);
476 |  1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c_tensor), TENSOR_LIST(hc), 0);
477 |  1 |   float av0 = 1.4;
478 |  1 |   float av1 = 0.2;
479 |  1 |   int i;
480 |  6 |   for (i = 0; i < 5; i++)
481 |  5 |   {
482 |  5 |     const float b0 = av0 * 1.1 + av1 * 2.2;
483 |  5 |     const float b1 = av0 * 1 + av1 * 2;
484 |  5 |     av0 = b0;
485 |  5 |     av1 = b1;
486 |  5 |   }
487 |  1 |   const float b2v0 = 1.4 * 0.1 + 0.2 * 0.2 + 1;
488 |  1 |   const float b2v1 = 1.4 * 1.2 + 0.2 * 1.1;
489 |  1 |   const float b3v = b2v0 * 0.6 + b2v1 * 3 + 0.4;
490 |  1 |   const float b4v = av0 * 0.2 + av1 * 0.3 + 1;
491 |  1 |   const float cv = b3v * b4v + 0.5;
492 |  1 |   REQUIRE_EQ_WITH_TOLERANCE(hc->data.f32[0], cv, 1e-2, "should match simple algebra");
493 |  1 |   ccv_nnc_tensor_free(ha);
494 |  1 |   ccv_nnc_tensor_free(hw1);
495 |  1 |   ccv_nnc_tensor_free(hbias1);
496 |  1 |   ccv_nnc_tensor_free(hw2);
497 |  1 |   ccv_nnc_tensor_free(hbias2);
498 |  1 |   ccv_nnc_tensor_free(hw3);
499 |  1 |   ccv_nnc_tensor_free(hbias3);
500 |  1 |   ccv_nnc_tensor_free(hw4);
501 |  1 |   ccv_nnc_tensor_free(hbias4);
502 |  1 |   ccv_nnc_tensor_free(hbiasc);
503 |  1 |   ccv_nnc_tensor_free(hc);
504 |  1 |   ccv_nnc_symbolic_graph_free(symbolic_graph);
505 |  1 |   ccv_nnc_graph_free(graph);
506 |  1 |   ccv_nnc_tensor_arena_free(tensor_arena);
507 |  1 |   ccv_nnc_graph_exec_arena_free(graph_exec_arena);
508 |  1 | }
509 |    |
510 |    | #include "case_main.h"