Coverage Report

Created: 2024-08-18 16:21

/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/dynamic.graph.tests.c
Line | Count | Source
-----+-------+------------------------------------------------------------------------------------------------
   1 |       | #include "case.h"
   2 |       | #include "ccv_case.h"
   3 |       | #include "ccv_nnc_case.h"
   4 |       | #include <ccv.h>
   5 |       | #include <nnc/ccv_nnc.h>
   6 |       | #include <nnc/ccv_nnc_easy.h>
   7 |       | #include <3rdparty/dsfmt/dSFMT.h>
   8 |       |
   9 |       | TEST_SETUP()
  10 |       | {
  11 |       |   ccv_nnc_init();
  12 |       | }
  13 |       |
  14 |       | TEST_CASE("run dynamic graph on multiple streams")
  15 |       | {
  16 |       |   GUARD_ELSE_RETURN(ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU) >= 2 &&
  17 |       |     ccv_nnc_cmd_ok(CCV_NNC_SCALAR_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN));
  18 |       |   ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new();
  19 |       |   ccv_nnc_tensor_variable_t const x0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1));
  20 |       |   ccv_nnc_tensor_variable_t const x1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1));
  21 |       |   ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x0), 0, 0);
  22 |       |   ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(-1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x1), 0, 0);
  23 |       |   ccv_nnc_tensor_variable_t const y0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1));
  24 |       |   ccv_nnc_tensor_variable_t const y1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1));
  25 |       |   ccv_nnc_dynamic_graph_exec(graph, CMD_SCALAR_MUL_FORWARD(1.1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(y0, y1), 2, 0);
  26 |       |   ccv_nnc_tensor_t* const hy1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  27 |       |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, y1, 0)), TENSOR_LIST(hy1), 0);
  28 |       |   ccv_nnc_tensor_t* const hy0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  29 |       |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, y0, 0)), TENSOR_LIST(hy0), 0);
  30 |       |   REQUIRE_EQ_WITH_TOLERANCE(hy0->data.f32[0], 2.2, 1e-5, "should be equal");
  31 |       |   REQUIRE_EQ_WITH_TOLERANCE(hy1->data.f32[0], -1.1, 1e-5, "should be equal");
  32 |       |   ccv_nnc_dynamic_graph_free(graph);
  33 |       |   ccv_nnc_tensor_free(hy1);
  34 |       |   ccv_nnc_tensor_free(hy0);
  35 |       | }
  36 |       |
  37 |       | TEST_CASE("async run dynamic graph on multiple streams, variant 1")
  38 |     1 | {
  39 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU) >= 2 &&
  40 |     1 |     ccv_nnc_cmd_ok(CCV_NNC_SCALAR_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN));
  41 |     1 |   ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new();
  42 |     1 |   ccv_nnc_tensor_variable_t const x0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1));
  43 |     1 |   ccv_nnc_tensor_variable_t const x1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1));
  44 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x0), 0, 0);
  45 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(-1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x1), 0, 0);
  46 |     1 |   ccv_nnc_tensor_variable_t const y0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1));
  47 |     1 |   ccv_nnc_tensor_variable_t const y1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1));
  48 |     1 |   ccv_nnc_stream_context_t* const stream = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
  49 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SCALAR_MUL_FORWARD(1.1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(y0, y1), 2, stream);
  50 |     1 |   ccv_nnc_tensor_t* const hy0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  51 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, y0, stream)), TENSOR_LIST(hy0), stream);
  52 |     1 |   ccv_nnc_stream_context_wait(stream);
  53 |     1 |   ccv_nnc_tensor_t* const hy1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  54 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, y1, 0)), TENSOR_LIST(hy1), 0);
  55 |     1 |   REQUIRE_EQ_WITH_TOLERANCE(hy0->data.f32[0], 2.2, 1e-5, "should be equal");
  56 |     1 |   REQUIRE_EQ_WITH_TOLERANCE(hy1->data.f32[0], -1.1, 1e-5, "should be equal");
  57 |     1 |   ccv_nnc_dynamic_graph_free(graph);
  58 |     1 |   ccv_nnc_stream_context_free(stream);
  59 |     1 |   ccv_nnc_tensor_free(hy1);
  60 |     1 |   ccv_nnc_tensor_free(hy0);
  61 |     1 | }
  62 |       |
  63 |       | TEST_CASE("async run dynamic graph on multiple streams, variant 2")
  64 |     1 | {
  65 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU) >= 2 &&
  66 |     1 |     ccv_nnc_cmd_ok(CCV_NNC_SCALAR_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN));
  67 |     1 |   ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new();
  68 |     1 |   ccv_nnc_tensor_variable_t const x0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1));
  69 |     1 |   ccv_nnc_tensor_variable_t const x1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1));
  70 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x0), 0, 0);
  71 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(-1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x1), 0, 0);
  72 |     1 |   ccv_nnc_tensor_variable_t const y0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1));
  73 |     1 |   ccv_nnc_tensor_variable_t const y1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1));
  74 |     1 |   ccv_nnc_stream_context_t* const stream = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
  75 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SCALAR_MUL_FORWARD(1.1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(y0, y1), 2, stream);
  76 |     1 |   ccv_nnc_stream_context_wait(stream);
  77 |     1 |   ccv_nnc_tensor_t* const hy1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  78 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, y1, 0)), TENSOR_LIST(hy1), 0);
  79 |     1 |   ccv_nnc_tensor_t* const hy0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  80 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, y0, 0)), TENSOR_LIST(hy0), 0);
  81 |     1 |   REQUIRE_EQ_WITH_TOLERANCE(hy0->data.f32[0], 2.2, 1e-5, "should be equal");
  82 |     1 |   REQUIRE_EQ_WITH_TOLERANCE(hy1->data.f32[0], -1.1, 1e-5, "should be equal");
  83 |     1 |   ccv_nnc_dynamic_graph_free(graph);
  84 |     1 |   ccv_nnc_stream_context_free(stream);
  85 |     1 |   ccv_nnc_tensor_free(hy1);
  86 |     1 |   ccv_nnc_tensor_free(hy0);
  87 |     1 | }
  88 |       |
  89 |       | TEST_CASE("run dynamic graph backward & apply gradients on multiple devices")
  90 |     1 | {
  91 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU) >= 2 &&
  92 |     1 |     ccv_nnc_cmd_ok(CCV_NNC_SCALAR_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN));
  93 |     1 |   ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new();
  94 |     1 |   ccv_nnc_tensor_variable_t const x0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1));
  95 |     1 |   ccv_nnc_tensor_variable_t const x1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1));
  96 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x0), 0, 0);
  97 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(-1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x1), 0, 0);
  98 |     1 |   ccv_nnc_tensor_variable_t const y0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1));
  99 |     1 |   ccv_nnc_tensor_variable_t const y1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1));
 100 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SCALAR_MUL_FORWARD(1.1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(y0, y1), 2, 0);
 101 |     1 |   ccv_nnc_tensor_variable_t const dx0 = ccv_nnc_tensor_variable_new(graph);
 102 |     1 |   ccv_nnc_tensor_variable_t const dx1 = ccv_nnc_tensor_variable_new(graph);
 103 |     1 |   ccv_nnc_dynamic_graph_backward(graph, TENSOR_VARIABLE_LIST(y0, y1), 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(dx0, dx1), 0);
 104 |     1 |   ccv_nnc_dynamic_graph_backward(graph, TENSOR_VARIABLE_LIST(y0, y1), 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(dx0, dx1), 0);
 105 |     1 |   ccv_nnc_dynamic_graph_apply_gradients(graph, CMD_ADD_FORWARD(1, 1), TENSOR_VARIABLE_LIST(dx0, dx1), TENSOR_VARIABLE_LIST(x0, x1), 0, 2, 0);
 106 |     1 |   ccv_nnc_tensor_t* const hx0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
 107 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, x0, 0)), TENSOR_LIST(hx0), 0);
 108 |     1 |   ccv_nnc_tensor_t* const hx1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
 109 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, x1, 0)), TENSOR_LIST(hx1), 0);
 110 |     1 |   REQUIRE_EQ_WITH_TOLERANCE(hx0->data.f32[0], 2 + 1.1 * 4, 1e-5, "should be equal");
 111 |     1 |   REQUIRE_EQ_WITH_TOLERANCE(hx1->data.f32[0], -1 + 1.1 * 4, 1e-5, "should be equal");
 112 |     1 |   ccv_nnc_dynamic_graph_free(graph);
 113 |     1 |   ccv_nnc_tensor_free(hx0);
 114 |     1 |   ccv_nnc_tensor_free(hx1);
 115 |     1 | }
 116 |       |
 117 |       | TEST_CASE("async run dynamic graph backward & apply gradients on multiple devices")
 118 |     1 | {
 119 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU) >= 2 &&
 120 |     1 |     ccv_nnc_cmd_ok(CCV_NNC_SCALAR_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN));
 121 |     1 |   ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new();
 122 |     1 |   ccv_nnc_tensor_variable_t const x0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1));
 123 |     1 |   ccv_nnc_tensor_variable_t const x1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1));
 124 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x0), 0, 0);
 125 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(-1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x1), 0, 0);
 126 |     1 |   ccv_nnc_tensor_variable_t const y0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1));
 127 |     1 |   ccv_nnc_tensor_variable_t const y1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1));
 128 |     1 |   ccv_nnc_stream_context_t* const stream = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
 129 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SCALAR_MUL_FORWARD(1.1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(y0, y1), 2, stream);
 130 |     1 |   ccv_nnc_tensor_variable_t const dx0 = ccv_nnc_tensor_variable_new(graph);
 131 |     1 |   ccv_nnc_tensor_variable_t const dx1 = ccv_nnc_tensor_variable_new(graph);
 132 |     1 |   ccv_nnc_dynamic_graph_backward(graph, TENSOR_VARIABLE_LIST(y0, y1), 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(dx0, dx1), stream);
 133 |     1 |   ccv_nnc_dynamic_graph_backward(graph, TENSOR_VARIABLE_LIST(y0, y1), 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(dx0, dx1), stream);
 134 |     1 |   ccv_nnc_dynamic_graph_apply_gradients(graph, CMD_ADD_FORWARD(1, 1), TENSOR_VARIABLE_LIST(dx0, dx1), TENSOR_VARIABLE_LIST(x0, x1), 0, 2, stream);
 135 |     1 |   ccv_nnc_tensor_t* const hx0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
 136 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, x0, 0)), TENSOR_LIST(hx0), stream);
 137 |     1 |   ccv_nnc_stream_context_wait(stream);
 138 |     1 |   ccv_nnc_tensor_t* const hx1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
 139 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, x1, 0)), TENSOR_LIST(hx1), 0);
 140 |     1 |   REQUIRE_EQ_WITH_TOLERANCE(hx0->data.f32[0], 2 + 1.1 * 4, 1e-5, "should be equal");
 141 |     1 |   REQUIRE_EQ_WITH_TOLERANCE(hx1->data.f32[0], -1 + 1.1 * 4, 1e-5, "should be equal");
 142 |     1 |   ccv_nnc_dynamic_graph_free(graph);
 143 |     1 |   ccv_nnc_stream_context_free(stream);
 144 |     1 |   ccv_nnc_tensor_free(hx0);
 145 |     1 |   ccv_nnc_tensor_free(hx1);
 146 |     1 | }
 147 |       |
 148 |       | TEST_CASE("run dynamic graph minimize on multiple devices")
 149 |     1 | {
 150 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU) >= 2 &&
 151 |     1 |     ccv_nnc_cmd_ok(CCV_NNC_SCALAR_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN));
 152 |     1 |   ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new();
 153 |     1 |   ccv_nnc_tensor_variable_t const x0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1));
 154 |     1 |   ccv_nnc_tensor_variable_t const x1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1));
 155 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x0), 0, 0);
 156 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(-1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x1), 0, 0);
 157 |     1 |   ccv_nnc_tensor_variable_t const y0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1));
 158 |     1 |   ccv_nnc_tensor_variable_t const y1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1));
 159 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SCALAR_MUL_FORWARD(1.1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(y0, y1), 2, 0);
 160 |     1 |   ccv_nnc_dynamic_graph_minimize(graph, CMD_ADD_FORWARD(1, 1), TENSOR_VARIABLE_LIST(y0, y1), 0, TENSOR_VARIABLE_LIST(x0, x1), 0, 2, 0);
 161 |     1 |   ccv_nnc_tensor_t* const hx0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
 162 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, x0, 0)), TENSOR_LIST(hx0), 0);
 163 |     1 |   ccv_nnc_tensor_t* const hx1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
 164 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, x1, 0)), TENSOR_LIST(hx1), 0);
 165 |     1 |   REQUIRE_EQ_WITH_TOLERANCE(hx0->data.f32[0], 2 + 1.1 * 2, 1e-5, "should be equal");
 166 |     1 |   REQUIRE_EQ_WITH_TOLERANCE(hx1->data.f32[0], -1 + 1.1 * 2, 1e-5, "should be equal");
 167 |     1 |   ccv_nnc_dynamic_graph_free(graph);
 168 |     1 |   ccv_nnc_tensor_free(hx0);
 169 |     1 |   ccv_nnc_tensor_free(hx1);
 170 |     1 | }
 171 |       |
 172 |       | TEST_CASE("async run dynamic graph minimize on multiple devices")
 173 |     1 | {
 174 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU) >= 2 &&
 175 |     1 |     ccv_nnc_cmd_ok(CCV_NNC_SCALAR_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN));
 176 |     1 |   ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new();
 177 |     1 |   ccv_nnc_tensor_variable_t const x0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1));
 178 |     1 |   ccv_nnc_tensor_variable_t const x1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1));
 179 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x0), 0, 0);
 180 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(-1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x1), 0, 0);
 181 |     1 |   ccv_nnc_tensor_variable_t const y0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1));
 182 |     1 |   ccv_nnc_tensor_variable_t const y1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1));
 183 |     1 |   ccv_nnc_stream_context_t* const stream = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
 184 |     1 |   ccv_nnc_dynamic_graph_exec(graph, CMD_SCALAR_MUL_FORWARD(1.1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(y0, y1), 2, stream);
 185 |     1 |   ccv_nnc_dynamic_graph_minimize(graph, CMD_ADD_FORWARD(1, 1), TENSOR_VARIABLE_LIST(y0, y1), 0, TENSOR_VARIABLE_LIST(x0, x1), 0, 2, stream);
 186 |     1 |   ccv_nnc_tensor_t* const hx0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
 187 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, x0, 0)), TENSOR_LIST(hx0), stream);
 188 |     1 |   ccv_nnc_stream_context_wait(stream);
 189 |     1 |   ccv_nnc_tensor_t* const hx1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
 190 |     1 |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, x1, 0)), TENSOR_LIST(hx1), 0);
 191 |     1 |   REQUIRE_EQ_WITH_TOLERANCE(hx0->data.f32[0], 2 + 1.1 * 2, 1e-5, "should be equal");
 192 |     1 |   REQUIRE_EQ_WITH_TOLERANCE(hx1->data.f32[0], -1 + 1.1 * 2, 1e-5, "should be equal");
 193 |     1 |   ccv_nnc_dynamic_graph_free(graph);
 194 |     1 |   ccv_nnc_stream_context_free(stream);
 195 |     1 |   ccv_nnc_tensor_free(hx0);
 196 |     1 |   ccv_nnc_tensor_free(hx1);
 197 |     1 | }
 198 |       |
 199 |       | TEST_CASE("dynamic graph memory reuse logic")
 200 |     1 | {
 201 |     1 |   ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new();
 202 |     1 |   ccv_nnc_tensor_variable_t a = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 128));
 203 |     1 |   ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_from_variable(graph, a);
 204 |     1 |   const intptr_t a_ptr = (intptr_t)a_tensor->data.u8;
 205 |     1 |   ccv_nnc_tensor_variable_free(graph, a);
 206 |     1 |   ccv_nnc_tensor_variable_t b = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 64));
 207 |     1 |   ccv_nnc_tensor_t* b_tensor = ccv_nnc_tensor_from_variable(graph, b);
 208 |     1 |   const intptr_t b_ptr = (intptr_t)b_tensor->data.u8;
 209 |     1 |   REQUIRE(a_ptr == b_ptr, "allocate to the same region, even though it is smaller");
 210 |     1 |   ccv_nnc_tensor_variable_t c = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 128));
 211 |     1 |   ccv_nnc_tensor_t* c_tensor = ccv_nnc_tensor_from_variable(graph, c);
 212 |     1 |   const intptr_t c_ptr = (intptr_t)c_tensor->data.u8;
 213 |     1 |   ccv_nnc_tensor_variable_free(graph, b);
 214 |     1 |   ccv_nnc_tensor_variable_free(graph, c);
 215 |     1 |   ccv_nnc_tensor_variable_t d = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 128));
 216 |     1 |   ccv_nnc_tensor_t* d_tensor = ccv_nnc_tensor_from_variable(graph, d);
 217 |     1 |   const intptr_t d_ptr = (intptr_t)d_tensor->data.u8;
 218 |     1 |   REQUIRE(c_ptr == d_ptr, "c freed last, it is the first to be reused");
 219 |     1 |   ccv_nnc_dynamic_graph_free(graph);
 220 |     1 | }
 221 |       |
 222 |       | #include "case_main.h"