/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/dynamic.graph.tests.c
Line | Count | Source |
1 | | #include "case.h" |
2 | | #include "ccv_case.h" |
3 | | #include "ccv_nnc_case.h" |
4 | | #include <ccv.h> |
5 | | #include <nnc/ccv_nnc.h> |
6 | | #include <nnc/ccv_nnc_easy.h> |
7 | | #include <3rdparty/dsfmt/dSFMT.h> |
8 | | |
9 | | TEST_SETUP() |
10 | | { |
11 | | ccv_nnc_init(); |
12 | | } |
13 | | |
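// Sets x0 = 2 on GPU 0 and x1 = -1 on GPU 1, then issues a single SCALAR_MUL(1.1)
// exec over both pairs; the trailing 2 appears to be the parallel count, so the two
// multiplications can be dispatched on per-device streams. The device-to-host copies
// then verify 2 * 1.1 and -1 * 1.1.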
14 | | TEST_CASE("run dynamic graph on multiple streams") |
15 | | { |
16 | | GUARD_ELSE_RETURN(ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU) >= 2 && |
17 | | ccv_nnc_cmd_ok(CCV_NNC_SCALAR_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN)); |
18 | | ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new(); |
19 | | ccv_nnc_tensor_variable_t const x0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1)); |
20 | | ccv_nnc_tensor_variable_t const x1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1)); |
21 | | ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x0), 0, 0); |
22 | | ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(-1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x1), 0, 0); |
23 | | ccv_nnc_tensor_variable_t const y0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1)); |
24 | | ccv_nnc_tensor_variable_t const y1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1)); |
25 | | ccv_nnc_dynamic_graph_exec(graph, CMD_SCALAR_MUL_FORWARD(1.1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(y0, y1), 2, 0); |
26 | | ccv_nnc_tensor_t* const hy1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
27 | | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, y1, 0)), TENSOR_LIST(hy1), 0); |
28 | | ccv_nnc_tensor_t* const hy0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
29 | | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, y0, 0)), TENSOR_LIST(hy0), 0); |
30 | | REQUIRE_EQ_WITH_TOLERANCE(hy0->data.f32[0], 2.2, 1e-5, "should be equal"); |
31 | | REQUIRE_EQ_WITH_TOLERANCE(hy1->data.f32[0], -1.1, 1e-5, "should be equal"); |
32 | | ccv_nnc_dynamic_graph_free(graph); |
33 | | ccv_nnc_tensor_free(hy1); |
34 | | ccv_nnc_tensor_free(hy0); |
35 | | } |
36 | | |
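// Same setup as the test above, but the SCALAR_MUL exec is issued on an explicit GPU
// stream context; y0 is resolved and copied back on that stream before the wait, while
// y1 is copied synchronously on the default (null) stream afterwards.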
37 | | TEST_CASE("async run dynamic graph on multiple streams, variant 1") |
38 | 1 | { |
39 | 1 | GUARD_ELSE_RETURN(ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU) >= 2 && |
40 | 1 | ccv_nnc_cmd_ok(CCV_NNC_SCALAR_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN)); |
41 | 1 | ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new(); |
42 | 1 | ccv_nnc_tensor_variable_t const x0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1)); |
43 | 1 | ccv_nnc_tensor_variable_t const x1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1)); |
44 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x0), 0, 0); |
45 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(-1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x1), 0, 0); |
46 | 1 | ccv_nnc_tensor_variable_t const y0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1)); |
47 | 1 | ccv_nnc_tensor_variable_t const y1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1)); |
48 | 1 | ccv_nnc_stream_context_t* const stream = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU); |
49 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SCALAR_MUL_FORWARD(1.1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(y0, y1), 2, stream); |
50 | 1 | ccv_nnc_tensor_t* const hy0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
51 | 1 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, y0, stream)), TENSOR_LIST(hy0), stream); |
52 | 1 | ccv_nnc_stream_context_wait(stream); |
53 | 1 | ccv_nnc_tensor_t* const hy1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
54 | 1 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, y1, 0)), TENSOR_LIST(hy1), 0); |
55 | 1 | REQUIRE_EQ_WITH_TOLERANCE(hy0->data.f32[0], 2.2, 1e-5, "should be equal"); |
56 | 1 | REQUIRE_EQ_WITH_TOLERANCE(hy1->data.f32[0], -1.1, 1e-5, "should be equal"); |
57 | 1 | ccv_nnc_dynamic_graph_free(graph); |
58 | 1 | ccv_nnc_stream_context_free(stream); |
59 | 1 | ccv_nnc_tensor_free(hy1); |
60 | 1 | ccv_nnc_tensor_free(hy0); |
61 | 1 | } |
62 | | |
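// Variant 2 waits on the stream immediately after the SCALAR_MUL exec, so both
// device-to-host copies can run synchronously on the default (null) stream.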
63 | | TEST_CASE("async run dynamic graph on multiple streams, variant 2") |
64 | 1 | { |
65 | 1 | GUARD_ELSE_RETURN(ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU) >= 2 && |
66 | 1 | ccv_nnc_cmd_ok(CCV_NNC_SCALAR_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN)); |
67 | 1 | ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new(); |
68 | 1 | ccv_nnc_tensor_variable_t const x0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1)); |
69 | 1 | ccv_nnc_tensor_variable_t const x1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1)); |
70 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x0), 0, 0); |
71 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(-1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x1), 0, 0); |
72 | 1 | ccv_nnc_tensor_variable_t const y0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1)); |
73 | 1 | ccv_nnc_tensor_variable_t const y1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1)); |
74 | 1 | ccv_nnc_stream_context_t* const stream = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU); |
75 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SCALAR_MUL_FORWARD(1.1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(y0, y1), 2, stream); |
76 | 1 | ccv_nnc_stream_context_wait(stream); |
77 | 1 | ccv_nnc_tensor_t* const hy1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
78 | 1 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, y1, 0)), TENSOR_LIST(hy1), 0); |
79 | 1 | ccv_nnc_tensor_t* const hy0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
80 | 1 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, y0, 0)), TENSOR_LIST(hy0), 0); |
81 | 1 | REQUIRE_EQ_WITH_TOLERANCE(hy0->data.f32[0], 2.2, 1e-5, "should be equal"); |
82 | 1 | REQUIRE_EQ_WITH_TOLERANCE(hy1->data.f32[0], -1.1, 1e-5, "should be equal"); |
83 | 1 | ccv_nnc_dynamic_graph_free(graph); |
84 | 1 | ccv_nnc_stream_context_free(stream); |
85 | 1 | ccv_nnc_tensor_free(hy1); |
86 | 1 | ccv_nnc_tensor_free(hy0); |
87 | 1 | } |
88 | | |
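// Runs the same two-device forward pass, then calls backward twice so the gradients
// accumulate into dx0/dx1 before apply_gradients adds them to x0/x1 via ADD(1, 1).
// The expected 2 + 1.1 * 4 and -1 + 1.1 * 4 suggest the per-device gradients are also
// summed across the two devices when the parallel count is 2.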
89 | | TEST_CASE("run dynamic graph backward & apply gradients on multiple devices") |
90 | 1 | { |
91 | 1 | GUARD_ELSE_RETURN(ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU) >= 2 && |
92 | 1 | ccv_nnc_cmd_ok(CCV_NNC_SCALAR_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN)); |
93 | 1 | ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new(); |
94 | 1 | ccv_nnc_tensor_variable_t const x0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1)); |
95 | 1 | ccv_nnc_tensor_variable_t const x1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1)); |
96 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x0), 0, 0); |
97 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(-1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x1), 0, 0); |
98 | 1 | ccv_nnc_tensor_variable_t const y0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1)); |
99 | 1 | ccv_nnc_tensor_variable_t const y1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1)); |
100 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SCALAR_MUL_FORWARD(1.1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(y0, y1), 2, 0); |
101 | 1 | ccv_nnc_tensor_variable_t const dx0 = ccv_nnc_tensor_variable_new(graph); |
102 | 1 | ccv_nnc_tensor_variable_t const dx1 = ccv_nnc_tensor_variable_new(graph); |
103 | 1 | ccv_nnc_dynamic_graph_backward(graph, TENSOR_VARIABLE_LIST(y0, y1), 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(dx0, dx1), 0); |
104 | 1 | ccv_nnc_dynamic_graph_backward(graph, TENSOR_VARIABLE_LIST(y0, y1), 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(dx0, dx1), 0); |
105 | 1 | ccv_nnc_dynamic_graph_apply_gradients(graph, CMD_ADD_FORWARD(1, 1), TENSOR_VARIABLE_LIST(dx0, dx1), TENSOR_VARIABLE_LIST(x0, x1), 0, 2, 0); |
106 | 1 | ccv_nnc_tensor_t* const hx0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
107 | 1 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, x0, 0)), TENSOR_LIST(hx0), 0); |
108 | 1 | ccv_nnc_tensor_t* const hx1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
109 | 1 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, x1, 0)), TENSOR_LIST(hx1), 0); |
110 | 1 | REQUIRE_EQ_WITH_TOLERANCE(hx0->data.f32[0], 2 + 1.1 * 4, 1e-5, "should be equal"); |
111 | 1 | REQUIRE_EQ_WITH_TOLERANCE(hx1->data.f32[0], -1 + 1.1 * 4, 1e-5, "should be equal"); |
112 | 1 | ccv_nnc_dynamic_graph_free(graph); |
113 | 1 | ccv_nnc_tensor_free(hx0); |
114 | 1 | ccv_nnc_tensor_free(hx1); |
115 | 1 | } |
116 | | |
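// Async version of the test above: the forward exec, both backward calls and
// apply_gradients are issued on one stream; x0 is copied back on that stream before
// the wait, x1 synchronously afterwards.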
117 | | TEST_CASE("async run dynamic graph backward & apply gradients on multiple devices") |
118 | 1 | { |
119 | 1 | GUARD_ELSE_RETURN(ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU) >= 2 && |
120 | 1 | ccv_nnc_cmd_ok(CCV_NNC_SCALAR_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN)); |
121 | 1 | ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new(); |
122 | 1 | ccv_nnc_tensor_variable_t const x0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1)); |
123 | 1 | ccv_nnc_tensor_variable_t const x1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1)); |
124 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x0), 0, 0); |
125 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(-1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x1), 0, 0); |
126 | 1 | ccv_nnc_tensor_variable_t const y0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1)); |
127 | 1 | ccv_nnc_tensor_variable_t const y1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1)); |
128 | 1 | ccv_nnc_stream_context_t* const stream = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU); |
129 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SCALAR_MUL_FORWARD(1.1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(y0, y1), 2, stream); |
130 | 1 | ccv_nnc_tensor_variable_t const dx0 = ccv_nnc_tensor_variable_new(graph); |
131 | 1 | ccv_nnc_tensor_variable_t const dx1 = ccv_nnc_tensor_variable_new(graph); |
132 | 1 | ccv_nnc_dynamic_graph_backward(graph, TENSOR_VARIABLE_LIST(y0, y1), 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(dx0, dx1), stream); |
133 | 1 | ccv_nnc_dynamic_graph_backward(graph, TENSOR_VARIABLE_LIST(y0, y1), 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(dx0, dx1), stream); |
134 | 1 | ccv_nnc_dynamic_graph_apply_gradients(graph, CMD_ADD_FORWARD(1, 1), TENSOR_VARIABLE_LIST(dx0, dx1), TENSOR_VARIABLE_LIST(x0, x1), 0, 2, stream); |
135 | 1 | ccv_nnc_tensor_t* const hx0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
136 | 1 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, x0, 0)), TENSOR_LIST(hx0), stream); |
137 | 1 | ccv_nnc_stream_context_wait(stream); |
138 | 1 | ccv_nnc_tensor_t* const hx1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
139 | 1 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, x1, 0)), TENSOR_LIST(hx1), 0); |
140 | 1 | REQUIRE_EQ_WITH_TOLERANCE(hx0->data.f32[0], 2 + 1.1 * 4, 1e-5, "should be equal"); |
141 | 1 | REQUIRE_EQ_WITH_TOLERANCE(hx1->data.f32[0], -1 + 1.1 * 4, 1e-5, "should be equal"); |
142 | 1 | ccv_nnc_dynamic_graph_free(graph); |
143 | 1 | ccv_nnc_stream_context_free(stream); |
144 | 1 | ccv_nnc_tensor_free(hx0); |
145 | 1 | ccv_nnc_tensor_free(hx1); |
146 | 1 | } |
147 | | |
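// Uses minimize with ADD(1, 1) in place of a real optimizer, which appears to simply
// add the gradient of the outputs to each parameter once; the expected 2 + 1.1 * 2 and
// -1 + 1.1 * 2 again point to the gradients being summed across the two devices.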
148 | | TEST_CASE("run dynamic graph minimize on multiple devices") |
149 | 1 | { |
150 | 1 | GUARD_ELSE_RETURN(ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU) >= 2 && |
151 | 1 | ccv_nnc_cmd_ok(CCV_NNC_SCALAR_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN)); |
152 | 1 | ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new(); |
153 | 1 | ccv_nnc_tensor_variable_t const x0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1)); |
154 | 1 | ccv_nnc_tensor_variable_t const x1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1)); |
155 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x0), 0, 0); |
156 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(-1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x1), 0, 0); |
157 | 1 | ccv_nnc_tensor_variable_t const y0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1)); |
158 | 1 | ccv_nnc_tensor_variable_t const y1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1)); |
159 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SCALAR_MUL_FORWARD(1.1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(y0, y1), 2, 0); |
160 | 1 | ccv_nnc_dynamic_graph_minimize(graph, CMD_ADD_FORWARD(1, 1), TENSOR_VARIABLE_LIST(y0, y1), 0, TENSOR_VARIABLE_LIST(x0, x1), 0, 2, 0); |
161 | 1 | ccv_nnc_tensor_t* const hx0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
162 | 1 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, x0, 0)), TENSOR_LIST(hx0), 0); |
163 | 1 | ccv_nnc_tensor_t* const hx1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
164 | 1 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, x1, 0)), TENSOR_LIST(hx1), 0); |
165 | 1 | REQUIRE_EQ_WITH_TOLERANCE(hx0->data.f32[0], 2 + 1.1 * 2, 1e-5, "should be equal"); |
166 | 1 | REQUIRE_EQ_WITH_TOLERANCE(hx1->data.f32[0], -1 + 1.1 * 2, 1e-5, "should be equal"); |
167 | 1 | ccv_nnc_dynamic_graph_free(graph); |
168 | 1 | ccv_nnc_tensor_free(hx0); |
169 | 1 | ccv_nnc_tensor_free(hx1); |
170 | 1 | } |
171 | | |
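// Async version of the minimize test: forward and minimize run on one stream, x0 is
// copied back on that stream before the wait, x1 synchronously afterwards.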
172 | | TEST_CASE("async run dynamic graph minimize on multiple devices") |
173 | 1 | { |
174 | 1 | GUARD_ELSE_RETURN(ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU) >= 2 && |
175 | 1 | ccv_nnc_cmd_ok(CCV_NNC_SCALAR_MUL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN)); |
176 | 1 | ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new(); |
177 | 1 | ccv_nnc_tensor_variable_t const x0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1)); |
178 | 1 | ccv_nnc_tensor_variable_t const x1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1)); |
179 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x0), 0, 0); |
180 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(-1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x1), 0, 0); |
181 | 1 | ccv_nnc_tensor_variable_t const y0 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 1)); |
182 | 1 | ccv_nnc_tensor_variable_t const y1 = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(001, 32F, 1)); |
183 | 1 | ccv_nnc_stream_context_t* const stream = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU); |
184 | 1 | ccv_nnc_dynamic_graph_exec(graph, CMD_SCALAR_MUL_FORWARD(1.1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(x0, x1), TENSOR_VARIABLE_LIST(y0, y1), 2, stream); |
185 | 1 | ccv_nnc_dynamic_graph_minimize(graph, CMD_ADD_FORWARD(1, 1), TENSOR_VARIABLE_LIST(y0, y1), 0, TENSOR_VARIABLE_LIST(x0, x1), 0, 2, stream); |
186 | 1 | ccv_nnc_tensor_t* const hx0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
187 | 1 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, x0, 0)), TENSOR_LIST(hx0), stream); |
188 | 1 | ccv_nnc_stream_context_wait(stream); |
189 | 1 | ccv_nnc_tensor_t* const hx1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0); |
190 | 1 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(graph, x1, 0)), TENSOR_LIST(hx1), 0); |
191 | 1 | REQUIRE_EQ_WITH_TOLERANCE(hx0->data.f32[0], 2 + 1.1 * 2, 1e-5, "should be equal"); |
192 | 1 | REQUIRE_EQ_WITH_TOLERANCE(hx1->data.f32[0], -1 + 1.1 * 2, 1e-5, "should be equal"); |
193 | 1 | ccv_nnc_dynamic_graph_free(graph); |
194 | 1 | ccv_nnc_stream_context_free(stream); |
195 | 1 | ccv_nnc_tensor_free(hx0); |
196 | 1 | ccv_nnc_tensor_free(hx1); |
197 | 1 | } |
198 | | |
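// Exercises the allocator's reuse policy: after freeing a 128-float buffer, a smaller
// 64-float variable gets the same region; after freeing b and then c, a new 128-float
// variable reuses c's region, matching the "c freed last, it is the first to be
// reused" assertion.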
199 | | TEST_CASE("dynamic graph memory reuse logic") |
200 | 1 | { |
201 | 1 | ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new(); |
202 | 1 | ccv_nnc_tensor_variable_t a = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 128)); |
203 | 1 | ccv_nnc_tensor_t* a_tensor = ccv_nnc_tensor_from_variable(graph, a); |
204 | 1 | const intptr_t a_ptr = (intptr_t)a_tensor->data.u8; |
205 | 1 | ccv_nnc_tensor_variable_free(graph, a); |
206 | 1 | ccv_nnc_tensor_variable_t b = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 64)); |
207 | 1 | ccv_nnc_tensor_t* b_tensor = ccv_nnc_tensor_from_variable(graph, b); |
208 | 1 | const intptr_t b_ptr = (intptr_t)b_tensor->data.u8; |
209 | 1 | REQUIRE(a_ptr == b_ptr, "allocate to the same region, even though it is smaller"); |
210 | 1 | ccv_nnc_tensor_variable_t c = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 128)); |
211 | 1 | ccv_nnc_tensor_t* c_tensor = ccv_nnc_tensor_from_variable(graph, c); |
212 | 1 | const intptr_t c_ptr = (intptr_t)c_tensor->data.u8; |
213 | 1 | ccv_nnc_tensor_variable_free(graph, b); |
214 | 1 | ccv_nnc_tensor_variable_free(graph, c); |
215 | 1 | ccv_nnc_tensor_variable_t d = ccv_nnc_tensor_variable_new(graph, GPU_TENSOR_NHWC(000, 32F, 128)); |
216 | 1 | ccv_nnc_tensor_t* d_tensor = ccv_nnc_tensor_from_variable(graph, d); |
217 | 1 | const intptr_t d_ptr = (intptr_t)d_tensor->data.u8; |
218 | 1 | REQUIRE(c_ptr == d_ptr, "c freed last, it is the first to be reused"); |
219 | 1 | ccv_nnc_dynamic_graph_free(graph); |
220 | 1 | } |
221 | | |
222 | | #include "case_main.h" |
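// A minimal sketch of the same dynamic-graph pattern on the CPU, assuming SCALAR_MUL
// has a CPU reference backend; it only reuses calls that already appear in the tests
// above and is meant as a smoke test for machines without two GPUs. The function name
// and the direct assert are illustrative, not part of the test suite.
#include <assert.h>
#include <math.h>
#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>

static void dynamic_graph_cpu_smoke(void)
{
	ccv_nnc_init();
	ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new();
	// x = 2 on the CPU, then y = 1.1 * x.
	ccv_nnc_tensor_variable_t const x = ccv_nnc_tensor_variable_new(graph, CPU_TENSOR_NHWC(32F, 1));
	ccv_nnc_dynamic_graph_exec(graph, CMD_SET_FORWARD(2), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(x), 0, 0);
	ccv_nnc_tensor_variable_t const y = ccv_nnc_tensor_variable_new(graph, CPU_TENSOR_NHWC(32F, 1));
	ccv_nnc_dynamic_graph_exec(graph, CMD_SCALAR_MUL_FORWARD(1.1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(x), TENSOR_VARIABLE_LIST(y), 0, 0);
	// CPU tensors can be read directly, so no DATA_TRANSFER is needed here.
	ccv_nnc_tensor_t* const yt = ccv_nnc_tensor_from_variable(graph, y, 0);
	assert(fabsf(yt->data.f32[0] - 2.2f) < 1e-5f);
	ccv_nnc_dynamic_graph_free(graph);
}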