Coverage Report

Created: 2024-08-19 11:27

/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/parallel.tests.c
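
The file under test exercises ccv_nnc_symbolic_graph_data_parallel across two GPUs in two modes, gradient reduce with parameter broadcast and gradient allreduce, and checks each result against a single-device run of the same small convolution network.
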
Line | Count | Source
   1 |       | #include "case.h"
   2 |       | #include "ccv_case.h"
   3 |       | #include "ccv_nnc_case.h"
   4 |       | #include <ccv.h>
   5 |       | #include <nnc/ccv_nnc.h>
   6 |       | #include <nnc/ccv_nnc_easy.h>
   7 |       | #include <3rdparty/dsfmt/dSFMT.h>
   8 |       |
   9 |       | TEST_SETUP()
  10 |       | {
  11 |       |   ccv_nnc_init();
  12 |       | }
  13 |       |
  14 |       | TEST_CASE("schedule symbolic graph to data parallel with broadcast and reduce")
  15 |     1 | {
  16 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_CONVOLUTION_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN));
  17 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU) >= 2);
  18 |     1 |   ccv_nnc_tensor_t* updated[4];
  19 |     1 |   ccv_nnc_tensor_t* cpu_inputs[2];
  20 |     1 |   ccv_nnc_tensor_t* cpu_fits[2];
  21 |     1 |   ccv_nnc_tensor_t* w1_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8, 3, 5, 5), 0);
  22 |     1 |   ccv_nnc_tensor_t* w3_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8, 8, 5, 5), 0);
  23 |     1 |   {
  24 |     1 |     ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
  25 |     1 |     const ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 16, 3, 32, 32), 0);
  26 |     1 |     const ccv_nnc_tensor_symbol_t w1 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 8, 3, 5, 5), 0);
  27 |     1 |     const ccv_nnc_tensor_symbol_t bias1 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 8), 0);
  28 |     1 |     const ccv_nnc_tensor_symbol_t y1 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 16, 8, 32, 32), 0);
  29 |     1 |     const ccv_nnc_graph_exec_symbol_t conv1 = ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_CONVOLUTION_FORWARD(1, 8, 5, 5), TENSOR_SYMBOL_LIST(x, w1, bias1), TENSOR_SYMBOL_LIST(y1), "conv1");
  30 |     1 |     ccv_nnc_graph_exec_symbol_set_hint(symbolic_graph, conv1, HINT((1, 1), (2, 2)));
  31 |     1 |     const ccv_nnc_tensor_symbol_t y2 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 16, 8, 16, 16), 0);
  32 |     1 |     const ccv_nnc_graph_exec_symbol_t avg2 = ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_AVERAGE_POOL_FORWARD(2, 2), TENSOR_SYMBOL_LIST(y1), TENSOR_SYMBOL_LIST(y2), "avg2");
  33 |     1 |     ccv_nnc_graph_exec_symbol_set_hint(symbolic_graph, avg2, HINT((2, 2)));
  34 |     1 |     const ccv_nnc_tensor_symbol_t w3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 8, 8, 5, 5), 0);
  35 |     1 |     const ccv_nnc_tensor_symbol_t bias3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 8), 0);
  36 |     1 |     const ccv_nnc_tensor_symbol_t y3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 16, 8, 8, 8), 0);
  37 |     1 |     const ccv_nnc_graph_exec_symbol_t conv3 = ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_CONVOLUTION_FORWARD(1, 8, 5, 5), TENSOR_SYMBOL_LIST(y2, w3, bias3), TENSOR_SYMBOL_LIST(y3), "conv3");
  38 |     1 |     ccv_nnc_graph_exec_symbol_set_hint(symbolic_graph, conv3, HINT((2, 2), (2, 2)));
  39 |     1 |     const ccv_nnc_tensor_symbol_t y4 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 16, 8, 1, 1), 0);
  40 |     1 |     ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_AVERAGE_POOL_FORWARD(8, 8), TENSOR_SYMBOL_LIST(y3), TENSOR_SYMBOL_LIST(y4), "avg4");
  41 |     1 |     const ccv_nnc_tensor_symbol_t y4a = ccv_nnc_tensor_symbol_alias_new(symbolic_graph, y4, ccv_nnc_no_ofs, DIM_ALLOC(8, 1, 1, 1), GPU_TENSOR_NCHW(000, 32F, 16, 8), 0);
  42 |     1 |     const ccv_nnc_tensor_symbol_t label = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 16), "label");
  43 |     1 |     const ccv_nnc_tensor_symbol_t y5 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 16, 8), "y5");
  44 |     1 |     const ccv_nnc_tensor_symbol_t loss = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 16), "loss");
  45 |     1 |     ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SOFTMAX_CROSSENTROPY_FORWARD(), TENSOR_SYMBOL_LIST(y4a, label), TENSOR_SYMBOL_LIST(loss, y5), "softmax crossentropy");
  46 |     1 |     ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
  47 |     1 |     ccv_nnc_tensor_symbol_t updated_params[4];
  48 |     1 |     ccv_nnc_tensor_symbol_t gradients[4];
  49 |     1 |     const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(CMD_SGD_FORWARD(0, 0.001, 1, 0.99, 0.9, 0.9));
  50 |     1 |     ccv_nnc_tensor_symbol_map_t saved_aux[saved_aux_size * 4];
  51 |     1 |     ccv_nnc_graph_exec_symbol_t updated_execs[4];
  52 |     1 |     ccv_nnc_symbolic_graph_minimize(symbolic_graph, CMD_SGD_FORWARD(0, 0.001, 1, 0.99, 0.9, 0.9), TENSOR_SYMBOL_LIST(loss), TENSOR_SYMBOL_LIST(w1, bias1, w3, bias3), 0, 0, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), gradients, updated_params, saved_aux, updated_execs);
  53 |     1 |     const ccv_nnc_tensor_symbol_t dloss = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, loss);
  54 |     1 |     ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SET_FORWARD(1), TENSOR_SYMBOL_LIST(), TENSOR_SYMBOL_LIST(dloss), "set 1");
  55 |     1 |     int i;
  56 |     5 |     for (i = 0; i < saved_aux_size * 4; i++)
  57 |     4 |       ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SET_FORWARD(0), TENSOR_SYMBOL_LIST(), TENSOR_SYMBOL_LIST(saved_aux[i].source), "set 0");
  58 |     1 |     ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
  59 |     1 |     ccv_nnc_symbolic_graph_data_parallel(symbolic_graph, 2, TENSOR_SYMBOL_LIST(w1, bias1, w3, bias3), 0, 0, 0, gradients, 4, 0, CCV_NNC_PARALLEL_REDUCE_OP_SUM, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), updated_execs, 4);
  60 |     1 |     ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
  61 |     1 |     SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
  62 |     1 |     ccv_nnc_graph_t* graph;
  63 |     1 |     ccv_nnc_tensor_arena_t* tensor_arena;
  64 |     1 |     ccv_nnc_graph_exec_arena_t* graph_exec_arena;
  65 |     1 |     ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params,
  66 |     1 |       0, 0,
  67 |     1 |       updated_params, 4,
  68 |     1 |       SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph),
  69 |     1 |       &graph, &tensor_arena, &graph_exec_arena);
  70 |     1 |     ccv_nnc_graph_set_default_static_schedule(graph, CCV_STREAM_CONTEXT_GPU, 0);
  71 |     1 |     GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
  72 |     1 |     cpu_inputs[0] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 16, 3, 32, 32), 0);
  73 |     1 |     cpu_inputs[1] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 16, 3, 32, 32), 0);
  74 |     1 |     dsfmt_t dsfmt;
  75 |     1 |     dsfmt_init_gen_rand(&dsfmt, 0);
  76 | 49.1k |     for (i = 0; i < 16 * 3 * 32 * 32; i++)
  77 | 49.1k |       cpu_inputs[0]->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  78 | 49.1k |     for (i = 0; i < 16 * 3 * 32 * 32; i++)
  79 | 49.1k |       cpu_inputs[1]->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  80 |     1 |     cpu_fits[0] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 16), 0);
  81 |     1 |     cpu_fits[1] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 16), 0);
  82 |    17 |     for (i = 0; i < 16; i++)
  83 |    16 |       cpu_fits[0]->data.f32[i] = cpu_fits[1]->data.f32[i] = (int)(dsfmt_genrand_open_close(&dsfmt) * 7.4); // Between 0 to 7.
  84 |     1 |     ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(cpu_inputs[0], cpu_inputs[1]), TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, x), ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(symbolic_graph, x, 1))), 0);
  85 |     1 |     ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(cpu_fits[0], cpu_fits[1]), TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, label), ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(symbolic_graph, label, 1))), 0);
  86 |     1 |     ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(), TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, bias1), ccv_nnc_tensor_from_symbol(tensor_arena, bias3)), 0);
  87 |   601 |     for (i = 0; i < 8 * 3 * 5 * 5; i++)
  88 |   600 |       w1_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  89 | 1.60k |     for (i = 0; i < 8 * 8 * 5 * 5; i++)
  90 | 1.60k |       w3_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  91 |     1 |     ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(w1_tensor, w3_tensor), TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, w1), ccv_nnc_tensor_from_symbol(tensor_arena, w3)), 0);
  92 |     1 |     ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, ccv_nnc_graph_default_stream(graph));
  93 |     1 |     ccv_nnc_stream_context_wait(ccv_nnc_graph_default_stream(graph));
  94 |     1 |     updated[0] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8, 3, 5, 5), 0);
  95 |     1 |     updated[1] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8), 0);
  96 |     1 |     updated[2] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8, 8, 5, 5), 0);
  97 |     1 |     updated[3] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8), 0);
  98 |     1 |     ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, updated_params[0]), ccv_nnc_tensor_from_symbol(tensor_arena, updated_params[1]), ccv_nnc_tensor_from_symbol(tensor_arena, updated_params[2]), ccv_nnc_tensor_from_symbol(tensor_arena, updated_params[3])), updated, 4, 0);
  99 |     1 |     ccv_nnc_symbolic_graph_free(symbolic_graph);
 100 |     1 |     ccv_nnc_graph_free(graph);
 101 |     1 |     ccv_nnc_tensor_arena_free(tensor_arena);
 102 |     1 |     ccv_nnc_graph_exec_arena_free(graph_exec_arena);
 103 |     1 |   }
 104 |       |   // Now, doing exactly the same, but with no parallel.
 105 |     1 |   {
 106 |     1 |     ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
 107 |     1 |     const ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 32, 3, 32, 32), 0);
 108 |     1 |     const ccv_nnc_tensor_symbol_t w1 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 8, 3, 5, 5), 0);
 109 |     1 |     const ccv_nnc_tensor_symbol_t bias1 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 8), 0);
 110 |     1 |     const ccv_nnc_tensor_symbol_t y1 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 32, 8, 32, 32), 0);
 111 |     1 |     const ccv_nnc_graph_exec_symbol_t conv1 = ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_CONVOLUTION_FORWARD(1, 8, 5, 5), TENSOR_SYMBOL_LIST(x, w1, bias1), TENSOR_SYMBOL_LIST(y1), "conv1");
 112 |     1 |     ccv_nnc_graph_exec_symbol_set_hint(symbolic_graph, conv1, HINT((1, 1), (2, 2)));
 113 |     1 |     const ccv_nnc_tensor_symbol_t y2 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 32, 8, 16, 16), 0);
 114 |     1 |     const ccv_nnc_graph_exec_symbol_t avg2 = ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_AVERAGE_POOL_FORWARD(2, 2), TENSOR_SYMBOL_LIST(y1), TENSOR_SYMBOL_LIST(y2), "avg2");
 115 |     1 |     ccv_nnc_graph_exec_symbol_set_hint(symbolic_graph, avg2, HINT((2, 2)));
 116 |     1 |     const ccv_nnc_tensor_symbol_t w3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 8, 8, 5, 5), 0);
 117 |     1 |     const ccv_nnc_tensor_symbol_t bias3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 8), 0);
 118 |     1 |     const ccv_nnc_tensor_symbol_t y3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 32, 8, 8, 8), 0);
 119 |     1 |     const ccv_nnc_graph_exec_symbol_t conv3 = ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_CONVOLUTION_FORWARD(1, 8, 5, 5), TENSOR_SYMBOL_LIST(y2, w3, bias3), TENSOR_SYMBOL_LIST(y3), "conv3");
 120 |     1 |     ccv_nnc_graph_exec_symbol_set_hint(symbolic_graph, conv3, HINT((2, 2), (2, 2)));
 121 |     1 |     const ccv_nnc_tensor_symbol_t y4 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 32, 8, 1, 1), 0);
 122 |     1 |     ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_AVERAGE_POOL_FORWARD(8, 8), TENSOR_SYMBOL_LIST(y3), TENSOR_SYMBOL_LIST(y4), "avg4");
 123 |     1 |     const ccv_nnc_tensor_symbol_t label = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 32), "label");
 124 |     1 |     const ccv_nnc_tensor_symbol_t y5 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 32, 8), "y5");
 125 |     1 |     const ccv_nnc_tensor_symbol_t loss = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 32), "loss");
 126 |     1 |     ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SOFTMAX_CROSSENTROPY_FORWARD(), TENSOR_SYMBOL_LIST(y4, label), TENSOR_SYMBOL_LIST(loss, y5), "softmax crossentropy");
 127 |     1 |     ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 128 |     1 |     ccv_nnc_tensor_symbol_t updated_params[4];
 129 |     1 |     ccv_nnc_tensor_symbol_t gradients[4];
 130 |     1 |     const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(CMD_SGD_FORWARD(0, 0.001, 1, 0.99, 0.9, 0.9));
 131 |     1 |     ccv_nnc_tensor_symbol_map_t saved_aux[saved_aux_size * 4];
 132 |     1 |     ccv_nnc_graph_exec_symbol_t updated_execs[4];
 133 |     1 |     ccv_nnc_symbolic_graph_minimize(symbolic_graph, CMD_SGD_FORWARD(0, 0.001, 1, 0.99, 0.9, 0.9), TENSOR_SYMBOL_LIST(loss), TENSOR_SYMBOL_LIST(w1, bias1, w3, bias3), 0, 0, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), gradients, updated_params, saved_aux, updated_execs);
 134 |     1 |     const ccv_nnc_tensor_symbol_t dloss = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, loss);
 135 |     1 |     ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SET_FORWARD(1), TENSOR_SYMBOL_LIST(), TENSOR_SYMBOL_LIST(dloss), "set 1");
 136 |     1 |     int i;
 137 |     5 |     for (i = 0; i < saved_aux_size * 4; i++)
 138 |     4 |       ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SET_FORWARD(0), TENSOR_SYMBOL_LIST(), TENSOR_SYMBOL_LIST(saved_aux[i].source), "set 0");
 139 |     1 |     ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 140 |     1 |     ccv_nnc_graph_t* graph;
 141 |     1 |     ccv_nnc_tensor_arena_t* tensor_arena;
 142 |     1 |     ccv_nnc_graph_exec_arena_t* graph_exec_arena;
 143 |     1 |     ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params,
 144 |     1 |       0, 0,
 145 |     1 |       updated_params, 4,
 146 |     1 |       SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph),
 147 |     1 |       &graph, &tensor_arena, &graph_exec_arena);
 148 |     1 |     ccv_nnc_tensor_t* cpu_input = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 32, 3, 32, 32), 0);
 149 |     1 |     memcpy(cpu_input->data.f32, cpu_inputs[0]->data.f32, sizeof(float) * 16 * 3 * 32 * 32);
 150 |     1 |     memcpy(cpu_input->data.f32 + 16 * 3 * 32 * 32, cpu_inputs[1]->data.f32, sizeof(float) * 16 * 3 * 32 * 32);
 151 |     1 |     ccv_nnc_tensor_t* cpu_fit = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 32), 0);
 152 |     1 |     memcpy(cpu_fit->data.f32, cpu_fits[0]->data.f32, sizeof(float) * 16);
 153 |     1 |     memcpy(cpu_fit->data.f32 + 16, cpu_fits[1]->data.f32, sizeof(float) * 16);
 154 |     1 |     ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(cpu_input), TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, x)), 0);
 155 |     1 |     ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(cpu_fit), TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, label)), 0);
 156 |     1 |     ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(), TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, bias1), ccv_nnc_tensor_from_symbol(tensor_arena, bias3)), 0);
 157 |     1 |     ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(w1_tensor, w3_tensor), TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, w1), ccv_nnc_tensor_from_symbol(tensor_arena, w3)), 0);
 158 |     1 |     ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0);
 159 |     1 |     ccv_nnc_tensor_t* np_updated[4];
 160 |     1 |     np_updated[0] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8, 3, 5, 5), 0);
 161 |     1 |     np_updated[1] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8), 0);
 162 |     1 |     np_updated[2] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8, 8, 5, 5), 0);
 163 |     1 |     np_updated[3] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8), 0);
 164 |     1 |     ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, updated_params[0]), ccv_nnc_tensor_from_symbol(tensor_arena, updated_params[1]), ccv_nnc_tensor_from_symbol(tensor_arena, updated_params[2]), ccv_nnc_tensor_from_symbol(tensor_arena, updated_params[3])), np_updated, 4, 0);
 165 |     1 |     ccv_nnc_symbolic_graph_free(symbolic_graph);
 166 |     1 |     ccv_nnc_graph_free(graph);
 167 |     1 |     ccv_nnc_tensor_arena_free(tensor_arena);
 168 |     1 |     ccv_nnc_graph_exec_arena_free(graph_exec_arena);
 169 |     1 |     REQUIRE_TENSOR_EQ(np_updated[0], updated[0], "updated params should be equal");
 170 |     1 |     REQUIRE_TENSOR_EQ(np_updated[1], updated[1], "updated params should be equal");
 171 |     1 |     REQUIRE_TENSOR_EQ(np_updated[2], updated[2], "updated params should be equal");
 172 |     1 |     REQUIRE_TENSOR_EQ(np_updated[3], updated[3], "updated params should be equal");
 173 |     1 |     ccv_nnc_tensor_free(cpu_input);
 174 |     1 |     ccv_nnc_tensor_free(cpu_fit);
 175 |     1 |     ccv_nnc_tensor_free(np_updated[0]);
 176 |     1 |     ccv_nnc_tensor_free(np_updated[1]);
 177 |     1 |     ccv_nnc_tensor_free(np_updated[2]);
 178 |     1 |     ccv_nnc_tensor_free(np_updated[3]);
 179 |     1 |   }
 180 |     0 |   ccv_nnc_tensor_free(updated[0]);
 181 |     1 |   ccv_nnc_tensor_free(updated[1]);
 182 |     1 |   ccv_nnc_tensor_free(updated[2]);
 183 |     1 |   ccv_nnc_tensor_free(updated[3]);
 184 |     1 |   ccv_nnc_tensor_free(cpu_inputs[0]);
 185 |     1 |   ccv_nnc_tensor_free(cpu_inputs[1]);
 186 |     1 |   ccv_nnc_tensor_free(cpu_fits[0]);
 187 |     1 |   ccv_nnc_tensor_free(cpu_fits[1]);
 188 |     1 |   ccv_nnc_tensor_free(w1_tensor);
 189 |     1 |   ccv_nnc_tensor_free(w3_tensor);
 190 |     1 | }
 191 |       |
 192 |       | TEST_CASE("schedule symbolic graph to data parallel with allreduce")
 193 |     1 | {
 194 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_CONVOLUTION_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN));
 195 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_COMM_ALLREDUCE_FORWARD, CCV_NNC_BACKEND_GPU_NCCL));
 196 |     1 |   GUARD_ELSE_RETURN(ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU) >= 2);
 197 |     1 |   ccv_nnc_tensor_t* updated[4];
 198 |     1 |   ccv_nnc_tensor_t* cpu_inputs[2];
 199 |     1 |   ccv_nnc_tensor_t* cpu_fits[2];
 200 |     1 |   ccv_nnc_tensor_t* w1_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8, 3, 5, 5), 0);
 201 |     1 |   ccv_nnc_tensor_t* w3_tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8, 8, 5, 5), 0);
 202 |     1 |   {
 203 |     1 |     ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
 204 |     1 |     const ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 16, 3, 32, 32), 0);
 205 |     1 |     const ccv_nnc_tensor_symbol_t w1 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 8, 3, 5, 5), 0);
 206 |     1 |     const ccv_nnc_tensor_symbol_t bias1 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 8), 0);
 207 |     1 |     const ccv_nnc_tensor_symbol_t y1 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 16, 8, 32, 32), 0);
 208 |     1 |     const ccv_nnc_graph_exec_symbol_t conv1 = ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_CONVOLUTION_FORWARD(1, 8, 5, 5), TENSOR_SYMBOL_LIST(x, w1, bias1), TENSOR_SYMBOL_LIST(y1), "conv1");
 209 |     1 |     ccv_nnc_graph_exec_symbol_set_hint(symbolic_graph, conv1, HINT((1, 1), (2, 2)));
 210 |     1 |     const ccv_nnc_tensor_symbol_t y2 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 16, 8, 16, 16), 0);
 211 |     1 |     const ccv_nnc_graph_exec_symbol_t avg2 = ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_AVERAGE_POOL_FORWARD(2, 2), TENSOR_SYMBOL_LIST(y1), TENSOR_SYMBOL_LIST(y2), "avg2");
 212 |     1 |     ccv_nnc_graph_exec_symbol_set_hint(symbolic_graph, avg2, HINT((2, 2)));
 213 |     1 |     const ccv_nnc_tensor_symbol_t w3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 8, 8, 5, 5), 0);
 214 |     1 |     const ccv_nnc_tensor_symbol_t bias3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 8), 0);
 215 |     1 |     const ccv_nnc_tensor_symbol_t y3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 16, 8, 8, 8), 0);
 216 |     1 |     const ccv_nnc_graph_exec_symbol_t conv3 = ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_CONVOLUTION_FORWARD(1, 8, 5, 5), TENSOR_SYMBOL_LIST(y2, w3, bias3), TENSOR_SYMBOL_LIST(y3), "conv3");
 217 |     1 |     ccv_nnc_graph_exec_symbol_set_hint(symbolic_graph, conv3, HINT((2, 2), (2, 2)));
 218 |     1 |     const ccv_nnc_tensor_symbol_t y4 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 16, 8, 1, 1), 0);
 219 |     1 |     ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_AVERAGE_POOL_FORWARD(8, 8), TENSOR_SYMBOL_LIST(y3), TENSOR_SYMBOL_LIST(y4), "avg4");
 220 |     1 |     const ccv_nnc_tensor_symbol_t y4a = ccv_nnc_tensor_symbol_alias_new(symbolic_graph, y4, ccv_nnc_no_ofs, DIM_ALLOC(8, 1, 1, 1), GPU_TENSOR_NCHW(000, 32F, 16, 8), 0);
 221 |     1 |     const ccv_nnc_tensor_symbol_t label = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 16), "label");
 222 |     1 |     const ccv_nnc_tensor_symbol_t y5 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 16, 8), "y5");
 223 |     1 |     const ccv_nnc_tensor_symbol_t loss = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 16), "loss");
 224 |     1 |     ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SOFTMAX_CROSSENTROPY_FORWARD(), TENSOR_SYMBOL_LIST(y4a, label), TENSOR_SYMBOL_LIST(loss, y5), "softmax crossentropy");
 225 |     1 |     ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 226 |     1 |     ccv_nnc_tensor_symbol_t updated_params[4];
 227 |     1 |     ccv_nnc_tensor_symbol_t gradients[4];
 228 |     1 |     const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(CMD_SGD_FORWARD(0, 0.001, 1, 0.99, 0.9, 0.9));
 229 |     1 |     ccv_nnc_tensor_symbol_map_t saved_aux[saved_aux_size * 4];
 230 |     1 |     ccv_nnc_graph_exec_symbol_t updated_execs[4];
 231 |     1 |     ccv_nnc_symbolic_graph_minimize(symbolic_graph, CMD_SGD_FORWARD(0, 0.001, 1, 0.99, 0.9, 0.9), TENSOR_SYMBOL_LIST(loss), TENSOR_SYMBOL_LIST(w1, bias1, w3, bias3), 0, 0, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), gradients, updated_params, saved_aux, updated_execs);
 232 |     1 |     const ccv_nnc_tensor_symbol_t dloss = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, loss);
 233 |     1 |     ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SET_FORWARD(1), TENSOR_SYMBOL_LIST(), TENSOR_SYMBOL_LIST(dloss), "set 1");
 234 |     1 |     int i;
 235 |     5 |     for (i = 0; i < saved_aux_size * 4; i++)
 236 |     4 |       ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SET_FORWARD(0), TENSOR_SYMBOL_LIST(), TENSOR_SYMBOL_LIST(saved_aux[i].source), "set 0");
 237 |     1 |     ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 238 |     1 |     ccv_nnc_symbolic_graph_data_parallel(symbolic_graph, 2, TENSOR_SYMBOL_LIST(w1, bias1, w3, bias3), gradients, 4, 0, 0, 0, 0, CCV_NNC_PARALLEL_REDUCE_OP_SUM, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), updated_execs, 4);
 239 |     1 |     ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 240 |     1 |     SYMBOLIC_GRAPH_GEN(symbolic_graph, CCV_NNC_LONG_DOT_GRAPH);
 241 |     1 |     ccv_nnc_graph_t* graph;
 242 |     1 |     ccv_nnc_tensor_arena_t* tensor_arena;
 243 |     1 |     ccv_nnc_graph_exec_arena_t* graph_exec_arena;
 244 |     1 |     ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params,
 245 |     1 |       0, 0,
 246 |     1 |       updated_params, 4,
 247 |     1 |       SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph),
 248 |     1 |       &graph, &tensor_arena, &graph_exec_arena);
 249 |     1 |     ccv_nnc_graph_set_default_static_schedule(graph, CCV_STREAM_CONTEXT_GPU, 0);
 250 |     1 |     GRAPH_GEN(graph, CCV_NNC_LONG_DOT_GRAPH);
 251 |     1 |     cpu_inputs[0] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 16, 3, 32, 32), 0);
 252 |     1 |     cpu_inputs[1] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 16, 3, 32, 32), 0);
 253 |     1 |     dsfmt_t dsfmt;
 254 |     1 |     dsfmt_init_gen_rand(&dsfmt, 0);
 255 | 49.1k |     for (i = 0; i < 16 * 3 * 32 * 32; i++)
 256 | 49.1k |       cpu_inputs[0]->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 257 | 49.1k |     for (i = 0; i < 16 * 3 * 32 * 32; i++)
 258 | 49.1k |       cpu_inputs[1]->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 259 |     1 |     cpu_fits[0] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 16), 0);
 260 |     1 |     cpu_fits[1] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 16), 0);
 261 |    17 |     for (i = 0; i < 16; i++)
 262 |    16 |       cpu_fits[0]->data.f32[i] = cpu_fits[1]->data.f32[i] = (int)(dsfmt_genrand_open_close(&dsfmt) * 7.4); // Between 0 to 7.
 263 |     1 |     ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(cpu_inputs[0], cpu_inputs[1]), TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, x), ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(symbolic_graph, x, 1))), 0);
 264 |     1 |     ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(cpu_fits[0], cpu_fits[1]), TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, label), ccv_nnc_tensor_from_symbol(tensor_arena, ccv_nnc_tensor_symbol_copy(symbolic_graph, label, 1))), 0);
 265 |     1 |     ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(), TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, bias1), ccv_nnc_tensor_from_symbol(tensor_arena, bias3)), 0);
 266 |   601 |     for (i = 0; i < 8 * 3 * 5 * 5; i++)
 267 |   600 |       w1_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 268 | 1.60k |     for (i = 0; i < 8 * 8 * 5 * 5; i++)
 269 | 1.60k |       w3_tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 270 |     1 |     ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(w1_tensor, w3_tensor), TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, w1), ccv_nnc_tensor_from_symbol(tensor_arena, w3)), 0);
 271 |     1 |     ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, ccv_nnc_graph_default_stream(graph));
 272 |     1 |     ccv_nnc_stream_context_wait(ccv_nnc_graph_default_stream(graph));
 273 |     1 |     updated[0] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8, 3, 5, 5), 0);
 274 |     1 |     updated[1] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8), 0);
 275 |     1 |     updated[2] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8, 8, 5, 5), 0);
 276 |     1 |     updated[3] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8), 0);
 277 |     1 |     ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, updated_params[0]), ccv_nnc_tensor_from_symbol(tensor_arena, updated_params[1]), ccv_nnc_tensor_from_symbol(tensor_arena, updated_params[2]), ccv_nnc_tensor_from_symbol(tensor_arena, updated_params[3])), updated, 4, 0);
 278 |     1 |     ccv_nnc_symbolic_graph_free(symbolic_graph);
 279 |     1 |     ccv_nnc_graph_free(graph);
 280 |     1 |     ccv_nnc_tensor_arena_free(tensor_arena);
 281 |     1 |     ccv_nnc_graph_exec_arena_free(graph_exec_arena);
 282 |     1 |   }
 283 |       |   // Now, doing exactly the same, but with no parallel.
 284 |     1 |   {
 285 |     1 |     ccv_nnc_symbolic_graph_t* const symbolic_graph = ccv_nnc_symbolic_graph_new();
 286 |     1 |     const ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 32, 3, 32, 32), 0);
 287 |     1 |     const ccv_nnc_tensor_symbol_t w1 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 8, 3, 5, 5), 0);
 288 |     1 |     const ccv_nnc_tensor_symbol_t bias1 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 8), 0);
 289 |     1 |     const ccv_nnc_tensor_symbol_t y1 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 32, 8, 32, 32), 0);
 290 |     1 |     const ccv_nnc_graph_exec_symbol_t conv1 = ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_CONVOLUTION_FORWARD(1, 8, 5, 5), TENSOR_SYMBOL_LIST(x, w1, bias1), TENSOR_SYMBOL_LIST(y1), "conv1");
 291 |     1 |     ccv_nnc_graph_exec_symbol_set_hint(symbolic_graph, conv1, HINT((1, 1), (2, 2)));
 292 |     1 |     const ccv_nnc_tensor_symbol_t y2 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 32, 8, 16, 16), 0);
 293 |     1 |     const ccv_nnc_graph_exec_symbol_t avg2 = ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_AVERAGE_POOL_FORWARD(2, 2), TENSOR_SYMBOL_LIST(y1), TENSOR_SYMBOL_LIST(y2), "avg2");
 294 |     1 |     ccv_nnc_graph_exec_symbol_set_hint(symbolic_graph, avg2, HINT((2, 2)));
 295 |     1 |     const ccv_nnc_tensor_symbol_t w3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 8, 8, 5, 5), 0);
 296 |     1 |     const ccv_nnc_tensor_symbol_t bias3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 8), 0);
 297 |     1 |     const ccv_nnc_tensor_symbol_t y3 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 32, 8, 8, 8), 0);
 298 |     1 |     const ccv_nnc_graph_exec_symbol_t conv3 = ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_CONVOLUTION_FORWARD(1, 8, 5, 5), TENSOR_SYMBOL_LIST(y2, w3, bias3), TENSOR_SYMBOL_LIST(y3), "conv3");
 299 |     1 |     ccv_nnc_graph_exec_symbol_set_hint(symbolic_graph, conv3, HINT((2, 2), (2, 2)));
 300 |     1 |     const ccv_nnc_tensor_symbol_t y4 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 32, 8, 1, 1), 0);
 301 |     1 |     ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_AVERAGE_POOL_FORWARD(8, 8), TENSOR_SYMBOL_LIST(y3), TENSOR_SYMBOL_LIST(y4), "avg4");
 302 |     1 |     const ccv_nnc_tensor_symbol_t label = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 32), "label");
 303 |     1 |     const ccv_nnc_tensor_symbol_t y5 = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 32, 8), "y5");
 304 |     1 |     const ccv_nnc_tensor_symbol_t loss = ccv_nnc_tensor_symbol_new(symbolic_graph, GPU_TENSOR_NCHW(000, 32F, 32), "loss");
 305 |     1 |     ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SOFTMAX_CROSSENTROPY_FORWARD(), TENSOR_SYMBOL_LIST(y4, label), TENSOR_SYMBOL_LIST(loss, y5), "softmax crossentropy");
 306 |     1 |     ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 307 |     1 |     ccv_nnc_tensor_symbol_t updated_params[4];
 308 |     1 |     ccv_nnc_tensor_symbol_t gradients[4];
 309 |     1 |     const int saved_aux_size = ccv_nnc_minimizer_saved_aux_size(CMD_SGD_FORWARD(0, 0.001, 1, 0.99, 0.9, 0.9));
 310 |     1 |     ccv_nnc_tensor_symbol_map_t saved_aux[saved_aux_size * 4];
 311 |     1 |     ccv_nnc_graph_exec_symbol_t updated_execs[4];
 312 |     1 |     ccv_nnc_symbolic_graph_minimize(symbolic_graph, CMD_SGD_FORWARD(0, 0.001, 1, 0.99, 0.9, 0.9), TENSOR_SYMBOL_LIST(loss), TENSOR_SYMBOL_LIST(w1, bias1, w3, bias3), 0, 0, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph), gradients, updated_params, saved_aux, updated_execs);
 313 |     1 |     const ccv_nnc_tensor_symbol_t dloss = ccv_nnc_tensor_symbol_for_backward(symbolic_graph, loss);
 314 |     1 |     ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SET_FORWARD(1), TENSOR_SYMBOL_LIST(), TENSOR_SYMBOL_LIST(dloss), "set 1");
 315 |     1 |     int i;
 316 |     5 |     for (i = 0; i < saved_aux_size * 4; i++)
 317 |     4 |       ccv_nnc_graph_exec_symbol_new(symbolic_graph, CMD_SET_FORWARD(0), TENSOR_SYMBOL_LIST(), TENSOR_SYMBOL_LIST(saved_aux[i].source), "set 0");
 318 |     1 |     ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
 319 |     1 |     ccv_nnc_graph_t* graph;
 320 |     1 |     ccv_nnc_tensor_arena_t* tensor_arena;
 321 |     1 |     ccv_nnc_graph_exec_arena_t* graph_exec_arena;
 322 |     1 |     ccv_nnc_symbolic_graph_compile(symbolic_graph, ccv_nnc_default_compile_params,
 323 |     1 |       0, 0,
 324 |     1 |       updated_params, 4,
 325 |     1 |       SYMBOLIC_GRAPH_SOURCES(symbolic_graph), SYMBOLIC_GRAPH_DESTINATIONS(symbolic_graph),
 326 |     1 |       &graph, &tensor_arena, &graph_exec_arena);
 327 |     1 |     ccv_nnc_tensor_t* cpu_input = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 32, 3, 32, 32), 0);
 328 |     1 |     memcpy(cpu_input->data.f32, cpu_inputs[0]->data.f32, sizeof(float) * 16 * 3 * 32 * 32);
 329 |     1 |     memcpy(cpu_input->data.f32 + 16 * 3 * 32 * 32, cpu_inputs[1]->data.f32, sizeof(float) * 16 * 3 * 32 * 32);
 330 |     1 |     ccv_nnc_tensor_t* cpu_fit = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 32), 0);
 331 |     1 |     memcpy(cpu_fit->data.f32, cpu_fits[0]->data.f32, sizeof(float) * 16);
 332 |     1 |     memcpy(cpu_fit->data.f32 + 16, cpu_fits[1]->data.f32, sizeof(float) * 16);
 333 |     1 |     ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(cpu_input), TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, x)), 0);
 334 |     1 |     ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(cpu_fit), TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, label)), 0);
 335 |     1 |     ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(), TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, bias1), ccv_nnc_tensor_from_symbol(tensor_arena, bias3)), 0);
 336 |     1 |     ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(w1_tensor, w3_tensor), TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, w1), ccv_nnc_tensor_from_symbol(tensor_arena, w3)), 0);
 337 |     1 |     ccv_nnc_graph_run(graph, 0, TRAVERSE_FULL, 0, 0);
 338 |     1 |     ccv_nnc_tensor_t* np_updated[4];
 339 |     1 |     np_updated[0] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8, 3, 5, 5), 0);
 340 |     1 |     np_updated[1] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8), 0);
 341 |     1 |     np_updated[2] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8, 8, 5, 5), 0);
 342 |     1 |     np_updated[3] = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 8), 0);
 343 |     1 |     ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_symbol(tensor_arena, updated_params[0]), ccv_nnc_tensor_from_symbol(tensor_arena, updated_params[1]), ccv_nnc_tensor_from_symbol(tensor_arena, updated_params[2]), ccv_nnc_tensor_from_symbol(tensor_arena, updated_params[3])), np_updated, 4, 0);
 344 |     1 |     ccv_nnc_symbolic_graph_free(symbolic_graph);
 345 |     1 |     ccv_nnc_graph_free(graph);
 346 |     1 |     ccv_nnc_tensor_arena_free(tensor_arena);
 347 |     1 |     ccv_nnc_graph_exec_arena_free(graph_exec_arena);
 348 |     1 |     REQUIRE_TENSOR_EQ(np_updated[0], updated[0], "updated params should be equal");
 349 |     1 |     REQUIRE_TENSOR_EQ(np_updated[1], updated[1], "updated params should be equal");
 350 |     1 |     REQUIRE_TENSOR_EQ(np_updated[2], updated[2], "updated params should be equal");
 351 |     1 |     REQUIRE_TENSOR_EQ(np_updated[3], updated[3], "updated params should be equal");
 352 |     1 |     ccv_nnc_tensor_free(cpu_input);
 353 |     1 |     ccv_nnc_tensor_free(cpu_fit);
 354 |     1 |     ccv_nnc_tensor_free(np_updated[0]);
 355 |     1 |     ccv_nnc_tensor_free(np_updated[1]);
 356 |     1 |     ccv_nnc_tensor_free(np_updated[2]);
 357 |     1 |     ccv_nnc_tensor_free(np_updated[3]);
 358 |     1 |   }
 359 |     0 |   ccv_nnc_tensor_free(updated[0]);
 360 |     1 |   ccv_nnc_tensor_free(updated[1]);
 361 |     1 |   ccv_nnc_tensor_free(updated[2]);
 362 |     1 |   ccv_nnc_tensor_free(updated[3]);
 363 |     1 |   ccv_nnc_tensor_free(cpu_inputs[0]);
 364 |     1 |   ccv_nnc_tensor_free(cpu_inputs[1]);
 365 |     1 |   ccv_nnc_tensor_free(cpu_fits[0]);
 366 |     1 |   ccv_nnc_tensor_free(cpu_fits[1]);
 367 |     1 |   ccv_nnc_tensor_free(w1_tensor);
 368 |     1 |   ccv_nnc_tensor_free(w3_tensor);
 369 |     1 | }
 370 |       |
 371 |       | #include "case_main.h"
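
The two test cases above differ only in which argument slot of ccv_nnc_symbolic_graph_data_parallel receives the gradient symbols. Below is a minimal sketch of that call site, lifted from the listing; the helper name parallelize_for_two_gpus, the fixed parameter count of 4, and passing plain arrays in place of the TENSOR_SYMBOL_LIST macro are illustrative assumptions, not part of the test file or a definitive API reference.

#include <ccv.h>
#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>

/* Hypothetical helper: the symbolic graph is assumed to already contain the 4
 * trainable parameters, their gradients and the SGD update execs, exactly as
 * built by the tests above. */
static void parallelize_for_two_gpus(ccv_nnc_symbolic_graph_t* const symbolic_graph,
  ccv_nnc_tensor_symbol_t* const params, /* w1, bias1, w3, bias3 */
  ccv_nnc_tensor_symbol_t* const gradients,
  ccv_nnc_graph_exec_symbol_t* const updated_execs,
  const int use_allreduce)
{
  if (use_allreduce)
    /* "allreduce" test case: the gradient symbols occupy the allreducer slots. */
    ccv_nnc_symbolic_graph_data_parallel(symbolic_graph, 2, params, 4, gradients, 4, 0, 0, 0, 0, CCV_NNC_PARALLEL_REDUCE_OP_SUM, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), updated_execs, 4);
  else
    /* "broadcast and reduce" test case: the gradient symbols occupy the reducer
     * slots instead, while the parameters are listed for broadcast. */
    ccv_nnc_symbolic_graph_data_parallel(symbolic_graph, 2, params, 4, 0, 0, 0, gradients, 4, 0, CCV_NNC_PARALLEL_REDUCE_OP_SUM, SYMBOLIC_GRAPH_SOURCES(symbolic_graph), updated_execs, 4);
  /* Re-derive sources and destinations after the graph has been rewritten. */
  ccv_nnc_graph_exec_symbol_autogen(symbolic_graph, 0, 0, CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
}

Everything else in the two test cases is identical, which is why the listing is nearly line-for-line duplicated between them.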