/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/imdb.tests.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "case.h" |
2 | | #include "ccv_case.h" |
3 | | #include "ccv_nnc_case.h" |
4 | | #include <ccv.h> |
5 | | #include <ccv_internal.h> |
6 | | #include <nnc/ccv_nnc.h> |
7 | | #include <nnc/ccv_nnc_easy.h> |
8 | | #include <3rdparty/dsfmt/dSFMT.h> |
9 | | #include <ctype.h> |
10 | | #include <3rdparty/khash/khash.h> |
11 | | |
// Test-harness hook (declared by case.h): initialize the NNC runtime once
// before any test case in this file runs.
TEST_SETUP()
{
	ccv_nnc_init();
}
16 | | |
17 | | KHASH_MAP_INIT_STR(vocab_map, int) |
18 | | |
19 | | static CCV_WARN_UNUSED(ccv_nnc_tensor_t*) _text_to_tensor_index(const char* const filename, const khash_t(vocab_map)* const vocab, const int vocab_size, const int max_length) |
20 | 768 | { |
21 | 768 | const int end_flag = vocab_size - 2; |
22 | 768 | const int pad_flag = vocab_size - 1; |
23 | 768 | char* const word = (char*)ccmalloc(1024); |
24 | 768 | ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, max_length), 0); |
25 | 768 | FILE* const file = fopen(filename, "r"); |
26 | 768 | int t = 0; |
27 | 183k | while (fscanf(file, "%1023s", word) != EOF) |
28 | 182k | { |
29 | 182k | if (t >= max_length) |
30 | 60 | break; |
31 | 182k | int j; |
32 | 1.03M | for(j = 0; word[j]; j++853k ) |
33 | 853k | word[j] = tolower(word[j]); |
34 | 182k | char* saveptr; |
35 | 182k | const char* token = strtok_r(word, ".,<>/~`@#$%^&*+\\\"", &saveptr); |
36 | 367k | while (token) |
37 | 184k | { |
38 | 184k | if (t >= max_length) |
39 | 0 | break; |
40 | 184k | const khiter_t k = kh_get(vocab_map, vocab, token); |
41 | 184k | if (k != kh_end(vocab)) |
42 | 176k | tensor->data.i32[t++] = kh_val(vocab, k); |
43 | 184k | token = strtok_r(0, ".,<>/~`@#$%^&*+\\\"", &saveptr); |
44 | 184k | } |
45 | 182k | } |
46 | 768 | fclose(file); |
47 | 768 | if (t < max_length) |
48 | 708 | { |
49 | 708 | tensor->data.i32[t] = end_flag; |
50 | 216k | for (++t; t < max_length; t++215k ) |
51 | 215k | tensor->data.i32[t] = pad_flag; |
52 | 708 | } |
53 | 768 | ccfree(word); |
54 | 768 | return tensor; |
55 | 768 | } |
56 | | |
// One labeled text sample as stored in the dataset array.
typedef struct {
	ccv_nnc_tensor_t* tensor; // token indices, CPU 32S tensor of max_length entries
	ccv_nnc_tensor_t* mask; // single 32S value: the valid (pre-padding) length
	int c; // class label read from the list file
} ccv_nnc_text_t;
62 | | |
63 | | static ccv_array_t* _array_from_disk_new(const char* const list, const char* const base_dir, const khash_t(vocab_map)* const vocab, const int vocab_size, const int max_length, const int limit) |
64 | 6 | { |
65 | 6 | FILE *r = fopen(list, "r"); |
66 | 6 | assert(r && "list doesn't exists"); |
67 | 6 | const int pad_flag = vocab_size - 1; |
68 | 6 | int dirlen = (base_dir != 0) ? strlen(base_dir) + 1 : 00 ; |
69 | 6 | ccv_array_t* categorizeds = ccv_array_new(sizeof(ccv_nnc_text_t), 64, 0); |
70 | 6 | int c; |
71 | 6 | char* file = (char*)ccmalloc(1024); |
72 | 6 | char* filename = (char*)ccmalloc(1024); |
73 | 768 | while (fscanf(r, "%d %1023s", &c, file) != EOF) |
74 | 768 | { |
75 | 768 | if (base_dir != 0) |
76 | 768 | { |
77 | 768 | strncpy(filename, base_dir, 1024); |
78 | 768 | filename[dirlen - 1] = '/'; |
79 | 768 | } |
80 | 768 | strncpy(filename + dirlen, file, 1024 - dirlen); |
81 | 768 | ccv_nnc_tensor_t* const tensor = _text_to_tensor_index(filename, vocab, vocab_size, max_length); |
82 | 768 | int length = 0; |
83 | 768 | int i; |
84 | 178k | for (i = 0; !length && i < max_length178k ; i++178k ) |
85 | 178k | if (tensor->data.i32[i] == pad_flag) |
86 | 708 | length = i; |
87 | 768 | ccv_nnc_tensor_t* const mask = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 1), 0); |
88 | 768 | mask->data.i32[0] = length ? length708 : max_length60 ; |
89 | 768 | ccv_nnc_text_t categorized = { |
90 | 768 | .tensor = tensor, |
91 | 768 | .mask = mask, |
92 | 768 | .c = c |
93 | 768 | }; |
94 | 768 | ccv_array_push(categorizeds, &categorized); |
95 | 768 | if (limit > 0 && categorizeds->rnum >= limit) |
96 | 6 | break; |
97 | 768 | } |
98 | 6 | ccfree(filename); |
99 | 6 | ccfree(file); |
100 | 6 | fclose(r); |
101 | 6 | return categorizeds; |
102 | 6 | } |
103 | | |
// Build a multi-head self-attention model.
// k: per-head feature size, h: number of heads, b: batch size,
// t: sequence length, dropout: attention dropout probability (applied only
// when > 0). Takes two inputs (x of shape (t, b, k) per the reshapes below,
// plus an attention mask) and produces a (t, b, k) output.
static ccv_cnnp_model_t* _self_attention_new(const int k, const int h, const int b, const int t, const float dropout)
{
	const ccv_cnnp_model_io_t x = ccv_cnnp_input();
	ccv_cnnp_model_io_t mask = ccv_cnnp_input();
	// Flatten (t, b, k) -> (b * t, k) so one dense projects every position at once.
	ccv_cnnp_model_io_t multiheads = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b * t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(x));
	// Each projection emits k * h features: k per head, all heads fused.
	ccv_cnnp_model_t* const tokeys = ccv_cnnp_dense(k * h, 1, 0, 1, "tokeys");
	ccv_cnnp_model_t* const toqueries = ccv_cnnp_dense(k * h, 1, 0, 1, "toqueries");
	ccv_cnnp_model_t* const tovalues = ccv_cnnp_dense(k * h, 1, 0, 1, "tovalues");
	ccv_cnnp_model_io_t keys = ccv_cnnp_model_apply(tokeys, MODEL_IO_LIST(multiheads));
	ccv_cnnp_model_io_t queries = ccv_cnnp_model_apply(toqueries, MODEL_IO_LIST(multiheads));
	ccv_cnnp_model_io_t values = ccv_cnnp_model_apply(tovalues, MODEL_IO_LIST(multiheads));
	// Split heads back out: (b * t, k * h) -> (t, b, h, k).
	keys = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(t, b, h, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(keys));
	queries = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(t, b, h, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(queries));
	values = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(t, b, h, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(values));
	// Swap axes 0 and 2: (t, b, h, k) -> (h, b, t, k), then fold heads into
	// the batch dimension so attention is a plain batched matmul.
	keys = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 2, 0), MODEL_IO_LIST(keys));
	queries = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 2, 0), MODEL_IO_LIST(queries));
	values = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 2, 0), MODEL_IO_LIST(values));
	keys = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b * h, t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(keys));
	queries = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b * h, t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(queries));
	values = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b * h, t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(values));
	// Attention scores: queries x keys^T -> (b * h, t, t), scaled by 1/sqrt(k).
	ccv_cnnp_model_io_t dot = ccv_cnnp_model_apply(ccv_cnnp_matmul(NO_TRANSPOSE, TRANSPOSE(1, 2), 0, 0), MODEL_IO_LIST(queries, keys));
	const float scale = 1. / sqrt(k);
	dot = ccv_cnnp_model_apply(ccv_cnnp_scalar_mul(scale, 0), MODEL_IO_LIST(dot));
	// Mask out invalid positions with a large negative value before softmax
	// (presumably positions where mask == 0 — confirm against ccv_cnnp_masked_fill docs).
	dot = ccv_cnnp_model_apply(ccv_cnnp_masked_fill(0, -1e9, 0), MODEL_IO_LIST(dot, mask));
	// Softmax over the last axis (rows of length t).
	dot = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b * h * t, t), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(dot));
	dot = ccv_cnnp_model_apply(ccv_cnnp_softmax(0), MODEL_IO_LIST(dot));
	if (dropout > 0)
		dot = ccv_cnnp_model_apply(ccv_cnnp_dropout(dropout, 0, 0), MODEL_IO_LIST(dot));
	dot = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b * h, t, t), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(dot));
	// Weighted sum of values: (b * h, t, t) x (b * h, t, k) -> (b * h, t, k).
	ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(ccv_cnnp_matmul(NO_TRANSPOSE, NO_TRANSPOSE, 0, 0), MODEL_IO_LIST(dot, values));
	// Undo the head folding: -> (h, b, t, k) -> (t, b, h, k) -> (b * t, h * k).
	out = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(h, b, t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
	out = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 2, 0), MODEL_IO_LIST(out));
	out = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b * t, h * k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
	// Project the concatenated heads back down to k features.
	ccv_cnnp_model_t* const unifyheads = ccv_cnnp_dense(k, 0, 0, 1, "unifyheads");
	out = ccv_cnnp_model_apply(unifyheads, MODEL_IO_LIST(out));
	out = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(t, b, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
	return ccv_cnnp_model_new(MODEL_IO_LIST(x, mask), MODEL_IO_LIST(out), 1, "self-attention");
}
142 | | |
143 | | static ccv_cnnp_model_t* _transformer_block_new(const int k, const int h, const int b, const int t, const int ff, const float dropout) |
144 | 4 | { |
145 | 4 | ccv_cnnp_model_io_t const x = ccv_cnnp_input(); |
146 | 4 | ccv_cnnp_model_io_t const mask = ccv_cnnp_input(); |
147 | 4 | ccv_cnnp_model_t* const self_attention = _self_attention_new(k, h, b, t, dropout); |
148 | 4 | ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(self_attention, MODEL_IO_LIST(x, mask)); |
149 | 4 | out = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(x, out)); |
150 | 4 | ccv_cnnp_model_io_t first = ccv_cnnp_model_apply(ccv_cnnp_layer_norm(1e-5, DIM_ALLOC(2), 1, 1, 1, 0), MODEL_IO_LIST(out)); |
151 | 4 | if (dropout) |
152 | 4 | out = ccv_cnnp_model_apply(ccv_cnnp_dropout(dropout, 0, 0), MODEL_IO_LIST(first)); |
153 | 0 | else |
154 | 0 | out = first; |
155 | 4 | out = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b * t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out)); |
156 | 4 | out = ccv_cnnp_model_apply(ccv_cnnp_dense(ff, 0, 0, 1, 0), MODEL_IO_LIST(out)); |
157 | 4 | out = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(out)); |
158 | 4 | out = ccv_cnnp_model_apply(ccv_cnnp_dense(k, 0, 0, 1, 0), MODEL_IO_LIST(out)); |
159 | 4 | out = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(t, b, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out)); |
160 | 4 | out = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(first, out)); |
161 | 4 | out = ccv_cnnp_model_apply(ccv_cnnp_layer_norm(1e-5, DIM_ALLOC(2), 1, 1, 1, 0), MODEL_IO_LIST(out)); |
162 | 4 | if (dropout > 0) |
163 | 4 | out = ccv_cnnp_model_apply(ccv_cnnp_dropout(dropout, 0, 0), MODEL_IO_LIST(out)); |
164 | 4 | return ccv_cnnp_model_new(MODEL_IO_LIST(x, mask), MODEL_IO_LIST(out), 1, "transformer"); |
165 | 4 | } |
166 | | |
167 | | static ccv_cnnp_model_t* _classifier_transformer_new(const int layers, const int k, const int h, const int b, const int t, const int ff, const float dropout) |
168 | 1 | { |
169 | 1 | ccv_cnnp_model_io_t const x = ccv_cnnp_input(); |
170 | 1 | ccv_cnnp_model_io_t const mask = ccv_cnnp_input(); |
171 | 1 | ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 1, 0), MODEL_IO_LIST(x)); |
172 | 1 | int i; |
173 | 3 | for (i = 0; i < layers; i++2 ) |
174 | 2 | out = ccv_cnnp_model_apply(_transformer_block_new(k, h, b, t, ff, dropout), MODEL_IO_LIST(out, mask)); |
175 | 1 | out = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 1, 0), MODEL_IO_LIST(out)); // t, b, k -> b, t, k |
176 | 1 | out = ccv_cnnp_model_apply(ccv_cnnp_transpose(1, 2, 0), MODEL_IO_LIST(out)); // b, t, k -> b, k, t |
177 | 1 | out = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b, k, t, 1), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out)); |
178 | 1 | out = ccv_cnnp_model_apply(ccv_cnnp_average_pool(DIM_ALLOC(0, 0), ccv_nnc_no_hint, 0), MODEL_IO_LIST(out)); |
179 | | // Last layer, get it to 2. |
180 | 1 | out = ccv_cnnp_model_apply(ccv_cnnp_flatten(0), MODEL_IO_LIST(out)); |
181 | 1 | out = ccv_cnnp_model_apply(ccv_cnnp_dense(2, 0, 0, 1, 0), MODEL_IO_LIST(out)); |
182 | 1 | return ccv_cnnp_model_new(MODEL_IO_LIST(x, mask), MODEL_IO_LIST(out), 1, "classifier"); |
183 | 1 | } |
184 | | |
// Hyper-parameters passed as context to the dynamic model constructors.
typedef struct {
	int layers; // number of stacked transformer blocks
	int h; // number of attention heads
	int ff; // feed-forward expansion factor (hidden size = ff * k)
	float dropout; // dropout probability, disabled when <= 0
} classifier_transformer_params_t;
191 | | |
192 | | static ccv_cnnp_model_t* _dynamic_classifier_transformer(const ccv_nnc_tensor_param_t* const inputs, const int input_size, void* const context) |
193 | 1 | { |
194 | 1 | const classifier_transformer_params_t* const params = (classifier_transformer_params_t*)context; |
195 | 1 | const int b = inputs[0].dim[0]; |
196 | 1 | const int t = inputs[0].dim[1]; |
197 | 1 | const int k = inputs[0].dim[2]; |
198 | 1 | const int ff = params->ff * k; |
199 | 1 | return _classifier_transformer_new(params->layers, k, params->h, b, t, ff, params->dropout); |
200 | 1 | } |
201 | | |
202 | | static ccv_cnnp_model_t* _binary_classifier_transformer_new(const int layers, const int k, const int h, const int b, const int t, const int ff, const float dropout) |
203 | 1 | { |
204 | 1 | ccv_cnnp_model_io_t const x = ccv_cnnp_input(); |
205 | 1 | ccv_cnnp_model_io_t const mask = ccv_cnnp_input(); |
206 | 1 | ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 1, 0), MODEL_IO_LIST(x)); |
207 | 1 | int i; |
208 | 3 | for (i = 0; i < layers; i++2 ) |
209 | 2 | out = ccv_cnnp_model_apply(_transformer_block_new(k, h, b, t, ff, dropout), MODEL_IO_LIST(out, mask)); |
210 | 1 | out = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 1, 0), MODEL_IO_LIST(out)); // t, b, k -> b, t, k |
211 | 1 | out = ccv_cnnp_model_apply(ccv_cnnp_transpose(1, 2, 0), MODEL_IO_LIST(out)); // b, t, k -> b, k, t |
212 | 1 | out = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b, k, t, 1), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out)); |
213 | 1 | out = ccv_cnnp_model_apply(ccv_cnnp_average_pool(DIM_ALLOC(0, 0), ccv_nnc_no_hint, 0), MODEL_IO_LIST(out)); |
214 | | // Last layer, get it to 1. |
215 | 1 | out = ccv_cnnp_model_apply(ccv_cnnp_flatten(0), MODEL_IO_LIST(out)); |
216 | 1 | out = ccv_cnnp_model_apply(ccv_cnnp_dense(1, 0, 0, 1, 0), MODEL_IO_LIST(out)); |
217 | 1 | return ccv_cnnp_model_new(MODEL_IO_LIST(x, mask), MODEL_IO_LIST(out), 1, "classifier"); |
218 | 1 | } |
219 | | |
220 | | static ccv_cnnp_model_t* _dynamic_binary_classifier_transformer(const ccv_nnc_tensor_param_t* const inputs, const int input_size, void* const context) |
221 | 1 | { |
222 | 1 | const classifier_transformer_params_t* const params = (classifier_transformer_params_t*)context; |
223 | 1 | const int b = inputs[0].dim[0]; |
224 | 1 | const int t = inputs[0].dim[1]; |
225 | 1 | const int k = inputs[0].dim[2]; |
226 | 1 | const int ff = params->ff * k; |
227 | 1 | return _binary_classifier_transformer_new(params->layers, k, params->h, b, t, ff, params->dropout); |
228 | 1 | } |
229 | | |
230 | | static void _vocab_init(const char* const vocab_file, khash_t(vocab_map)** const vocab_ref, int* const vocab_size_ref) |
231 | 3 | { |
232 | 3 | FILE* const vocab_ptr = fopen(vocab_file, "r"); |
233 | 3 | khash_t(vocab_map)* const vocab = kh_init(vocab_map); |
234 | 3 | int i, ret; |
235 | 3 | char* const word = (char*)ccmalloc(1024); |
236 | 268k | for (i = 0; fscanf(vocab_ptr, "%1023s", word) != EOF; i++268k ) |
237 | 268k | { |
238 | 268k | const khiter_t k = kh_put(vocab_map, vocab, strdup(word), &ret); |
239 | 268k | kh_val(vocab, k) = i; |
240 | 268k | } |
241 | 3 | ccfree(word); |
242 | 3 | fclose(vocab_ptr); |
243 | 3 | *vocab_ref = vocab; |
244 | 3 | *vocab_size_ref = i; |
245 | 3 | } |
246 | | |
247 | | static void _vocab_destroy(khash_t(vocab_map)* const vocab) |
248 | 3 | { |
249 | | // Free keys. |
250 | 393k | for (khiter_t k = kh_begin3 (vocab); k != kh_end(vocab); k++393k ) |
251 | 393k | if (kh_exist(vocab, k)) |
252 | 268k | free((void*)kh_key(vocab, k)); |
253 | 3 | kh_destroy(vocab_map, vocab); |
254 | 3 | } |
255 | | |
256 | | static int train_imdb_fix(const int epoch_limit, const int vocab_size, const int batch_size, const int max_length, const int embedding_size, ccv_cnnp_dataframe_t* const train_data, ccv_cnnp_dataframe_t* const test_data) |
257 | 1 | { |
258 | 1 | const int tensor_idx = ccv_cnnp_dataframe_extract_value(train_data, 0, offsetof(ccv_nnc_text_t, tensor), 0); |
259 | 1 | const int one_hot_idx = ccv_cnnp_dataframe_one_hot(train_data, 0, offsetof(ccv_nnc_text_t, c), 2, 1, 0, CCV_32F, CCV_TENSOR_FORMAT_NCHW, 0); |
260 | 1 | const int mask_idx = ccv_cnnp_dataframe_extract_value(train_data, 0, offsetof(ccv_nnc_text_t, mask), 0); |
261 | 1 | const int device_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU); |
262 | 1 | ccv_cnnp_dataframe_t* const batched_data = ccv_cnnp_dataframe_combine_new(train_data, COLUMN_ID_LIST(tensor_idx, one_hot_idx, mask_idx), batch_size, device_count, CCV_TENSOR_FORMAT_NCHW); |
263 | 1 | const int test_tensor_idx = ccv_cnnp_dataframe_extract_value(test_data, 0, offsetof(ccv_nnc_text_t, tensor), 0); |
264 | 1 | const int test_one_hot_idx = ccv_cnnp_dataframe_one_hot(test_data, 0, offsetof(ccv_nnc_text_t, c), 2, 1, 0, CCV_32F, CCV_TENSOR_FORMAT_NCHW, 0); |
265 | 1 | const int test_mask_idx = ccv_cnnp_dataframe_extract_value(test_data, 0, offsetof(ccv_nnc_text_t, mask), 0); |
266 | 1 | ccv_cnnp_dataframe_t* const test_batched_data = ccv_cnnp_dataframe_combine_new(test_data, COLUMN_ID_LIST(test_tensor_idx, test_one_hot_idx, test_mask_idx), batch_size, device_count, CCV_TENSOR_FORMAT_NCHW); |
267 | 1 | int gpu_batched[device_count * 2]; |
268 | 1 | int test_gpu_batched[device_count * 2]; |
269 | 1 | int i, j; |
270 | 5 | for (i = 0; i < device_count; i++4 ) |
271 | 4 | { |
272 | 4 | const int seq_len_batched = ccv_cnnp_dataframe_extract_tuple(batched_data, 0, i * 3 + 2, 0); |
273 | 4 | const int tupled_mask_batched = ccv_cnnp_dataframe_one_squared(batched_data, COLUMN_ID_LIST(seq_len_batched), 0, max_length, 0); |
274 | 4 | gpu_batched[i] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, 0, i * 3, 2, i, 0); |
275 | 4 | gpu_batched[i + device_count] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, tupled_mask_batched, 0, 1, i, 0); |
276 | 4 | const int test_seq_len_batched = ccv_cnnp_dataframe_extract_tuple(test_batched_data, 0, i * 3 + 2, 0); |
277 | 4 | const int test_tupled_mask_batched = ccv_cnnp_dataframe_one_squared(test_batched_data, COLUMN_ID_LIST(test_seq_len_batched), 0, max_length, 0); |
278 | 4 | test_gpu_batched[i] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, 0, i * 3, 2, i, 0); |
279 | 4 | test_gpu_batched[i + device_count] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, test_tupled_mask_batched, 0, 1, i, 0); |
280 | 4 | } |
281 | 1 | ccv_cnnp_dataframe_iter_t* const iter = ccv_cnnp_dataframe_iter_new(batched_data, gpu_batched, device_count * 2); |
282 | 1 | ccv_nnc_dynamic_graph_t* const dynamic_graph = ccv_nnc_dynamic_graph_new(); |
283 | 1 | ccv_nnc_tensor_variable_t vocab_vec[device_count]; |
284 | 1 | ccv_nnc_tensor_variable_t seq_vec[device_count]; |
285 | 1 | ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 1); |
286 | 1 | ccv_nnc_tensor_param_t vocab_params = GPU_TENSOR_NCHW(000, 32F, vocab_size, embedding_size); |
287 | 1 | vocab_vec[0] = ccv_nnc_tensor_variable_new(dynamic_graph, vocab_params); |
288 | 1 | ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(vocab_vec[0]), 0, 0); |
289 | 1 | ccv_nnc_tensor_param_t seq_params = GPU_TENSOR_NCHW(000, 32F, max_length, embedding_size); |
290 | 1 | seq_vec[0] = ccv_nnc_tensor_variable_new(dynamic_graph, seq_params); |
291 | 1 | ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(seq_vec[0]), 0, 0); |
292 | 4 | for (i = 1; i < device_count; i++3 ) |
293 | 3 | { |
294 | 3 | CCV_TENSOR_SET_DEVICE_ID(vocab_params.type, i); |
295 | 3 | vocab_vec[i] = ccv_nnc_tensor_variable_new(dynamic_graph, vocab_params); |
296 | 3 | ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(vocab_vec[0]), TENSOR_VARIABLE_LIST(vocab_vec[i]), 0, 0); |
297 | 3 | CCV_TENSOR_SET_DEVICE_ID(seq_params.type, i); |
298 | 3 | seq_vec[i] = ccv_nnc_tensor_variable_new(dynamic_graph, seq_params); |
299 | 3 | ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(seq_vec[0]), TENSOR_VARIABLE_LIST(seq_vec[i]), 0, 0); |
300 | 3 | } |
301 | 1 | ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 0); |
302 | 1 | ccv_nnc_tensor_t* const seq_indices_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32S, batch_size * max_length), 0); |
303 | 65 | for (i = 0; i < batch_size; i++64 ) |
304 | 32.8k | for (j = 0; 64 j < max_length; j++32.7k ) |
305 | 32.7k | seq_indices_cpu->data.i32[i * max_length + j] = j; |
306 | 1 | ccv_nnc_tensor_variable_t seq_indices[device_count]; |
307 | 5 | for (i = 0; i < device_count; i++4 ) |
308 | 4 | { |
309 | 4 | ccv_nnc_tensor_param_t seq_params = GPU_TENSOR_NCHW(000, 32S, batch_size * max_length); |
310 | 4 | CCV_TENSOR_SET_DEVICE_ID(seq_params.type, i); |
311 | 4 | seq_indices[i] = ccv_nnc_tensor_constant_new(dynamic_graph, seq_params); |
312 | 4 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(seq_indices_cpu), TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, seq_indices[i], 0)), 0); |
313 | 4 | } |
314 | 1 | ccv_nnc_tensor_free(seq_indices_cpu); |
315 | 1 | classifier_transformer_params_t classifier_transformer_params = { |
316 | 1 | .layers = 2, |
317 | 1 | .h = 8, |
318 | 1 | .ff = 4, |
319 | 1 | .dropout = 0.1, |
320 | 1 | }; |
321 | 1 | ccv_cnnp_model_t* const transformer = ccv_cnnp_dynamic_new(_dynamic_classifier_transformer, &classifier_transformer_params, 0); |
322 | 1 | ccv_cnnp_model_set_data_parallel(transformer, device_count); |
323 | 1 | const int epoch_end = (ccv_cnnp_dataframe_row_count(train_data) + device_count * batch_size - 1) / (device_count * batch_size); |
324 | 1 | ccv_cnnp_dataframe_shuffle(train_data); |
325 | 1 | ccv_nnc_cmd_t adam = CMD_ADAM_FORWARD(1, 0.0001, 0.9, 0.98, 0, 1e-9, 0); |
326 | 1 | const int aux_size = ccv_nnc_minimizer_saved_aux_size(adam); |
327 | 1 | ccv_nnc_tensor_variable_t saved_auxs[device_count * aux_size * 2]; |
328 | 5 | for (i = 0; i < device_count; i++4 ) |
329 | 4 | { |
330 | 12 | for (j = 0; j < aux_size; j++8 ) |
331 | 8 | { |
332 | 8 | ccv_nnc_tensor_param_t saved_aux_params = GPU_TENSOR_NCHW(000, 32F, vocab_size, embedding_size); |
333 | 8 | CCV_TENSOR_SET_DEVICE_ID(saved_aux_params.type, i); |
334 | 8 | saved_auxs[i * aux_size * 2 + j] = ccv_nnc_tensor_variable_new(dynamic_graph); |
335 | 8 | } |
336 | 12 | for (j = 0; j < aux_size; j++8 ) |
337 | 8 | { |
338 | 8 | ccv_nnc_tensor_param_t saved_aux_params = GPU_TENSOR_NCHW(000, 32F, max_length, embedding_size); |
339 | 8 | CCV_TENSOR_SET_DEVICE_ID(saved_aux_params.type, i); |
340 | 8 | saved_auxs[i* aux_size * 2 + aux_size + j] = ccv_nnc_tensor_variable_new(dynamic_graph); |
341 | 8 | } |
342 | 4 | } |
343 | 1 | ccv_nnc_tensor_t* const out_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, batch_size, 2), 0); |
344 | 1 | ccv_nnc_tensor_t* const fit_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, batch_size, 2), 0); |
345 | 1 | ccv_nnc_tensor_t** tensor[device_count * 2]; |
346 | 1 | int epoch = 0; |
347 | 1 | ccv_nnc_stream_context_t* const stream = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU); |
348 | 2 | for (i = 0; epoch < epoch_limit; i++1 ) |
349 | 1 | { |
350 | 1 | float learn_rate = 0.0001 * ccv_min(i / (10000. / batch_size), 1) * device_count; |
351 | 1 | adam = CMD_ADAM_FORWARD(i + 1, learn_rate, 0.9, 0.98, 0, 1e-9, 0); |
352 | 1 | ccv_cnnp_dataframe_iter_next(iter, (void**)tensor, device_count, stream); |
353 | 1 | ccv_nnc_tensor_t word_indices_tensor[device_count]; |
354 | 1 | ccv_nnc_tensor_t mask_tensor[device_count]; |
355 | 1 | ccv_nnc_tensor_variable_t word_indices[device_count]; |
356 | 1 | ccv_nnc_tensor_variable_t word_vec[device_count]; |
357 | 1 | ccv_nnc_tensor_variable_t pos_vec[device_count]; |
358 | 1 | ccv_nnc_tensor_variable_t select_vec[device_count]; |
359 | 1 | ccv_nnc_tensor_variable_t vec[device_count * 2]; |
360 | 1 | ccv_nnc_tensor_variable_t out[device_count]; |
361 | 5 | for (j = 0; j < device_count; j++4 ) |
362 | 4 | { |
363 | 4 | ccv_nnc_tensor_param_t word_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * max_length); |
364 | 4 | CCV_TENSOR_SET_DEVICE_ID(word_indices_params.type, j); |
365 | 4 | word_indices_tensor[j] = ccv_nnc_tensor(tensor[j][0]->data.f32, word_indices_params, 0); |
366 | 4 | word_indices[j] = ccv_nnc_tensor_variable_new(dynamic_graph, word_indices_params); |
367 | 4 | ccv_nnc_tensor_variable_set(dynamic_graph, word_indices[j], &word_indices_tensor[j]); |
368 | 4 | ccv_nnc_tensor_param_t pre_vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size * max_length, embedding_size); |
369 | 4 | CCV_TENSOR_SET_DEVICE_ID(pre_vec_params.type, j); |
370 | 4 | word_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params); |
371 | 4 | pos_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params); |
372 | 4 | select_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph); |
373 | 4 | out[j] = ccv_nnc_tensor_variable_new(dynamic_graph); |
374 | 4 | } |
375 | 1 | ccv_nnc_tensor_variable_t tvin[device_count * 2]; |
376 | 5 | for (j = 0; j < device_count; j++4 ) |
377 | 4 | tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = word_indices[j]; |
378 | 1 | ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, word_vec, device_count, device_count, stream); |
379 | 5 | for (j = 0; j < device_count; j++4 ) |
380 | 4 | tvin[j * 2] = seq_vec[j], tvin[j * 2 + 1] = seq_indices[j]; |
381 | 1 | ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, pos_vec, device_count, device_count, stream); |
382 | 5 | for (j = 0; j < device_count; j++4 ) |
383 | 4 | tvin[j * 2] = word_vec[j], tvin[j * 2 + 1] = pos_vec[j]; |
384 | 1 | ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_ADD_FORWARD(1, 1), ccv_nnc_no_hint, 0, tvin, device_count * 2, select_vec, device_count, device_count, stream); |
385 | 1 | ccv_cnnp_dataframe_iter_peek(iter, (void**)(tensor + device_count), device_count, device_count, stream); |
386 | 5 | for (j = 0; j < device_count; j++4 ) |
387 | 4 | { |
388 | 4 | ccv_nnc_tensor_param_t vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size, max_length, embedding_size); |
389 | 4 | CCV_TENSOR_SET_DEVICE_ID(vec_params.type, j); |
390 | 4 | vec[j * 2] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, select_vec[j], ccv_nnc_no_ofs, DIM_ALLOC(), vec_params); |
391 | 4 | ccv_nnc_tensor_param_t mask_params = GPU_TENSOR_NCHW(000, 32S, batch_size, max_length, max_length); |
392 | 4 | CCV_TENSOR_SET_DEVICE_ID(mask_params.type, j); |
393 | 4 | mask_tensor[j] = ccv_nnc_tensor(tensor[j + device_count][0]->data.i32, mask_params, 0); |
394 | 4 | vec[j * 2 + 1] = ccv_nnc_tensor_constant_new(dynamic_graph, mask_params); |
395 | 4 | ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 2 + 1], &mask_tensor[j]); |
396 | 4 | } |
397 | 1 | ccv_nnc_dynamic_graph_evaluate(dynamic_graph, transformer, 0, vec, device_count * 2, out, device_count, 0, stream); |
398 | 1 | ccv_nnc_tensor_variable_t softmax[device_count]; |
399 | 1 | ccv_nnc_tensor_variable_t fit[device_count]; |
400 | 1 | ccv_nnc_tensor_variable_t vocab_vec_grad[device_count]; |
401 | 1 | ccv_nnc_tensor_variable_t seq_vec_grad[device_count]; |
402 | 5 | for (j = 0; j < device_count; j++4 ) |
403 | 4 | { |
404 | 4 | softmax[j] = ccv_nnc_tensor_variable_new(dynamic_graph); |
405 | 4 | fit[j] = ccv_nnc_tensor_variable_new(dynamic_graph); |
406 | 4 | ccv_nnc_tensor_variable_set(dynamic_graph, fit[j], tensor[j][1]); |
407 | 4 | vocab_vec_grad[j] = ccv_nnc_tensor_variable_new(dynamic_graph); |
408 | 4 | seq_vec_grad[j] = ccv_nnc_tensor_variable_new(dynamic_graph); |
409 | 4 | } |
410 | 1 | ccv_nnc_tensor_variable_t tvout[device_count * 2]; |
411 | 5 | for (j = 0; j < device_count; j++4 ) |
412 | 4 | tvin[j * 2] = out[j], tvin[j * 2 + 1] = fit[j], tvout[j * 2] = 0, tvout[j * 2 + 1] = softmax[j]; |
413 | 1 | ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_SOFTMAX_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, tvout, device_count * 2, device_count, stream); |
414 | 5 | for (j = 0; j < device_count; j++4 ) |
415 | 4 | tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = seq_vec[j], tvout[j * 2] = vocab_vec_grad[j], tvout[j * 2 + 1] = seq_vec_grad[j]; |
416 | 1 | ccv_nnc_dynamic_graph_backward(dynamic_graph, softmax, device_count, 0, tvin, device_count * 2, tvout, device_count * 2, stream); |
417 | 1 | ccv_cnnp_model_set_minimizer(transformer, adam, 0, 0, 0); |
418 | 5 | for (j = 0; j < device_count; j++4 ) |
419 | 4 | tvin[j * 2] = vocab_vec_grad[j], tvin[j * 2 + 1] = seq_vec_grad[j], tvout[j * 2] = vocab_vec[j], tvout[j * 2 + 1] = seq_vec[j]; |
420 | 1 | ccv_nnc_dynamic_graph_apply_gradients(dynamic_graph, adam, tvin, device_count * 2, tvout, device_count * 2, saved_auxs, device_count, stream); |
421 | 1 | ccv_nnc_stream_context_wait(stream); |
422 | 5 | for (j = 0; j < device_count; j++4 ) |
423 | 4 | { |
424 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2]); |
425 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2 + 1]); |
426 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, select_vec[j]); |
427 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, word_vec[j]); |
428 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, word_indices[j]); |
429 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, out[j]); |
430 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, fit[j]); |
431 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, pos_vec[j]); |
432 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, softmax[j]); |
433 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, vocab_vec_grad[j]); |
434 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, seq_vec_grad[j]); |
435 | 4 | } |
436 | 1 | if ((i + 1) % epoch_end == 0) |
437 | 1 | { |
438 | 1 | ++epoch; |
439 | 1 | ccv_cnnp_dataframe_shuffle(train_data); |
440 | 1 | ccv_cnnp_dataframe_iter_set_cursor(iter, 0); |
441 | 1 | } |
442 | 1 | } |
443 | 1 | ccv_nnc_stream_context_free(stream); |
444 | 1 | int correct = 0; |
445 | 1 | ccv_cnnp_dataframe_iter_t* const test_iter = ccv_cnnp_dataframe_iter_new(test_batched_data, test_gpu_batched, device_count * 2); |
446 | 1 | int k; |
447 | 1 | ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 1); |
448 | 1 | const int row_count = ccv_cnnp_dataframe_row_count(test_data); |
449 | 2 | for (k = 0; k < row_count; k += batch_size * device_count1 ) |
450 | 1 | { |
451 | 1 | ccv_cnnp_dataframe_iter_next(test_iter, (void**)tensor, device_count, 0); |
452 | 1 | ccv_nnc_tensor_t word_indices_tensor[device_count]; |
453 | 1 | ccv_nnc_tensor_t mask_tensor[device_count]; |
454 | 1 | ccv_nnc_tensor_variable_t word_indices[device_count]; |
455 | 1 | ccv_nnc_tensor_variable_t word_vec[device_count]; |
456 | 1 | ccv_nnc_tensor_variable_t pos_vec[device_count]; |
457 | 1 | ccv_nnc_tensor_variable_t select_vec[device_count]; |
458 | 1 | ccv_nnc_tensor_variable_t vec[device_count * 2]; |
459 | 1 | ccv_nnc_tensor_variable_t out[device_count]; |
460 | 5 | for (j = 0; j < device_count; j++4 ) |
461 | 4 | { |
462 | 4 | ccv_nnc_tensor_param_t word_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * max_length); |
463 | 4 | CCV_TENSOR_SET_DEVICE_ID(word_indices_params.type, j); |
464 | 4 | word_indices_tensor[j] = ccv_nnc_tensor(tensor[j][0]->data.f32, word_indices_params, 0); |
465 | 4 | word_indices[j] = ccv_nnc_tensor_variable_new(dynamic_graph, word_indices_params); |
466 | 4 | ccv_nnc_tensor_variable_set(dynamic_graph, word_indices[j], &word_indices_tensor[j]); |
467 | 4 | ccv_nnc_tensor_param_t pre_vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size * max_length, embedding_size); |
468 | 4 | CCV_TENSOR_SET_DEVICE_ID(pre_vec_params.type, j); |
469 | 4 | word_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params); |
470 | 4 | pos_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params); |
471 | 4 | select_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph); |
472 | 4 | out[j] = ccv_nnc_tensor_variable_new(dynamic_graph); |
473 | 4 | } |
474 | 1 | ccv_nnc_tensor_variable_t tvin[device_count * 2]; |
475 | 5 | for (j = 0; j < device_count; j++4 ) |
476 | 4 | tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = word_indices[j]; |
477 | 1 | ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, word_vec, device_count, device_count, 0); |
478 | 5 | for (j = 0; j < device_count; j++4 ) |
479 | 4 | tvin[j * 2] = seq_vec[j], tvin[j * 2 + 1] = seq_indices[j]; |
480 | 1 | ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, pos_vec, device_count, device_count, 0); |
481 | 5 | for (j = 0; j < device_count; j++4 ) |
482 | 4 | tvin[j * 2] = word_vec[j], tvin[j * 2 + 1] = pos_vec[j]; |
483 | 1 | ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_ADD_FORWARD(1, 1), ccv_nnc_no_hint, 0, tvin, device_count * 2, select_vec, device_count, device_count, 0); |
484 | 1 | ccv_cnnp_dataframe_iter_peek(test_iter, (void**)(tensor + device_count), device_count, device_count, 0); |
485 | 5 | for (j = 0; j < device_count; j++4 ) |
486 | 4 | { |
487 | 4 | ccv_nnc_tensor_param_t vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size, max_length, embedding_size); |
488 | 4 | CCV_TENSOR_SET_DEVICE_ID(vec_params.type, j); |
489 | 4 | vec[j * 2] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, select_vec[j], ccv_nnc_no_ofs, DIM_ALLOC(), vec_params); |
490 | 4 | ccv_nnc_tensor_param_t mask_params = GPU_TENSOR_NCHW(000, 32S, batch_size, max_length, max_length); |
491 | 4 | CCV_TENSOR_SET_DEVICE_ID(mask_params.type, j); |
492 | 4 | mask_tensor[j] = ccv_nnc_tensor(tensor[j + device_count][0]->data.i32, mask_params, 0); |
493 | 4 | vec[j * 2 + 1] = ccv_nnc_tensor_constant_new(dynamic_graph, mask_params); |
494 | 4 | ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 2 + 1], &mask_tensor[j]); |
495 | 4 | } |
496 | 1 | ccv_nnc_dynamic_graph_evaluate(dynamic_graph, transformer, 1, vec, device_count * 2, out, device_count, 0, 0); |
497 | 1 | int d; |
498 | 5 | for (d = 0; d < device_count; d++4 ) |
499 | 4 | { |
500 | 4 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, out[d], 0)), TENSOR_LIST(out_cpu), 0); |
501 | 4 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor[d][1]), TENSOR_LIST(fit_cpu), 0); |
502 | 132 | for (j = 0; j < ccv_min(row_count - k - d * batch_size, batch_size); j++128 ) |
503 | 128 | { |
504 | 128 | const int truth = (fit_cpu->data.f32[j * 2] < fit_cpu->data.f32[j * 2 + 1]); |
505 | 128 | const int prediction = (out_cpu->data.f32[j * 2] < out_cpu->data.f32[j * 2 + 1]); |
506 | 128 | if (truth == prediction) |
507 | 0 | ++correct; |
508 | 128 | } |
509 | 4 | } |
510 | 5 | for (j = 0; j < device_count; j++4 ) |
511 | 4 | { |
512 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2]); |
513 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2 + 1]); |
514 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, select_vec[j]); |
515 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, word_vec[j]); |
516 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, word_indices[j]); |
517 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, out[j]); |
518 | 4 | ccv_nnc_tensor_variable_free(dynamic_graph, pos_vec[j]); |
519 | 4 | } |
520 | 1 | } |
521 | 1 | ccv_cnnp_dataframe_iter_free(test_iter); |
522 | 1 | ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 0); |
523 | 1 | ccv_cnnp_model_free(transformer); |
524 | 1 | ccv_cnnp_dataframe_iter_free(iter); |
525 | 1 | ccv_cnnp_dataframe_free(batched_data); |
526 | 1 | ccv_cnnp_dataframe_free(test_batched_data); |
527 | 1 | ccv_nnc_dynamic_graph_free(dynamic_graph); |
528 | 1 | ccv_nnc_tensor_free(out_cpu); |
529 | 1 | ccv_nnc_tensor_free(fit_cpu); |
530 | 1 | return correct; |
531 | 1 | } |
532 | | |
533 | | TEST_CASE("train a categorical transformer classifier on imdb reviews to 80% with mix of dynamic graph and cnnp model") |
534 | 1 | { |
535 | 1 | GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GEMM_FORWARD, CCV_NNC_BACKEND_GPU_CUBLAS) && |
536 | 1 | ccv_nnc_cmd_ok(CCV_NNC_GEMM_BACKWARD, CCV_NNC_BACKEND_GPU_CUBLAS) && |
537 | 1 | ccv_nnc_cmd_ok(CCV_NNC_AVERAGE_POOL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
538 | 1 | ccv_nnc_cmd_ok(CCV_NNC_AVERAGE_POOL_BACKWARD, CCV_NNC_BACKEND_GPU_CUDNN)); |
539 | 1 | const char* const train_list = "/fast/Data/IMDB_Movie_Reviews/aclImdb/train.txt"; |
540 | 1 | const char* const test_list = "/fast/Data/IMDB_Movie_Reviews/aclImdb/test.txt"; |
541 | 1 | const char* const vocab_file = "/fast/Data/IMDB_Movie_Reviews/aclImdb/imdb.vocab"; |
542 | 1 | const char* const base_dir = "/fast/Data/IMDB_Movie_Reviews/aclImdb/"; |
543 | 1 | FILE* train_open = fopen(train_list, "rb"); |
544 | 1 | FILE* test_open = fopen(test_list, "rb"); |
545 | 1 | FILE* vocab_open = fopen(vocab_file, "rb"); |
546 | 1 | if (train_open) |
547 | 1 | fclose(train_open); |
548 | 1 | if (test_open) |
549 | 1 | fclose(test_open); |
550 | 1 | if (vocab_open) |
551 | 1 | fclose(vocab_open); |
552 | 1 | if (!train_open || !test_open || !vocab_open) |
553 | 0 | { GUARD_ELSE_RETURN(0); } |
554 | 1 | khash_t(vocab_map)* vocab; |
555 | 1 | int vocab_size; |
556 | 1 | _vocab_init(vocab_file, &vocab, &vocab_size); |
557 | 1 | const int max_length = 512; |
558 | 1 | ccv_array_t* train_set; |
559 | 1 | ccv_cnnp_dataframe_t* train_data; |
560 | 1 | ccv_array_t* test_set; |
561 | 1 | ccv_cnnp_dataframe_t* test_data; |
562 | 1 | if (!ccv_is_coverage()) |
563 | 0 | { |
564 | 0 | train_set = _array_from_disk_new(train_list, base_dir, vocab, vocab_size, max_length, 0); |
565 | 0 | train_data = ccv_cnnp_dataframe_from_array_new(train_set); |
566 | 0 | test_set = _array_from_disk_new(test_list, base_dir, vocab, vocab_size, max_length, 0); |
567 | 0 | test_data = ccv_cnnp_dataframe_from_array_new(test_set); |
568 | 0 | const int correct = train_imdb_fix(10, vocab_size, 64, max_length, 128, train_data, test_data); |
569 | 0 | REQUIRE((float)correct / test_set->rnum > 0.80, "%f should be larger than 80%%", (float)correct / test_set->rnum); |
570 | 1 | } else { |
571 | 1 | train_set = _array_from_disk_new(train_list, base_dir, vocab, vocab_size, max_length, 128); |
572 | 1 | train_data = ccv_cnnp_dataframe_from_array_new(train_set); |
573 | 1 | test_set = _array_from_disk_new(test_list, base_dir, vocab, vocab_size, max_length, 128); |
574 | 1 | test_data = ccv_cnnp_dataframe_from_array_new(test_set); |
575 | 1 | train_imdb_fix(1, vocab_size, 64, max_length, 128, train_data, test_data); |
576 | 1 | } |
577 | 1 | ccv_cnnp_dataframe_free(train_data); |
578 | 1 | ccv_cnnp_dataframe_free(test_data); |
579 | 1 | int i; |
580 | 129 | for (i = 0; i < train_set->rnum; i++128 ) |
581 | 128 | { |
582 | 128 | ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(train_set, i))->tensor); |
583 | 128 | ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(train_set, i))->mask); |
584 | 128 | } |
585 | 1 | ccv_array_free(train_set); |
586 | 129 | for (i = 0; i < test_set->rnum; i++128 ) |
587 | 128 | { |
588 | 128 | ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(test_set, i))->tensor); |
589 | 128 | ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(test_set, i))->mask); |
590 | 128 | } |
591 | 1 | ccv_array_free(test_set); |
592 | 1 | _vocab_destroy(vocab); |
593 | 1 | } |
594 | | |
// Trains a binary (sigmoid cross-entropy) transformer classifier on IMDB with
// variable-length ("flex") batches: each batch is truncated to its own longest
// sequence instead of always padding to max_length. The word and positional
// embedding tables live as dynamic-graph variables; the transformer body is a
// cnnp model evaluated inside the dynamic graph. Returns the number of correct
// predictions over test_data.
// NOTE(review): assumes rows of train_data/test_data are ccv_nnc_text_t with
// tensor/mask/c members — confirm against _array_from_disk_new.
static int train_imdb_flex(const int epoch_limit, const int vocab_size, const int batch_size, const int max_length, const int embedding_size, ccv_cnnp_dataframe_t* const train_data, ccv_cnnp_dataframe_t* const test_data)
{
	// Derive per-row columns: token tensor, scalar label widened 32S -> 32F, and mask.
	const int tensor_idx = ccv_cnnp_dataframe_extract_value(train_data, 0, offsetof(ccv_nnc_text_t, tensor), 0);
	const int one_hot_idx = ccv_cnnp_dataframe_copy_scalar(train_data, 0, offsetof(ccv_nnc_text_t, c), CCV_32S, CCV_32F, CCV_TENSOR_FORMAT_NCHW, 0);
	const int mask_idx = ccv_cnnp_dataframe_extract_value(train_data, 0, offsetof(ccv_nnc_text_t, mask), 0);
	const int device_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
	// Batch batch_size rows per device, device_count sub-batches per iteration.
	ccv_cnnp_dataframe_t* const batched_data = ccv_cnnp_dataframe_combine_new(train_data, COLUMN_ID_LIST(tensor_idx, one_hot_idx, mask_idx), batch_size, device_count, CCV_TENSOR_FORMAT_NCHW);
	const int test_tensor_idx = ccv_cnnp_dataframe_extract_value(test_data, 0, offsetof(ccv_nnc_text_t, tensor), 0);
	const int test_one_hot_idx = ccv_cnnp_dataframe_copy_scalar(test_data, 0, offsetof(ccv_nnc_text_t, c), CCV_32S, CCV_32F, CCV_TENSOR_FORMAT_NCHW, 0);
	const int test_mask_idx = ccv_cnnp_dataframe_extract_value(test_data, 0, offsetof(ccv_nnc_text_t, mask), 0);
	ccv_cnnp_dataframe_t* const test_batched_data = ccv_cnnp_dataframe_combine_new(test_data, COLUMN_ID_LIST(test_tensor_idx, test_one_hot_idx, test_mask_idx), batch_size, device_count, CCV_TENSOR_FORMAT_NCHW);
	// GPU column ids per device: [0, dc) truncated data, [dc, 2dc) squared mask,
	// [2dc, 3dc) labels.
	int gpu_batched[device_count * 3];
	int seq_len_batched[device_count];
	int data_batched[device_count];
	int test_gpu_batched[device_count * 3];
	int test_seq_len_batched[device_count];
	int test_data_batched[device_count];
	int i, j;
	for (i = 0; i < device_count; i++)
	{
		// Combined tuple layout is (data, label, seq_len) per device.
		seq_len_batched[i] = ccv_cnnp_dataframe_extract_tuple(batched_data, 0, i * 3 + 2, 0);
		data_batched[i] = ccv_cnnp_dataframe_extract_tuple(batched_data, 0, i * 3, 0);
		test_seq_len_batched[i] = ccv_cnnp_dataframe_extract_tuple(test_batched_data, 0, i * 3 + 2, 0);
		test_data_batched[i] = ccv_cnnp_dataframe_extract_tuple(test_batched_data, 0, i * 3, 0);
	}
	// Build per-batch (len x len) attention masks from sequence lengths and
	// truncate each data batch to its longest row (the "flex" part).
	const int mask_batched = ccv_cnnp_dataframe_one_squared(batched_data, seq_len_batched, device_count, 1, max_length, 0);
	const int trunc_data_batched = ccv_cnnp_dataframe_truncate(batched_data, data_batched, device_count, seq_len_batched, device_count, 0);
	const int test_mask_batched = ccv_cnnp_dataframe_one_squared(test_batched_data, test_seq_len_batched, device_count, 1, max_length, 0);
	const int test_trunc_data_batched = ccv_cnnp_dataframe_truncate(test_batched_data, test_data_batched, device_count, test_seq_len_batched, device_count, 0);
	for (i = 0; i < device_count; i++)
	{
		gpu_batched[i] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, trunc_data_batched, i, 1, i, 0);
		gpu_batched[i + device_count] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, mask_batched, i, 1, i, 0);
		gpu_batched[i + device_count * 2] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, 0, i * 3 + 1, 1, i, 0);
		test_gpu_batched[i] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, test_trunc_data_batched, i, 1, i, 0);
		test_gpu_batched[i + device_count] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, test_mask_batched, i, 1, i, 0);
		test_gpu_batched[i + device_count * 2] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, 0, i * 3 + 1, 1, i, 0);
	}
	ccv_cnnp_dataframe_iter_t* const iter = ccv_cnnp_dataframe_iter_new(batched_data, gpu_batched, device_count * 3);
	ccv_nnc_dynamic_graph_t* const dynamic_graph = ccv_nnc_dynamic_graph_new();
	ccv_nnc_tensor_variable_t vocab_vec[device_count];
	ccv_nnc_tensor_variable_t seq_vec[device_count];
	// Initialize the embedding tables on device 0 without recording gradients,
	// then mirror them to the remaining devices.
	ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 1);
	ccv_nnc_tensor_param_t vocab_params = GPU_TENSOR_NCHW(000, 32F, vocab_size, embedding_size);
	vocab_vec[0] = ccv_nnc_tensor_variable_new(dynamic_graph, vocab_params);
	ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(vocab_vec[0]), 0, 0);
	ccv_nnc_tensor_param_t seq_params = GPU_TENSOR_NCHW(000, 32F, max_length, embedding_size);
	seq_vec[0] = ccv_nnc_tensor_variable_new(dynamic_graph, seq_params);
	ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(seq_vec[0]), 0, 0);
	for (i = 1; i < device_count; i++)
	{
		CCV_TENSOR_SET_DEVICE_ID(vocab_params.type, i);
		vocab_vec[i] = ccv_nnc_tensor_variable_new(dynamic_graph, vocab_params);
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(vocab_vec[0]), TENSOR_VARIABLE_LIST(vocab_vec[i]), 0, 0);
		CCV_TENSOR_SET_DEVICE_ID(seq_params.type, i);
		seq_vec[i] = ccv_nnc_tensor_variable_new(dynamic_graph, seq_params);
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(seq_vec[0]), TENSOR_VARIABLE_LIST(seq_vec[i]), 0, 0);
	}
	ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 0);
	// Position-index constants sized for the worst case (batch_size * max_length);
	// per-batch aliases below narrow them to the actual batch_length.
	ccv_nnc_tensor_t* const seq_indices_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32S, batch_size * max_length), 0);
	ccv_nnc_tensor_variable_t seq_indices[device_count];
	for (i = 0; i < device_count; i++)
	{
		ccv_nnc_tensor_param_t seq_params = GPU_TENSOR_NCHW(000, 32S, batch_size * max_length);
		CCV_TENSOR_SET_DEVICE_ID(seq_params.type, i);
		seq_indices[i] = ccv_nnc_tensor_constant_new(dynamic_graph, seq_params);
	}
	classifier_transformer_params_t classifier_transformer_params = {
		.layers = 2,
		.h = 8,
		.ff = 4,
		.dropout = 0.1,
	};
	ccv_cnnp_model_t* const transformer = ccv_cnnp_dynamic_new(_dynamic_binary_classifier_transformer, &classifier_transformer_params, 0);
	ccv_cnnp_model_set_data_parallel(transformer, device_count);
	// Iterations per epoch, rounding up to cover a trailing partial batch.
	const int epoch_end = (ccv_cnnp_dataframe_row_count(train_data) + device_count * batch_size - 1) / (device_count * batch_size);
	ccv_cnnp_dataframe_shuffle(train_data);
	ccv_nnc_cmd_t adam = CMD_ADAM_FORWARD(1, 0.0001, 0.9, 0.98, 0, 1e-9, 0);
	const int aux_size = ccv_nnc_minimizer_saved_aux_size(adam);
	// Adam saved-aux slots for the two embedding tables on each device:
	// first aux_size for vocab_vec, next aux_size for seq_vec.
	ccv_nnc_tensor_variable_t saved_auxs[device_count * aux_size * 2];
	for (i = 0; i < device_count; i++)
	{
		for (j = 0; j < aux_size; j++)
		{
			ccv_nnc_tensor_param_t saved_aux_params = GPU_TENSOR_NCHW(000, 32F, vocab_size, embedding_size);
			CCV_TENSOR_SET_DEVICE_ID(saved_aux_params.type, i);
			saved_auxs[i * aux_size * 2 + j] = ccv_nnc_tensor_variable_new(dynamic_graph, saved_aux_params);
		}
		for (j = 0; j < aux_size; j++)
		{
			ccv_nnc_tensor_param_t saved_aux_params = GPU_TENSOR_NCHW(000, 32F, max_length, embedding_size);
			CCV_TENSOR_SET_DEVICE_ID(saved_aux_params.type, i);
			saved_auxs[i* aux_size * 2 + aux_size + j] = ccv_nnc_tensor_variable_new(dynamic_graph, saved_aux_params);
		}
	}
	ccv_nnc_tensor_t* const out_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, batch_size, 1), 0);
	ccv_nnc_tensor_t* const fit_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, batch_size, 1), 0);
	ccv_nnc_tensor_t** tensor[device_count * 3];
	int epoch = 0;
	ccv_nnc_stream_context_t* const stream = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
	// ---- Training loop: i counts iterations, epoch counts passes over train_data.
	for (i = 0; epoch < epoch_limit; i++)
	{
		// Linear learn-rate warmup over roughly the first 10000 samples, scaled
		// by device count; Adam's step counter is i + 1.
		float learn_rate = 0.0001 * ccv_min(i / (10000. / batch_size), 1) * device_count;
		adam = CMD_ADAM_FORWARD(i + 1, learn_rate, 0.9, 0.98, 0, 1e-9, 0);
		// Fetch only the data columns now; masks and labels are peeked later so
		// their transfer overlaps the embedding computation on the stream.
		ccv_cnnp_dataframe_iter_next(iter, (void**)tensor, device_count, stream);
		ccv_nnc_tensor_t word_indices_tensor[device_count];
		ccv_nnc_tensor_t mask_tensor[device_count];
		ccv_nnc_tensor_variable_t word_indices[device_count];
		ccv_nnc_tensor_variable_t word_vec[device_count];
		ccv_nnc_tensor_variable_t pos_vec[device_count];
		ccv_nnc_tensor_variable_t select_vec[device_count];
		ccv_nnc_tensor_variable_t vec[device_count * 2];
		ccv_nnc_tensor_variable_t out[device_count];
		ccv_nnc_tensor_variable_t seq_indices_alias[device_count];
		int batch_length = 0;
		for (j = 0; j < device_count; j++)
		{
			// Truncated length of this batch (same across devices after combine).
			batch_length = tensor[j][0]->info.dim[1];
			ccv_nnc_tensor_param_t word_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * batch_length);
			CCV_TENSOR_SET_DEVICE_ID(word_indices_params.type, j);
			// Reinterpret the batch buffer as a flat 32S index tensor; data.f32 is
			// just the union member used to grab the raw pointer.
			word_indices_tensor[j] = ccv_nnc_tensor(tensor[j][0]->data.f32, word_indices_params, 0);
			word_indices[j] = ccv_nnc_tensor_variable_new(dynamic_graph, word_indices_params);
			ccv_nnc_tensor_variable_set(dynamic_graph, word_indices[j], &word_indices_tensor[j]);
			ccv_nnc_tensor_param_t pre_vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size * batch_length, embedding_size);
			CCV_TENSOR_SET_DEVICE_ID(pre_vec_params.type, j);
			word_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
			pos_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
			select_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
			out[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
			ccv_nnc_tensor_param_t seq_params = GPU_TENSOR_NCHW(000, 32S, batch_size * batch_length);
			CCV_TENSOR_SET_DEVICE_ID(seq_params.type, j);
			// Narrow the max_length-sized position constant to this batch_length.
			seq_indices_alias[j] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, seq_indices[j], ccv_nnc_no_ofs, DIM_ALLOC(), seq_params);
		}
		// Fill 0..batch_length-1 position indices for every row, then upload.
		for (j = 0; j < batch_size; j++)
		{
			int k;
			for (k = 0; k < batch_length; k++)
				seq_indices_cpu->data.i32[j * batch_length + k] = k;
		}
		for (j = 0; j < device_count; j++)
			ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(seq_indices_cpu), TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, seq_indices[j])), 0);
		// word_vec = vocab_vec[word_indices]; pos_vec = seq_vec[seq_indices];
		// select_vec = word_vec + pos_vec (all batched per device on the stream).
		ccv_nnc_tensor_variable_t tvin[device_count * 2];
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = word_indices[j];
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, word_vec, device_count, device_count, stream);
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = seq_vec[j], tvin[j * 2 + 1] = seq_indices_alias[j];
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, pos_vec, device_count, device_count, stream);
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = word_vec[j], tvin[j * 2 + 1] = pos_vec[j];
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_ADD_FORWARD(1, 1), ccv_nnc_no_hint, 0, tvin, device_count * 2, select_vec, device_count, device_count, stream);
		// Now pull the mask and label columns for this same batch.
		ccv_cnnp_dataframe_iter_peek(iter, (void**)(tensor + device_count), device_count, device_count * 2, stream);
		for (j = 0; j < device_count; j++)
		{
			// View the flat embeddings as (batch, length, embedding) for the model.
			ccv_nnc_tensor_param_t vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size, batch_length, embedding_size);
			CCV_TENSOR_SET_DEVICE_ID(vec_params.type, j);
			vec[j * 2] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, select_vec[j], ccv_nnc_no_ofs, DIM_ALLOC(), vec_params);
			ccv_nnc_tensor_param_t mask_params = GPU_TENSOR_NCHW(000, 32S, batch_size, batch_length, batch_length);
			CCV_TENSOR_SET_DEVICE_ID(mask_params.type, j);
			mask_tensor[j] = ccv_nnc_tensor(tensor[j + device_count][0]->data.i32, mask_params, 0);
			// Mask is a constant: no gradient flows into it.
			vec[j * 2 + 1] = ccv_nnc_tensor_constant_new(dynamic_graph, mask_params);
			ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 2 + 1], &mask_tensor[j]);
		}
		// Training-mode forward through the cnnp transformer (is_test = 0).
		ccv_nnc_dynamic_graph_evaluate(dynamic_graph, transformer, 0, vec, device_count * 2, out, device_count, 0, stream);
		ccv_nnc_tensor_variable_t sigmoid[device_count];
		ccv_nnc_tensor_variable_t fit[device_count];
		ccv_nnc_tensor_variable_t vocab_vec_grad[device_count];
		ccv_nnc_tensor_variable_t seq_vec_grad[device_count];
		for (j = 0; j < device_count; j++)
		{
			sigmoid[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
			fit[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
			ccv_nnc_tensor_variable_set(dynamic_graph, fit[j], tensor[j + device_count * 2][0]);
			vocab_vec_grad[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
			seq_vec_grad[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
		}
		// Loss: sigmoid binary cross-entropy; only the loss output (second slot)
		// is kept, the activation slot is discarded (0).
		ccv_nnc_tensor_variable_t tvout[device_count * 2];
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = out[j], tvin[j * 2 + 1] = fit[j], tvout[j * 2] = 0, tvout[j * 2 + 1] = sigmoid[j];
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, tvout, device_count * 2, device_count, stream);
		// Backprop loss into the embedding tables (the model's own parameters are
		// updated internally via its minimizer).
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = seq_vec[j], tvout[j * 2] = vocab_vec_grad[j], tvout[j * 2 + 1] = seq_vec_grad[j];
		ccv_nnc_dynamic_graph_backward(dynamic_graph, sigmoid, device_count, 0, tvin, device_count * 2, tvout, device_count * 2, stream);
		ccv_cnnp_model_set_minimizer(transformer, adam, 0, 0, 0);
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = vocab_vec_grad[j], tvin[j * 2 + 1] = seq_vec_grad[j], tvout[j * 2] = vocab_vec[j], tvout[j * 2 + 1] = seq_vec[j];
		ccv_nnc_dynamic_graph_apply_gradients(dynamic_graph, adam, tvin, device_count * 2, tvout, device_count * 2, saved_auxs, device_count, stream);
		// Synchronize before freeing the per-iteration variables that wrap
		// dataframe-owned buffers.
		ccv_nnc_stream_context_wait(stream);
		for (j = 0; j < device_count; j++)
		{
			ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2]);
			ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2 + 1]);
			ccv_nnc_tensor_variable_free(dynamic_graph, select_vec[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, word_vec[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, word_indices[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, out[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, fit[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, pos_vec[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, sigmoid[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, vocab_vec_grad[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, seq_vec_grad[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, seq_indices_alias[j]);
		}
		// End of an epoch: reshuffle and rewind the iterator.
		if ((i + 1) % epoch_end == 0)
		{
			++epoch;
			ccv_cnnp_dataframe_shuffle(train_data);
			ccv_cnnp_dataframe_iter_set_cursor(iter, 0);
		}
	}
	ccv_nnc_stream_context_free(stream);
	// ---- Evaluation: single pass over test_data, gradients disabled.
	int correct = 0;
	ccv_cnnp_dataframe_iter_t* const test_iter = ccv_cnnp_dataframe_iter_new(test_batched_data, test_gpu_batched, device_count * 3);
	int k;
	ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 1);
	const int row_count = ccv_cnnp_dataframe_row_count(test_data);
	for (k = 0; k < row_count; k += batch_size * device_count)
	{
		// Fetch all three columns (data, mask, label) at once; no stream here.
		ccv_cnnp_dataframe_iter_next(test_iter, (void**)tensor, device_count * 3, 0);
		ccv_nnc_tensor_t word_indices_tensor[device_count];
		ccv_nnc_tensor_t mask_tensor[device_count];
		ccv_nnc_tensor_variable_t word_indices[device_count];
		ccv_nnc_tensor_variable_t word_vec[device_count];
		ccv_nnc_tensor_variable_t pos_vec[device_count];
		ccv_nnc_tensor_variable_t select_vec[device_count];
		ccv_nnc_tensor_variable_t vec[device_count * 2];
		ccv_nnc_tensor_variable_t out[device_count];
		ccv_nnc_tensor_variable_t seq_indices_alias[device_count];
		int batch_length = 0;
		for (j = 0; j < device_count; j++)
		{
			batch_length = tensor[j][0]->info.dim[1];
			ccv_nnc_tensor_param_t word_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * batch_length);
			CCV_TENSOR_SET_DEVICE_ID(word_indices_params.type, j);
			word_indices_tensor[j] = ccv_nnc_tensor(tensor[j][0]->data.f32, word_indices_params, 0);
			word_indices[j] = ccv_nnc_tensor_variable_new(dynamic_graph, word_indices_params);
			ccv_nnc_tensor_variable_set(dynamic_graph, word_indices[j], &word_indices_tensor[j]);
			ccv_nnc_tensor_param_t pre_vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size * batch_length, embedding_size);
			CCV_TENSOR_SET_DEVICE_ID(pre_vec_params.type, j);
			word_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
			pos_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
			select_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
			out[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
			ccv_nnc_tensor_param_t seq_params = GPU_TENSOR_NCHW(000, 32S, batch_size * batch_length);
			CCV_TENSOR_SET_DEVICE_ID(seq_params.type, j);
			seq_indices_alias[j] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, seq_indices[j], ccv_nnc_no_ofs, DIM_ALLOC(), seq_params);
		}
		for (j = 0; j < batch_size; j++)
		{
			int k;
			for (k = 0; k < batch_length; k++)
				seq_indices_cpu->data.i32[j * batch_length + k] = k;
		}
		for (j = 0; j < device_count; j++)
			ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(seq_indices_cpu), TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, seq_indices[j])), 0);
		// Same embedding-sum forward as training, but synchronous (stream = 0).
		ccv_nnc_tensor_variable_t tvin[device_count * 2];
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = word_indices[j];
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, word_vec, device_count, device_count, 0);
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = seq_vec[j], tvin[j * 2 + 1] = seq_indices_alias[j];
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, pos_vec, device_count, device_count, 0);
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = word_vec[j], tvin[j * 2 + 1] = pos_vec[j];
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_ADD_FORWARD(1, 1), ccv_nnc_no_hint, 0, tvin, device_count * 2, select_vec, device_count, device_count, 0);
		for (j = 0; j < device_count; j++)
		{
			ccv_nnc_tensor_param_t vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size, batch_length, embedding_size);
			CCV_TENSOR_SET_DEVICE_ID(vec_params.type, j);
			vec[j * 2] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, select_vec[j], ccv_nnc_no_ofs, DIM_ALLOC(), vec_params);
			ccv_nnc_tensor_param_t mask_params = GPU_TENSOR_NCHW(000, 32S, batch_size, batch_length, batch_length);
			CCV_TENSOR_SET_DEVICE_ID(mask_params.type, j);
			mask_tensor[j] = ccv_nnc_tensor(tensor[j + device_count][0]->data.i32, mask_params, 0);
			vec[j * 2 + 1] = ccv_nnc_tensor_constant_new(dynamic_graph, mask_params);
			ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 2 + 1], &mask_tensor[j]);
		}
		// Test-mode forward (is_test = 1: dropout etc. disabled).
		ccv_nnc_dynamic_graph_evaluate(dynamic_graph, transformer, 1, vec, device_count * 2, out, device_count, 0, 0);
		int d;
		for (d = 0; d < device_count; d++)
		{
			ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, out[d], 0)), TENSOR_LIST(out_cpu), 0);
			ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor[d + device_count * 2][0]), TENSOR_LIST(fit_cpu), 0);
			// Clamp to the rows actually present in a trailing partial batch.
			for (j = 0; j < ccv_min(row_count - k - d * batch_size, batch_size); j++)
			{
				// Single-logit binary decision: label > 0.5 vs. logit > 0.
				const int truth = (fit_cpu->data.f32[j] > 0.5);
				const int prediction = (out_cpu->data.f32[j] > 0);
				if (truth == prediction)
					++correct;
			}
		}
		for (j = 0; j < device_count; j++)
		{
			ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2]);
			ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2 + 1]);
			ccv_nnc_tensor_variable_free(dynamic_graph, select_vec[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, word_vec[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, word_indices[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, out[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, pos_vec[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, seq_indices_alias[j]);
		}
	}
	ccv_nnc_tensor_free(seq_indices_cpu);
	ccv_cnnp_dataframe_iter_free(test_iter);
	ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 0);
	ccv_cnnp_model_free(transformer);
	ccv_cnnp_dataframe_iter_free(iter);
	ccv_cnnp_dataframe_free(batched_data);
	ccv_cnnp_dataframe_free(test_batched_data);
	ccv_nnc_dynamic_graph_free(dynamic_graph);
	ccv_nnc_tensor_free(out_cpu);
	ccv_nnc_tensor_free(fit_cpu);
	return correct;
}
909 | | |
910 | | TEST_CASE("train a binary transformer classifier on imdb reviews to 80% with mix of dynamic graph and cnnp model and dynamic inputs") |
911 | 1 | { |
912 | 1 | GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GEMM_FORWARD, CCV_NNC_BACKEND_GPU_CUBLAS) && |
913 | 1 | ccv_nnc_cmd_ok(CCV_NNC_GEMM_BACKWARD, CCV_NNC_BACKEND_GPU_CUBLAS) && |
914 | 1 | ccv_nnc_cmd_ok(CCV_NNC_AVERAGE_POOL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) && |
915 | 1 | ccv_nnc_cmd_ok(CCV_NNC_AVERAGE_POOL_BACKWARD, CCV_NNC_BACKEND_GPU_CUDNN)); |
916 | 1 | const char* const train_list = "/fast/Data/IMDB_Movie_Reviews/aclImdb/train.txt"; |
917 | 1 | const char* const test_list = "/fast/Data/IMDB_Movie_Reviews/aclImdb/test.txt"; |
918 | 1 | const char* const vocab_file = "/fast/Data/IMDB_Movie_Reviews/aclImdb/imdb.vocab"; |
919 | 1 | const char* const base_dir = "/fast/Data/IMDB_Movie_Reviews/aclImdb/"; |
920 | 1 | FILE* train_open = fopen(train_list, "rb"); |
921 | 1 | FILE* test_open = fopen(test_list, "rb"); |
922 | 1 | FILE* vocab_open = fopen(vocab_file, "rb"); |
923 | 1 | if (train_open) |
924 | 1 | fclose(train_open); |
925 | 1 | if (test_open) |
926 | 1 | fclose(test_open); |
927 | 1 | if (vocab_open) |
928 | 1 | fclose(vocab_open); |
929 | 1 | if (!train_open || !test_open || !vocab_open) |
930 | 0 | { GUARD_ELSE_RETURN(0); } |
931 | 1 | khash_t(vocab_map)* vocab; |
932 | 1 | int vocab_size; |
933 | 1 | _vocab_init(vocab_file, &vocab, &vocab_size); |
934 | 1 | const int max_length = 512; |
935 | 1 | ccv_array_t* train_set; |
936 | 1 | ccv_cnnp_dataframe_t* train_data; |
937 | 1 | ccv_array_t* test_set; |
938 | 1 | ccv_cnnp_dataframe_t* test_data; |
939 | 1 | if (!ccv_is_coverage()) |
940 | 0 | { |
941 | 0 | train_set = _array_from_disk_new(train_list, base_dir, vocab, vocab_size, max_length, 0); |
942 | 0 | train_data = ccv_cnnp_dataframe_from_array_new(train_set); |
943 | 0 | test_set = _array_from_disk_new(test_list, base_dir, vocab, vocab_size, max_length, 0); |
944 | 0 | test_data = ccv_cnnp_dataframe_from_array_new(test_set); |
945 | 0 | const int correct = train_imdb_flex(10, vocab_size, 64, max_length, 128, train_data, test_data); |
946 | 0 | REQUIRE((float)correct / test_set->rnum > 0.80, "%f should be larger than 80%%", (float)correct / test_set->rnum); |
947 | 1 | } else { |
948 | 1 | train_set = _array_from_disk_new(train_list, base_dir, vocab, vocab_size, max_length, 128); |
949 | 1 | train_data = ccv_cnnp_dataframe_from_array_new(train_set); |
950 | 1 | test_set = _array_from_disk_new(test_list, base_dir, vocab, vocab_size, max_length, 128); |
951 | 1 | test_data = ccv_cnnp_dataframe_from_array_new(test_set); |
952 | 1 | train_imdb_flex(1, vocab_size, 64, max_length, 128, train_data, test_data); |
953 | 1 | } |
954 | 1 | ccv_cnnp_dataframe_free(train_data); |
955 | 1 | ccv_cnnp_dataframe_free(test_data); |
956 | 1 | int i; |
957 | 129 | for (i = 0; i < train_set->rnum; i++128 ) |
958 | 128 | { |
959 | 128 | ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(train_set, i))->tensor); |
960 | 128 | ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(train_set, i))->mask); |
961 | 128 | } |
962 | 1 | ccv_array_free(train_set); |
963 | 129 | for (i = 0; i < test_set->rnum; i++128 ) |
964 | 128 | { |
965 | 128 | ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(test_set, i))->tensor); |
966 | 128 | ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(test_set, i))->mask); |
967 | 128 | } |
968 | 1 | ccv_array_free(test_set); |
969 | 1 | _vocab_destroy(vocab); |
970 | 1 | } |
971 | | |
972 | | static ccv_cnnp_model_t* _classifier_lstm_new(const int batch_size, const int batch_length, const int num_layers, const int hidden_size, const float dropout) |
973 | 1 | { |
974 | 1 | ccv_cnnp_model_io_t const x = ccv_cnnp_input(); |
975 | 1 | ccv_cnnp_model_io_t const mask = ccv_cnnp_input(); |
976 | 1 | ccv_cnnp_model_io_t const index = ccv_cnnp_input(); |
977 | 1 | ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(ccv_cnnp_lstm(1, hidden_size, 0, num_layers, 1, 1, 0, dropout, 1, 0), MODEL_IO_LIST(x, mask)); |
978 | 1 | out = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(batch_size * batch_length, 128), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out)); |
979 | 1 | out = ccv_cnnp_model_apply(ccv_cnnp_index_select(0), MODEL_IO_LIST(out, index)); |
980 | | // Last layer, get it to 1. |
981 | 1 | out = ccv_cnnp_model_apply(ccv_cnnp_flatten(0), MODEL_IO_LIST(out)); |
982 | 1 | out = ccv_cnnp_model_apply(ccv_cnnp_dense(1, 0, 0, 1, 0), MODEL_IO_LIST(out)); |
983 | 1 | return ccv_cnnp_model_new(MODEL_IO_LIST(x, mask, index), MODEL_IO_LIST(out), 1, "classifier"); |
984 | 1 | } |
985 | | |
// Hyperparameters for the lazily-instantiated LSTM classifier, passed as the
// context pointer of ccv_cnnp_dynamic_new (see _dynamic_classifier_lstm).
typedef struct {
	int num_layers; // Number of stacked LSTM layers.
	int hidden_size; // LSTM hidden state size (per-timestep output width, proj 0).
	float dropout; // Dropout rate for the LSTM.
} classifier_lstm_params_t;
991 | | |
992 | | static ccv_cnnp_model_t* _dynamic_classifier_lstm(const ccv_nnc_tensor_param_t* const inputs, const int input_size, void* const context) |
993 | 1 | { |
994 | 1 | const classifier_lstm_params_t* const params = (classifier_lstm_params_t*)context; |
995 | 1 | const int batch_size = inputs[0].dim[0]; |
996 | 1 | const int batch_length = inputs[0].dim[1]; |
997 | 1 | return _classifier_lstm_new(batch_size, batch_length, params->num_layers, params->hidden_size, params->dropout); |
998 | 1 | } |
999 | | |
1000 | | static void _ccv_cnnp_mask_to_index(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) |
1001 | 4 | { |
1002 | 4 | int i, j; |
1003 | 8 | for (i = 0; i < batch_size; i++4 ) |
1004 | 4 | { |
1005 | 4 | ccv_nnc_tensor_t* const input = (ccv_nnc_tensor_t*)column_data[0][i]; |
1006 | 4 | ccv_nnc_tensor_t* output = (ccv_nnc_tensor_t*)data[i]; |
1007 | 4 | ccv_nnc_tensor_param_t params = input->info; |
1008 | 4 | output = output ? ccv_nnc_tensor_resize(output, params)2 : ccv_nnc_tensor_new(0, params, 0)2 ; |
1009 | 4 | int max_seq_length = 0; |
1010 | 260 | for (j = 0; j < params.dim[0]; j++256 ) |
1011 | 256 | if (input->data.i32[j] > max_seq_length) |
1012 | 11 | max_seq_length = input->data.i32[j]; |
1013 | 260 | for (j = 0; j < params.dim[0]; j++256 ) |
1014 | 256 | output->data.i32[j] = ccv_max(max_seq_length * j + input->data.i32[j] - 1, 0); |
1015 | 4 | data[i] = output; |
1016 | 4 | } |
1017 | 4 | } |
1018 | | |
// Dataframe column deinit callback: frees a tensor produced by a mapped
// column (e.g. the index tensors created by _ccv_cnnp_mask_to_index).
static void _ccv_cnnp_tensor_deinit(void* const data, void* const context)
{
	ccv_nnc_tensor_free((ccv_nnc_tensor_t*)data);
}
1023 | | |
1024 | | static int train_imdb_lstm(const int epoch_limit, const int vocab_size, const int batch_size, const int max_length, const int embedding_size, ccv_cnnp_dataframe_t* const train_data, ccv_cnnp_dataframe_t* const test_data) |
1025 | 1 | { |
1026 | 1 | const int tensor_idx = ccv_cnnp_dataframe_extract_value(train_data, 0, offsetof(ccv_nnc_text_t, tensor), 0); |
1027 | 1 | const int one_hot_idx = ccv_cnnp_dataframe_copy_scalar(train_data, 0, offsetof(ccv_nnc_text_t, c), CCV_32S, CCV_32F, CCV_TENSOR_FORMAT_NCHW, 0); |
1028 | 1 | const int mask_idx = ccv_cnnp_dataframe_extract_value(train_data, 0, offsetof(ccv_nnc_text_t, mask), 0); |
1029 | 1 | ccv_cnnp_dataframe_t* const batched_data = ccv_cnnp_dataframe_combine_new(train_data, COLUMN_ID_LIST(tensor_idx, one_hot_idx, mask_idx), batch_size, 1, CCV_TENSOR_FORMAT_NCHW); |
1030 | 1 | const int test_tensor_idx = ccv_cnnp_dataframe_extract_value(test_data, 0, offsetof(ccv_nnc_text_t, tensor), 0); |
1031 | 1 | const int test_one_hot_idx = ccv_cnnp_dataframe_copy_scalar(test_data, 0, offsetof(ccv_nnc_text_t, c), CCV_32S, CCV_32F, CCV_TENSOR_FORMAT_NCHW, 0); |
1032 | 1 | const int test_mask_idx = ccv_cnnp_dataframe_extract_value(test_data, 0, offsetof(ccv_nnc_text_t, mask), 0); |
1033 | 1 | ccv_cnnp_dataframe_t* const test_batched_data = ccv_cnnp_dataframe_combine_new(test_data, COLUMN_ID_LIST(test_tensor_idx, test_one_hot_idx, test_mask_idx), batch_size, 1, CCV_TENSOR_FORMAT_NCHW); |
1034 | 1 | int gpu_batched[4]; |
1035 | 1 | int seq_len_batched[1]; |
1036 | 1 | int index_batched[1]; |
1037 | 1 | int data_batched[1]; |
1038 | 1 | int test_gpu_batched[4]; |
1039 | 1 | int test_seq_len_batched[1]; |
1040 | 1 | int test_index_batched[1]; |
1041 | 1 | int test_data_batched[1]; |
1042 | 1 | int i, j; |
1043 | 2 | for (i = 0; i < 1; i++1 ) |
1044 | 1 | { |
1045 | 1 | seq_len_batched[i] = ccv_cnnp_dataframe_extract_tuple(batched_data, 0, i * 3 + 2, 0); |
1046 | 1 | index_batched[i] = ccv_cnnp_dataframe_map(batched_data, _ccv_cnnp_mask_to_index, CCV_STREAM_CONTEXT_CPU, _ccv_cnnp_tensor_deinit, COLUMN_ID_LIST(seq_len_batched[i]), 0, 0, 0); |
1047 | 1 | index_batched[i] = ccv_cnnp_dataframe_make_tuple(batched_data, COLUMN_ID_LIST(index_batched[i]), 0); |
1048 | 1 | data_batched[i] = ccv_cnnp_dataframe_extract_tuple(batched_data, 0, i * 3, 0); |
1049 | 1 | test_seq_len_batched[i] = ccv_cnnp_dataframe_extract_tuple(test_batched_data, 0, i * 3 + 2, 0); |
1050 | 1 | test_index_batched[i] = ccv_cnnp_dataframe_map(test_batched_data, _ccv_cnnp_mask_to_index, CCV_STREAM_CONTEXT_CPU, _ccv_cnnp_tensor_deinit, COLUMN_ID_LIST(test_seq_len_batched[i]), 0, 0, 0); |
1051 | 1 | test_index_batched[i] = ccv_cnnp_dataframe_make_tuple(test_batched_data, COLUMN_ID_LIST(test_index_batched[i]), 0); |
1052 | 1 | test_data_batched[i] = ccv_cnnp_dataframe_extract_tuple(test_batched_data, 0, i * 3, 0); |
1053 | 1 | } |
1054 | 1 | const int trunc_data_batched = ccv_cnnp_dataframe_truncate(batched_data, data_batched, 1, seq_len_batched, 1, 0); |
1055 | 1 | const int test_trunc_data_batched = ccv_cnnp_dataframe_truncate(test_batched_data, test_data_batched, 1, test_seq_len_batched, 1, 0); |
1056 | 2 | for (i = 0; i < 1; i++1 ) |
1057 | 1 | { |
1058 | 1 | gpu_batched[i * 4] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, trunc_data_batched, i, 1, i, 0); |
1059 | 1 | gpu_batched[i * 4 + 1] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, 0, i * 3 + 1, 1, i, 0); |
1060 | 1 | gpu_batched[i * 4 + 2] = ccv_cnnp_dataframe_extract_tuple(batched_data, 0, i * 3 + 2, 0); |
1061 | 1 | gpu_batched[i * 4 + 3] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, index_batched[i], 0, 1, i, 0); |
1062 | 1 | test_gpu_batched[i * 4] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, test_trunc_data_batched, i, 1, i, 0); |
1063 | 1 | test_gpu_batched[i * 4 + 1] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, 0, i * 3 + 1, 1, i, 0); |
1064 | 1 | test_gpu_batched[i * 4 + 2] = ccv_cnnp_dataframe_extract_tuple(test_batched_data, 0, i * 3 + 2, 0); |
1065 | 1 | test_gpu_batched[i * 4 + 3] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, test_index_batched[i], 0, 1, i, 0); |
1066 | 1 | } |
1067 | 1 | ccv_cnnp_dataframe_iter_t* const iter = ccv_cnnp_dataframe_iter_new(batched_data, gpu_batched, 4); |
1068 | 1 | ccv_nnc_dynamic_graph_t* const dynamic_graph = ccv_nnc_dynamic_graph_new(); |
1069 | 1 | ccv_nnc_tensor_t* const vocab_vec_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, vocab_size, embedding_size), 0); |
1070 | 1 | ccv_nnc_cmd_exec(CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(vocab_vec_cpu), 0); |
1071 | 1 | ccv_nnc_tensor_variable_t vocab_vec[1]; |
1072 | 2 | for (i = 0; i < 1; i++1 ) |
1073 | 1 | { |
1074 | 1 | ccv_nnc_tensor_param_t vocab_params = GPU_TENSOR_NCHW(000, 32F, vocab_size, embedding_size); |
1075 | 1 | CCV_TENSOR_SET_DEVICE_ID(vocab_params.type, i); |
1076 | 1 | vocab_vec[i] = ccv_nnc_tensor_variable_new(dynamic_graph, vocab_params); |
1077 | 1 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(vocab_vec_cpu), TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, vocab_vec[i])), 0); |
1078 | 1 | } |
1079 | 1 | ccv_nnc_tensor_free(vocab_vec_cpu); |
1080 | 1 | classifier_lstm_params_t classifier_lstm_params = { |
1081 | 1 | .num_layers = 2, |
1082 | 1 | .hidden_size = 128, |
1083 | 1 | .dropout = 0.2, |
1084 | 1 | }; |
1085 | 1 | ccv_cnnp_model_t* const lstm = ccv_cnnp_dynamic_new(_dynamic_classifier_lstm, &classifier_lstm_params, 0); |
1086 | 1 | const int epoch_end = (ccv_cnnp_dataframe_row_count(train_data) + batch_size - 1) / batch_size; |
1087 | 1 | ccv_cnnp_dataframe_shuffle(train_data); |
1088 | 1 | ccv_nnc_cmd_t optim = CMD_LAMB_FORWARD(1, 0.001, 0.9, 0.999, 0, 1e-6); |
1089 | 1 | const int aux_size = ccv_nnc_minimizer_saved_aux_size(optim); |
1090 | 1 | ccv_nnc_tensor_variable_t saved_auxs[aux_size * 2]; |
1091 | 2 | for (i = 0; i < 1; i++1 ) |
1092 | 1 | { |
1093 | 3 | for (j = 0; j < aux_size; j++2 ) |
1094 | 2 | { |
1095 | 2 | ccv_nnc_tensor_param_t saved_aux_params = GPU_TENSOR_NCHW(000, 32F, vocab_size, embedding_size); |
1096 | 2 | CCV_TENSOR_SET_DEVICE_ID(saved_aux_params.type, i); |
1097 | 2 | saved_auxs[i * aux_size * 2 + j] = ccv_nnc_tensor_variable_new(dynamic_graph, saved_aux_params); |
1098 | 2 | } |
1099 | 3 | for (j = 0; j < aux_size; j++2 ) |
1100 | 2 | { |
1101 | 2 | ccv_nnc_tensor_param_t saved_aux_params = GPU_TENSOR_NCHW(000, 32F, max_length, embedding_size); |
1102 | 2 | CCV_TENSOR_SET_DEVICE_ID(saved_aux_params.type, i); |
1103 | 2 | saved_auxs[i* aux_size * 2 + aux_size + j] = ccv_nnc_tensor_variable_new(dynamic_graph, saved_aux_params); |
1104 | 2 | } |
1105 | 1 | } |
1106 | 1 | ccv_nnc_tensor_t* const out_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, batch_size, 1), 0); |
1107 | 1 | ccv_nnc_tensor_t* const fit_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, batch_size, 1), 0); |
1108 | 1 | ccv_nnc_tensor_t** tensor[4]; |
1109 | 1 | int epoch = 0; |
1110 | 3 | for (i = 0; epoch < epoch_limit; i++2 ) |
1111 | 2 | { |
1112 | 2 | float learn_rate = 0.001; |
1113 | 2 | optim = CMD_LAMB_FORWARD(i + 1, learn_rate, 0.9, 0.999, 0, 1e-6); |
1114 | 2 | int status = ccv_cnnp_dataframe_iter_next(iter, (void**)tensor, 4, 0); |
1115 | 2 | assert(status == 0); |
1116 | 2 | ccv_nnc_tensor_t word_indices_tensor[1]; |
1117 | 2 | ccv_nnc_tensor_t mask_tensor[1]; |
1118 | 2 | ccv_nnc_tensor_t index_tensor[1]; |
1119 | 2 | ccv_nnc_tensor_variable_t word_indices[1]; |
1120 | 2 | ccv_nnc_tensor_variable_t word_vec[1]; |
1121 | 2 | ccv_nnc_tensor_variable_t vec[1 * 3]; |
1122 | 2 | ccv_nnc_tensor_variable_t out[1]; |
1123 | 4 | for (j = 0; j < 1; j++2 ) |
1124 | 2 | { |
1125 | 2 | const int batch_length = tensor[j * 4][0]->info.dim[1]; |
1126 | 2 | ccv_nnc_tensor_param_t word_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * batch_length); |
1127 | 2 | CCV_TENSOR_SET_DEVICE_ID(word_indices_params.type, j); |
1128 | 2 | word_indices_tensor[j] = ccv_nnc_tensor(tensor[j * 4][0]->data.f32, word_indices_params, 0); |
1129 | 2 | word_indices[j] = ccv_nnc_tensor_variable_new(dynamic_graph, word_indices_params); |
1130 | 2 | ccv_nnc_tensor_variable_set(dynamic_graph, word_indices[j], &word_indices_tensor[j]); |
1131 | 2 | ccv_nnc_tensor_param_t pre_vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size * batch_length, embedding_size); |
1132 | 2 | CCV_TENSOR_SET_DEVICE_ID(pre_vec_params.type, j); |
1133 | 2 | word_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params); |
1134 | 2 | ccv_nnc_tensor_param_t vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size, batch_length, embedding_size); |
1135 | 2 | CCV_TENSOR_SET_DEVICE_ID(vec_params.type, j); |
1136 | 2 | vec[j * 3] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, word_vec[j], ccv_nnc_no_ofs, DIM_ALLOC(), vec_params); |
1137 | 2 | ccv_nnc_tensor_param_t mask_params = CPU_TENSOR_NCHW(32S, batch_size); |
1138 | 2 | assert(((ccv_nnc_tensor_t*)tensor[j * 4 + 2])->info.dim[0] == batch_size); |
1139 | 2 | CCV_TENSOR_SET_DEVICE_ID(mask_params.type, j); |
1140 | 2 | ccv_nnc_tensor_param_t index_params = GPU_TENSOR_NCHW(000, 32S, batch_size); |
1141 | 2 | assert(tensor[j * 4 + 3][0]->info.dim[0] == batch_size); |
1142 | 2 | CCV_TENSOR_SET_DEVICE_ID(index_params.type, j); |
1143 | 2 | mask_tensor[j] = ccv_nnc_tensor(((ccv_nnc_tensor_t*)tensor[j * 4 + 2])->data.i32, mask_params, 0); |
1144 | 2 | index_tensor[j] = ccv_nnc_tensor(tensor[j * 4 + 3][0]->data.i32, index_params, 0); |
1145 | 2 | vec[j * 3 + 1] = ccv_nnc_tensor_constant_new(dynamic_graph, mask_params); |
1146 | 2 | vec[j * 3 + 2] = ccv_nnc_tensor_constant_new(dynamic_graph, index_params); |
1147 | 2 | ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 3 + 1], &mask_tensor[j]); |
1148 | 2 | ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 3 + 2], &index_tensor[j]); |
1149 | 2 | out[j] = ccv_nnc_tensor_variable_new(dynamic_graph); |
1150 | 2 | } |
1151 | 2 | ccv_nnc_tensor_variable_t tvin[1 * 2]; |
1152 | 4 | for (j = 0; j < 1; j++2 ) |
1153 | 2 | tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = word_indices[j]; |
1154 | 2 | ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, 2, word_vec, 1, 1, 0); |
1155 | 2 | ccv_nnc_dynamic_graph_evaluate(dynamic_graph, lstm, 0, vec, 3, out, 1, 0, 0); |
1156 | 2 | ccv_nnc_tensor_variable_t sigmoid[1]; |
1157 | 2 | ccv_nnc_tensor_variable_t fit[1]; |
1158 | 2 | ccv_nnc_tensor_variable_t vocab_vec_grad[1]; |
1159 | 4 | for (j = 0; j < 1; j++2 ) |
1160 | 2 | { |
1161 | 2 | sigmoid[j] = ccv_nnc_tensor_variable_new(dynamic_graph); |
1162 | 2 | fit[j] = ccv_nnc_tensor_variable_new(dynamic_graph); |
1163 | 2 | ccv_nnc_tensor_variable_set(dynamic_graph, fit[j], tensor[j * 4 + 1][0]); |
1164 | 2 | vocab_vec_grad[j] = ccv_nnc_tensor_variable_new(dynamic_graph); |
1165 | 2 | } |
1166 | 2 | ccv_nnc_tensor_variable_t tvout[2]; |
1167 | 4 | for (j = 0; j < 1; j++2 ) |
1168 | 2 | tvin[j * 2] = out[j], tvin[j * 2 + 1] = fit[j], tvout[j * 2] = 0, tvout[j * 2 + 1] = sigmoid[j]; |
1169 | 2 | ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, tvin, 2, tvout, 2, 1, 0); |
1170 | 4 | for (j = 0; j < 1; j++2 ) |
1171 | 2 | tvin[j] = vocab_vec[j], tvout[j] = vocab_vec_grad[j]; |
1172 | 2 | ccv_nnc_dynamic_graph_backward(dynamic_graph, sigmoid, 1, 0, tvin, 1, tvout, 1, 0); |
1173 | 2 | ccv_cnnp_model_set_minimizer(lstm, optim, 0, 0, 0); |
1174 | 4 | for (j = 0; j < 1; j++2 ) |
1175 | 2 | tvin[j] = vocab_vec_grad[j], tvout[j] = vocab_vec[j]; |
1176 | 2 | ccv_nnc_dynamic_graph_apply_gradients(dynamic_graph, optim, tvin, 1, tvout, 1, saved_auxs, 1, 0); |
1177 | 4 | for (j = 0; j < 1; j++2 ) |
1178 | 2 | { |
1179 | 2 | ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 3]); |
1180 | 2 | ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 3 + 1]); |
1181 | 2 | ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 3 + 2]); |
1182 | 2 | ccv_nnc_tensor_variable_free(dynamic_graph, word_vec[j]); |
1183 | 2 | ccv_nnc_tensor_variable_free(dynamic_graph, word_indices[j]); |
1184 | 2 | ccv_nnc_tensor_variable_free(dynamic_graph, out[j]); |
1185 | 2 | ccv_nnc_tensor_variable_free(dynamic_graph, fit[j]); |
1186 | 2 | ccv_nnc_tensor_variable_free(dynamic_graph, sigmoid[j]); |
1187 | 2 | ccv_nnc_tensor_variable_free(dynamic_graph, vocab_vec_grad[j]); |
1188 | 2 | } |
1189 | 2 | if ((i + 1) % epoch_end == 0) |
1190 | 1 | { |
1191 | 1 | ++epoch; |
1192 | 1 | ccv_cnnp_dataframe_shuffle(train_data); |
1193 | 1 | ccv_cnnp_dataframe_iter_set_cursor(iter, 0); |
1194 | 1 | } |
1195 | 2 | } |
1196 | 1 | int correct = 0; |
1197 | 1 | ccv_cnnp_dataframe_iter_t* const test_iter = ccv_cnnp_dataframe_iter_new(test_batched_data, test_gpu_batched, 4); |
1198 | 1 | int k; |
1199 | 1 | ccv_cnnp_dataframe_shuffle(test_data); |
1200 | 1 | ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 1); |
1201 | 1 | const int row_count = ccv_cnnp_dataframe_row_count(test_data); |
1202 | 3 | for (k = 0; k < row_count; k += batch_size2 ) |
1203 | 2 | { |
1204 | 2 | ccv_cnnp_dataframe_iter_next(test_iter, (void**)tensor, 4, 0); |
1205 | 2 | ccv_nnc_tensor_t word_indices_tensor[1]; |
1206 | 2 | ccv_nnc_tensor_t mask_tensor[1]; |
1207 | 2 | ccv_nnc_tensor_t index_tensor[1]; |
1208 | 2 | ccv_nnc_tensor_variable_t word_indices[1]; |
1209 | 2 | ccv_nnc_tensor_variable_t word_vec[1]; |
1210 | 2 | ccv_nnc_tensor_variable_t vec[3]; |
1211 | 2 | ccv_nnc_tensor_variable_t out[1]; |
1212 | 4 | for (j = 0; j < 1; j++2 ) |
1213 | 2 | { |
1214 | 2 | const int batch_length = tensor[j * 4][0]->info.dim[1]; |
1215 | 2 | ccv_nnc_tensor_param_t word_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * batch_length); |
1216 | 2 | CCV_TENSOR_SET_DEVICE_ID(word_indices_params.type, j); |
1217 | 2 | word_indices_tensor[j] = ccv_nnc_tensor(tensor[j * 4][0]->data.f32, word_indices_params, 0); |
1218 | 2 | word_indices[j] = ccv_nnc_tensor_variable_new(dynamic_graph, word_indices_params); |
1219 | 2 | ccv_nnc_tensor_variable_set(dynamic_graph, word_indices[j], &word_indices_tensor[j]); |
1220 | 2 | ccv_nnc_tensor_param_t pre_vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size * batch_length, embedding_size); |
1221 | 2 | CCV_TENSOR_SET_DEVICE_ID(pre_vec_params.type, j); |
1222 | 2 | word_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params); |
1223 | 2 | ccv_nnc_tensor_param_t vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size, batch_length, embedding_size); |
1224 | 2 | CCV_TENSOR_SET_DEVICE_ID(vec_params.type, j); |
1225 | 2 | vec[j * 3] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, word_vec[j], ccv_nnc_no_ofs, DIM_ALLOC(), vec_params); |
1226 | 2 | ccv_nnc_tensor_param_t mask_params = CPU_TENSOR_NCHW(32S, batch_size); |
1227 | 2 | CCV_TENSOR_SET_DEVICE_ID(mask_params.type, j); |
1228 | 2 | assert(((ccv_nnc_tensor_t*)tensor[j * 4 + 2])->info.dim[0] == batch_size); |
1229 | 2 | ccv_nnc_tensor_param_t index_params = GPU_TENSOR_NCHW(000, 32S, batch_size); |
1230 | 2 | CCV_TENSOR_SET_DEVICE_ID(index_params.type, j); |
1231 | 2 | assert(tensor[j * 4 + 3][0]->info.dim[0] == batch_size); |
1232 | 2 | mask_tensor[j] = ccv_nnc_tensor(((ccv_nnc_tensor_t*)tensor[j * 4 + 2])->data.i32, mask_params, 0); |
1233 | 2 | index_tensor[j] = ccv_nnc_tensor(tensor[j * 4 + 3][0]->data.i32, index_params, 0); |
1234 | 2 | vec[j * 3 + 1] = ccv_nnc_tensor_constant_new(dynamic_graph, mask_params); |
1235 | 2 | ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 3 + 1], &mask_tensor[j]); |
1236 | 2 | vec[j * 3 + 2] = ccv_nnc_tensor_constant_new(dynamic_graph, index_params); |
1237 | 2 | ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 3 + 2], &index_tensor[j]); |
1238 | 2 | out[j] = ccv_nnc_tensor_variable_new(dynamic_graph); |
1239 | 2 | } |
1240 | 2 | ccv_nnc_tensor_variable_t tvin[2]; |
1241 | 4 | for (j = 0; j < 1; j++2 ) |
1242 | 2 | tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = word_indices[j]; |
1243 | 2 | ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, 2, word_vec, 1, 1, 0); |
1244 | 2 | ccv_nnc_dynamic_graph_evaluate(dynamic_graph, lstm, 1, vec, 3, out, 1, 0, 0); |
1245 | 2 | int d; |
1246 | 4 | for (d = 0; d < 1; d++2 ) |
1247 | 2 | { |
1248 | 2 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, out[d])), TENSOR_LIST(out_cpu), 0); |
1249 | 2 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor[d * 4 + 1][0]), TENSOR_LIST(fit_cpu), 0); |
1250 | 130 | for (j = 0; j < ccv_min(row_count - k - d * batch_size, batch_size); j++128 ) |
1251 | 128 | { |
1252 | 128 | const int truth = (fit_cpu->data.f32[j] > 0.5); |
1253 | 128 | const int prediction = (out_cpu->data.f32[j] > 0); |
1254 | 128 | if (truth == prediction) |
1255 | 91 | ++correct; |
1256 | 128 | } |
1257 | 2 | } |
1258 | 4 | for (j = 0; j < 1; j++2 ) |
1259 | 2 | { |
1260 | 2 | ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 3]); |
1261 | 2 | ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 3 + 1]); |
1262 | 2 | ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 3 + 2]); |
1263 | 2 | ccv_nnc_tensor_variable_free(dynamic_graph, word_vec[j]); |
1264 | 2 | ccv_nnc_tensor_variable_free(dynamic_graph, word_indices[j]); |
1265 | 2 | ccv_nnc_tensor_variable_free(dynamic_graph, out[j]); |
1266 | 2 | } |
1267 | 2 | } |
1268 | 1 | ccv_cnnp_dataframe_iter_free(test_iter); |
1269 | 1 | ccv_cnnp_model_free(lstm); |
1270 | 1 | ccv_cnnp_dataframe_iter_free(iter); |
1271 | 1 | ccv_cnnp_dataframe_free(batched_data); |
1272 | 1 | ccv_cnnp_dataframe_free(test_batched_data); |
1273 | 1 | ccv_nnc_dynamic_graph_free(dynamic_graph); |
1274 | 1 | ccv_nnc_tensor_free(out_cpu); |
1275 | 1 | return correct; |
1276 | 1 | } |
1277 | | |
TEST_CASE("train a binary LSTM classifier on imdb reviews to 80% with mix of dynamic graph and cnnp model and dynamic inputs")
{
	// Requires GPU backends for GEMM (cuBLAS) and average pooling (cuDNN); skip otherwise.
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GEMM_FORWARD, CCV_NNC_BACKEND_GPU_CUBLAS) &&
		ccv_nnc_cmd_ok(CCV_NNC_GEMM_BACKWARD, CCV_NNC_BACKEND_GPU_CUBLAS) &&
		ccv_nnc_cmd_ok(CCV_NNC_AVERAGE_POOL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
		ccv_nnc_cmd_ok(CCV_NNC_AVERAGE_POOL_BACKWARD, CCV_NNC_BACKEND_GPU_CUDNN));
	// Hard-coded local copy of the aclImdb dataset.
	const char* const train_list = "/fast/Data/IMDB_Movie_Reviews/aclImdb/train.txt";
	const char* const test_list = "/fast/Data/IMDB_Movie_Reviews/aclImdb/test.txt";
	const char* const vocab_file = "/fast/Data/IMDB_Movie_Reviews/aclImdb/imdb.vocab";
	const char* const base_dir = "/fast/Data/IMDB_Movie_Reviews/aclImdb/";
	// Probe for the dataset files; skip the whole test if any is missing.
	FILE* train_open = fopen(train_list, "rb");
	FILE* test_open = fopen(test_list, "rb");
	FILE* vocab_open = fopen(vocab_file, "rb");
	if (train_open)
		fclose(train_open);
	if (test_open)
		fclose(test_open);
	if (vocab_open)
		fclose(vocab_open);
	if (!train_open || !test_open || !vocab_open)
		{ GUARD_ELSE_RETURN(0); }
	khash_t(vocab_map)* vocab;
	int vocab_size;
	_vocab_init(vocab_file, &vocab, &vocab_size);
	// Reviews are truncated / padded to max_length tokens (see _text_to_tensor_index).
	const int max_length = 512;
	ccv_array_t* train_set;
	ccv_cnnp_dataframe_t* train_data;
	ccv_array_t* test_set;
	ccv_cnnp_dataframe_t* test_data;
	if (!ccv_is_coverage())
	{
		// Full run: train 3 epochs on the whole dataset and require > 80% test accuracy.
		train_set = _array_from_disk_new(train_list, base_dir, vocab, vocab_size, max_length, 0);
		train_data = ccv_cnnp_dataframe_from_array_new(train_set);
		test_set = _array_from_disk_new(test_list, base_dir, vocab, vocab_size, max_length, 0);
		test_data = ccv_cnnp_dataframe_from_array_new(test_set);
		const int correct = train_imdb_lstm(3, vocab_size, 64, max_length, 128, train_data, test_data);
		REQUIRE((float)correct / test_set->rnum > 0.80, "%f should be larger than 80%%", (float)correct / test_set->rnum);
	} else {
		// Coverage run: only 128 examples per split, one epoch, no accuracy check.
		train_set = _array_from_disk_new(train_list, base_dir, vocab, vocab_size, max_length, 128);
		train_data = ccv_cnnp_dataframe_from_array_new(train_set);
		test_set = _array_from_disk_new(test_list, base_dir, vocab, vocab_size, max_length, 128);
		test_data = ccv_cnnp_dataframe_from_array_new(test_set);
		train_imdb_lstm(1, vocab_size, 64, max_length, 128, train_data, test_data);
	}
	ccv_cnnp_dataframe_free(train_data);
	ccv_cnnp_dataframe_free(test_data);
	int i;
	// The dataframes don't own the per-example tensors; free them here.
	for (i = 0; i < train_set->rnum; i++)
	{
		ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(train_set, i))->tensor);
		ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(train_set, i))->mask);
	}
	ccv_array_free(train_set);
	for (i = 0; i < test_set->rnum; i++)
	{
		ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(test_set, i))->tensor);
		ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(test_set, i))->mask);
	}
	ccv_array_free(test_set);
	_vocab_destroy(vocab);
}
1339 | | |
1340 | | #include "case_main.h" |