Coverage Report

Created: 2021-04-12 01:11

/home/liu/buildslave/linux-x64-runtests/build/test/int/nnc/imdb.tests.c
Line
Count
Source (jump to first uncovered line)
1
#include "case.h"
2
#include "ccv_case.h"
3
#include "ccv_nnc_case.h"
4
#include <ccv.h>
5
#include <ccv_internal.h>
6
#include <nnc/ccv_nnc.h>
7
#include <nnc/ccv_nnc_easy.h>
8
#include <3rdparty/dsfmt/dSFMT.h>
9
#include <ctype.h>
10
#include <3rdparty/khash/khash.h>
11
12
// Runs before each test case in this file: initialize the nnc runtime once per case.
TEST_SETUP()
{
	ccv_nnc_init();
}
16
17
KHASH_MAP_INIT_STR(vocab_map, int)
18
19
// Tokenize the text file at `filename` into a 1-D 32-bit integer tensor of
// exactly `max_length` vocabulary indices. Words are lower-cased, split on
// punctuation, and looked up in `vocab`; out-of-vocabulary tokens are dropped.
// If fewer than `max_length` indices are produced, an end marker
// (vocab_size - 2) is appended and the remainder is filled with the pad marker
// (vocab_size - 1). The caller owns the returned tensor.
static CCV_WARN_UNUSED(ccv_nnc_tensor_t*) _text_to_tensor_index(const char* const filename, const khash_t(vocab_map)* const vocab, const int vocab_size, const int max_length)
{
	const int end_flag = vocab_size - 2;
	const int pad_flag = vocab_size - 1;
	char* const word = (char*)ccmalloc(1024);
	ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, max_length), 0);
	FILE* const file = fopen(filename, "r");
	assert(file && "text file doesn't exists"); // was unchecked before: NULL deref on a missing file
	int t = 0;
	while (fscanf(file, "%1023s", word) != EOF)
	{
		if (t >= max_length)
			break;
		int j;
		for (j = 0; word[j]; j++)
			word[j] = tolower((unsigned char)word[j]); // cast: passing a negative char to tolower is undefined behavior
		char* saveptr;
		const char* token = strtok_r(word, ".,<>/~`@#$%^&*+\\\"", &saveptr);
		while (token)
		{
			if (t >= max_length)
				break;
			const khiter_t k = kh_get(vocab_map, vocab, token);
			if (k != kh_end(vocab)) // drop out-of-vocabulary tokens
				tensor->data.i32[t++] = kh_val(vocab, k);
			token = strtok_r(0, ".,<>/~`@#$%^&*+\\\"", &saveptr);
		}
	}
	fclose(file);
	if (t < max_length)
	{
		tensor->data.i32[t] = end_flag;
		for (++t; t < max_length; t++)
			tensor->data.i32[t] = pad_flag;
	}
	ccfree(word);
	return tensor;
}
56
57
// One labelled text sample, as produced by _array_from_disk_new.
typedef struct {
	ccv_nnc_tensor_t* tensor; // max_length 32S tensor of vocabulary indices (end/pad padded)
	ccv_nnc_tensor_t* mask; // 1-element 32S tensor: index of the first pad marker (0 when none was found)
	int c; // class label from the list file
} ccv_nnc_text_t;
62
63
// Read a "<label> <relative-path>" list file and build an array of
// ccv_nnc_text_t entries: the tokenized text tensor, a 1-element mask tensor
// holding the position of the first pad marker (0 when the text fills
// max_length), and the integer class label. Stops after `limit` entries when
// limit > 0. The caller owns the returned array and the tensors inside it.
static ccv_array_t* _array_from_disk_new(const char* const list, const char* const base_dir, const khash_t(vocab_map)* const vocab, const int vocab_size, const int max_length, const int limit)
{
	FILE *r = fopen(list, "r");
	assert(r && "list doesn't exists");
	const int pad_flag = vocab_size - 1;
	ccv_array_t* categorizeds = ccv_array_new(sizeof(ccv_nnc_text_t), 64, 0);
	int c;
	char* file = (char*)ccmalloc(1024);
	char* filename = (char*)ccmalloc(1024);
	while (fscanf(r, "%d %1023s", &c, file) != EOF)
	{
		// snprintf always NUL-terminates and cannot overflow the 1024-byte
		// buffer, unlike the strncpy-based concatenation it replaces (which
		// could leave filename unterminated, and underflowed the copy size
		// when base_dir was longer than the buffer).
		if (base_dir != 0)
			snprintf(filename, 1024, "%s/%s", base_dir, file);
		else
			snprintf(filename, 1024, "%s", file);
		ccv_nnc_tensor_t* const tensor = _text_to_tensor_index(filename, vocab, vocab_size, max_length);
		// Locate the first pad marker to recover the unpadded sequence length.
		int length = 0;
		int i;
		for (i = 0; !length && i < max_length; i++)
			if (tensor->data.i32[i] == pad_flag)
				length = i;
		ccv_nnc_tensor_t* const mask = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 1), 0);
		mask->data.i32[0] = length;
		ccv_nnc_text_t categorized = {
			.tensor = tensor,
			.mask = mask,
			.c = c
		};
		ccv_array_push(categorizeds, &categorized);
		if (limit > 0 && categorizeds->rnum >= limit)
			break;
	}
	ccfree(filename);
	ccfree(file);
	fclose(r);
	return categorizeds;
}
103
104
static ccv_cnnp_model_t* _self_attention_new(const int k, const int h, const int b, const int t, const float dropout)
105
6
{
106
6
  const ccv_cnnp_model_io_t x = ccv_cnnp_input();
107
6
  ccv_cnnp_model_io_t mask = ccv_cnnp_input();
108
6
  ccv_cnnp_model_io_t multiheads = ccv_cnnp_model_apply(ccv_cnnp_reshape(DIM_ALLOC(b * t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(x));
109
6
  ccv_cnnp_model_t* const tokeys = ccv_cnnp_dense(k * h, 1, "tokeys");
110
6
  ccv_cnnp_model_t* const toqueries = ccv_cnnp_dense(k * h, 1, "toqueries");
111
6
  ccv_cnnp_model_t* const tovalues = ccv_cnnp_dense(k * h, 1, "tovalues");
112
6
  ccv_cnnp_model_io_t keys = ccv_cnnp_model_apply(tokeys, MODEL_IO_LIST(multiheads));
113
6
  ccv_cnnp_model_io_t queries = ccv_cnnp_model_apply(toqueries, MODEL_IO_LIST(multiheads));
114
6
  ccv_cnnp_model_io_t values = ccv_cnnp_model_apply(tovalues, MODEL_IO_LIST(multiheads));
115
6
  keys = ccv_cnnp_model_apply(ccv_cnnp_reshape(DIM_ALLOC(t, b, h, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(keys));
116
6
  queries = ccv_cnnp_model_apply(ccv_cnnp_reshape(DIM_ALLOC(t, b, h, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(queries));
117
6
  values = ccv_cnnp_model_apply(ccv_cnnp_reshape(DIM_ALLOC(t, b, h, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(values));
118
6
  keys = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 2, 0), MODEL_IO_LIST(keys));
119
6
  queries = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 2, 0), MODEL_IO_LIST(queries));
120
6
  values = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 2, 0), MODEL_IO_LIST(values));
121
6
  keys = ccv_cnnp_model_apply(ccv_cnnp_reshape(DIM_ALLOC(b * h, t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(keys));
122
6
  queries = ccv_cnnp_model_apply(ccv_cnnp_reshape(DIM_ALLOC(b * h, t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(queries));
123
6
  values = ccv_cnnp_model_apply(ccv_cnnp_reshape(DIM_ALLOC(b * h, t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(values));
124
6
  ccv_cnnp_model_io_t dot = ccv_cnnp_model_apply(ccv_cnnp_matmul(NO_TRANSPOSE, TRANSPOSE(1, 2), 0), MODEL_IO_LIST(queries, keys));
125
6
  const float scale = 1. / sqrt(k);
126
6
  dot = ccv_cnnp_model_apply(ccv_cnnp_scalar_mul(scale, 0), MODEL_IO_LIST(dot));
127
6
  dot = ccv_cnnp_model_apply(ccv_cnnp_masked_fill(0, -1e9, 0), MODEL_IO_LIST(dot, mask));
128
6
  dot = ccv_cnnp_model_apply(ccv_cnnp_reshape(DIM_ALLOC(b * h * t, t), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(dot));
129
6
  dot = ccv_cnnp_model_apply(ccv_cnnp_softmax(0), MODEL_IO_LIST(dot));
130
6
  if (dropout > 0)
131
6
    dot = ccv_cnnp_model_apply(ccv_cnnp_dropout(dropout, 0, 0), MODEL_IO_LIST(dot));
132
6
  dot = ccv_cnnp_model_apply(ccv_cnnp_reshape(DIM_ALLOC(b * h, t, t), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(dot));
133
6
  ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(ccv_cnnp_matmul(NO_TRANSPOSE, NO_TRANSPOSE, 0), MODEL_IO_LIST(dot, values));
134
6
  out = ccv_cnnp_model_apply(ccv_cnnp_reshape(DIM_ALLOC(h, b, t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
135
6
  out = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 2, 0), MODEL_IO_LIST(out));
136
6
  out = ccv_cnnp_model_apply(ccv_cnnp_reshape(DIM_ALLOC(b * t, h * k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
137
6
  ccv_cnnp_model_t* const unifyheads = ccv_cnnp_dense(k, 0, "unifyheads");
138
6
  out = ccv_cnnp_model_apply(unifyheads, MODEL_IO_LIST(out));
139
6
  out = ccv_cnnp_model_apply(ccv_cnnp_reshape(DIM_ALLOC(t, b, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
140
6
  return ccv_cnnp_model_new(MODEL_IO_LIST(x, mask), MODEL_IO_LIST(out), "self-attention");
141
6
}
142
143
static ccv_cnnp_model_t* _transformer_block_new(const int k, const int h, const int b, const int t, const int ff, const float dropout)
144
6
{
145
6
  ccv_cnnp_model_io_t const x = ccv_cnnp_input();
146
6
  ccv_cnnp_model_io_t const mask = ccv_cnnp_input();
147
6
  ccv_cnnp_model_t* const self_attention = _self_attention_new(k, h, b, t, dropout);
148
6
  ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(self_attention, MODEL_IO_LIST(x, mask));
149
6
  out = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(x, out));
150
6
  ccv_cnnp_model_io_t first = ccv_cnnp_model_apply(ccv_cnnp_layer_norm(1e-5, DIM_ALLOC(2), 1, 0), MODEL_IO_LIST(out));
151
6
  if (dropout)
152
6
    out = ccv_cnnp_model_apply(ccv_cnnp_dropout(dropout, 0, 0), MODEL_IO_LIST(first));
153
0
  else
154
0
    out = first;
155
6
  out = ccv_cnnp_model_apply(ccv_cnnp_reshape(DIM_ALLOC(b * t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
156
6
  out = ccv_cnnp_model_apply(ccv_cnnp_dense(ff, 0, 0), MODEL_IO_LIST(out));
157
6
  out = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(out));
158
6
  out = ccv_cnnp_model_apply(ccv_cnnp_dense(k, 0, 0), MODEL_IO_LIST(out));
159
6
  out = ccv_cnnp_model_apply(ccv_cnnp_reshape(DIM_ALLOC(t, b, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
160
6
  out = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(first, out));
161
6
  out = ccv_cnnp_model_apply(ccv_cnnp_layer_norm(1e-5, DIM_ALLOC(2), 1, 0), MODEL_IO_LIST(out));
162
6
  if (dropout > 0)
163
6
    out = ccv_cnnp_model_apply(ccv_cnnp_dropout(dropout, 0, 0), MODEL_IO_LIST(out));
164
6
  return ccv_cnnp_model_new(MODEL_IO_LIST(x, mask), MODEL_IO_LIST(out), "transformer");
165
6
}
166
167
static ccv_cnnp_model_t* _classifier_transformer_new(const int layers, const int k, const int h, const int b, const int t, const int ff, const float dropout)
168
1
{
169
1
  ccv_cnnp_model_io_t const x = ccv_cnnp_input();
170
1
  ccv_cnnp_model_io_t const mask = ccv_cnnp_input();
171
1
  ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 1, 0), MODEL_IO_LIST(x));
172
1
  int i;
173
3
  for (i = 0; i < layers; 
i++2
)
174
2
    out = ccv_cnnp_model_apply(_transformer_block_new(k, h, b, t, ff, dropout), MODEL_IO_LIST(out, mask));
175
1
  out = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 1, 0), MODEL_IO_LIST(out)); // t, b, k -> b, t, k
176
1
  out = ccv_cnnp_model_apply(ccv_cnnp_transpose(1, 2, 0), MODEL_IO_LIST(out)); // b, t, k -> b, k, t
177
1
  out = ccv_cnnp_model_apply(ccv_cnnp_reshape(DIM_ALLOC(b, k, t, 1), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
178
1
  out = ccv_cnnp_model_apply(ccv_cnnp_average_pool(DIM_ALLOC(0, 0), ccv_nnc_no_hint, 0), MODEL_IO_LIST(out));
179
1
  // Last layer, get it to 2.
180
1
  out = ccv_cnnp_model_apply(ccv_cnnp_flatten(0), MODEL_IO_LIST(out));
181
1
  out = ccv_cnnp_model_apply(ccv_cnnp_dense(2, 0, 0), MODEL_IO_LIST(out));
182
1
  return ccv_cnnp_model_new(MODEL_IO_LIST(x, mask), MODEL_IO_LIST(out), "classifier");
183
1
}
184
185
// Hyper-parameters handed to the ccv_cnnp_dynamic_new callbacks below.
typedef struct {
	int layers; // number of stacked transformer blocks
	int h; // number of attention heads
	int ff; // feed-forward width as a multiple of the embedding size
	float dropout; // dropout probability; 0 disables dropout
} classifier_transformer_params_t;
191
192
static ccv_cnnp_model_t* _dynamic_classifier_transformer(const ccv_nnc_tensor_param_t* const inputs, const int input_size, void* const context)
193
1
{
194
1
  const classifier_transformer_params_t* const params = (classifier_transformer_params_t*)context;
195
1
  const int b = inputs[0].dim[0];
196
1
  const int t = inputs[0].dim[1];
197
1
  const int k = inputs[0].dim[2];
198
1
  const int ff = params->ff * k;
199
1
  return _classifier_transformer_new(params->layers, k, params->h, b, t, ff, params->dropout);
200
1
}
201
202
static ccv_cnnp_model_t* _binary_classifier_transformer_new(const int layers, const int k, const int h, const int b, const int t, const int ff, const float dropout)
203
2
{
204
2
  ccv_cnnp_model_io_t const x = ccv_cnnp_input();
205
2
  ccv_cnnp_model_io_t const mask = ccv_cnnp_input();
206
2
  ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 1, 0), MODEL_IO_LIST(x));
207
2
  int i;
208
6
  for (i = 0; i < layers; 
i++4
)
209
4
    out = ccv_cnnp_model_apply(_transformer_block_new(k, h, b, t, ff, dropout), MODEL_IO_LIST(out, mask));
210
2
  out = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 1, 0), MODEL_IO_LIST(out)); // t, b, k -> b, t, k
211
2
  out = ccv_cnnp_model_apply(ccv_cnnp_transpose(1, 2, 0), MODEL_IO_LIST(out)); // b, t, k -> b, k, t
212
2
  out = ccv_cnnp_model_apply(ccv_cnnp_reshape(DIM_ALLOC(b, k, t, 1), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
213
2
  out = ccv_cnnp_model_apply(ccv_cnnp_average_pool(DIM_ALLOC(0, 0), ccv_nnc_no_hint, 0), MODEL_IO_LIST(out));
214
2
  // Last layer, get it to 1.
215
2
  out = ccv_cnnp_model_apply(ccv_cnnp_flatten(0), MODEL_IO_LIST(out));
216
2
  out = ccv_cnnp_model_apply(ccv_cnnp_dense(1, 0, 0), MODEL_IO_LIST(out));
217
2
  return ccv_cnnp_model_new(MODEL_IO_LIST(x, mask), MODEL_IO_LIST(out), "classifier");
218
2
}
219
220
static ccv_cnnp_model_t* _dynamic_binary_classifier_transformer(const ccv_nnc_tensor_param_t* const inputs, const int input_size, void* const context)
221
2
{
222
2
  const classifier_transformer_params_t* const params = (classifier_transformer_params_t*)context;
223
2
  const int b = inputs[0].dim[0];
224
2
  const int t = inputs[0].dim[1];
225
2
  const int k = inputs[0].dim[2];
226
2
  const int ff = params->ff * k;
227
2
  return _binary_classifier_transformer_new(params->layers, k, params->h, b, t, ff, params->dropout);
228
2
}
229
230
// Load a whitespace-separated vocabulary file into a hash map from word to its
// 0-based position in the file, reporting the map in *vocab_ref and the word
// count in *vocab_size_ref. Keys are strdup'ed copies; release the map with
// _vocab_destroy.
static void _vocab_init(const char* const vocab_file, khash_t(vocab_map)** const vocab_ref, int* const vocab_size_ref)
{
	FILE* const vocab_ptr = fopen(vocab_file, "r");
	assert(vocab_ptr && "vocab file doesn't exists"); // was unchecked before: NULL deref on a missing file
	khash_t(vocab_map)* const vocab = kh_init(vocab_map);
	int i, ret;
	char* const word = (char*)ccmalloc(1024);
	for (i = 0; fscanf(vocab_ptr, "%1023s", word) != EOF; i++)
	{
		char* const key = strdup(word);
		const khiter_t k = kh_put(vocab_map, vocab, key, &ret);
		if (ret == 0) // duplicate word: the map kept its old key, so free this fresh copy (was leaked before)
			free(key);
		kh_val(vocab, k) = i;
	}
	ccfree(word);
	fclose(vocab_ptr);
	*vocab_ref = vocab;
	*vocab_size_ref = i;
}
246
247
// Release a vocabulary map built by _vocab_init, including the strdup'ed keys.
static void _vocab_destroy(khash_t(vocab_map)* const vocab)
{
	khiter_t it;
	for (it = kh_begin(vocab); it != kh_end(vocab); ++it)
	{
		if (!kh_exist(vocab, it))
			continue;
		free((void*)kh_key(vocab, it)); // keys were strdup'ed in _vocab_init
	}
	kh_destroy(vocab_map, vocab);
}
255
256
// Train the categorical (2-logit) transformer classifier over train_data for
// `epoch_limit` epochs across all available GPUs, then classify test_data once
// and return the number of correct predictions. The word embedding table
// (vocab_size x embedding_size) and position embedding table
// (max_length x embedding_size) live as dynamic graph variables and are
// trained jointly with the cnnp transformer model ("mix of dynamic graph and
// cnnp model").
static int train_imdb_fix(const int epoch_limit, const int vocab_size, const int batch_size, const int max_length, const int embedding_size, ccv_cnnp_dataframe_t* const train_data, ccv_cnnp_dataframe_t* const test_data)
{
	// Derive dataframe columns: token-index tensor, 2-class one-hot label, and sequence-length mask.
	const int tensor_idx = ccv_cnnp_dataframe_extract_value(train_data, 0, offsetof(ccv_nnc_text_t, tensor), 0);
	const int one_hot_idx = ccv_cnnp_dataframe_one_hot(train_data, 0, offsetof(ccv_nnc_text_t, c), 2, 1, 0, CCV_32F, CCV_TENSOR_FORMAT_NCHW, 0);
	const int mask_idx = ccv_cnnp_dataframe_extract_value(train_data, 0, offsetof(ccv_nnc_text_t, mask), 0);
	const int device_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
	ccv_cnnp_dataframe_t* const batched_data = ccv_cnnp_dataframe_combine_new(train_data, COLUMN_ID_LIST(tensor_idx, one_hot_idx, mask_idx), batch_size, device_count, CCV_TENSOR_FORMAT_NCHW);
	const int test_tensor_idx = ccv_cnnp_dataframe_extract_value(test_data, 0, offsetof(ccv_nnc_text_t, tensor), 0);
	const int test_one_hot_idx = ccv_cnnp_dataframe_one_hot(test_data, 0, offsetof(ccv_nnc_text_t, c), 2, 1, 0, CCV_32F, CCV_TENSOR_FORMAT_NCHW, 0);
	const int test_mask_idx = ccv_cnnp_dataframe_extract_value(test_data, 0, offsetof(ccv_nnc_text_t, mask), 0);
	ccv_cnnp_dataframe_t* const test_batched_data = ccv_cnnp_dataframe_combine_new(test_data, COLUMN_ID_LIST(test_tensor_idx, test_one_hot_idx, test_mask_idx), batch_size, device_count, CCV_TENSOR_FORMAT_NCHW);
	// Per device i: [i] holds the (tokens, label) columns copied to GPU i,
	// [i + device_count] holds the squared attention mask column on GPU i.
	int gpu_batched[device_count * 2];
	int test_gpu_batched[device_count * 2];
	int i, j;
	for (i = 0; i < device_count; i++)
	{
		const int seq_len_batched = ccv_cnnp_dataframe_extract_tuple(batched_data, 0, i * 3 + 2, 0);
		const int tupled_mask_batched = ccv_cnnp_dataframe_one_squared(batched_data, COLUMN_ID_LIST(seq_len_batched), 0, max_length, 0);
		gpu_batched[i] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, 0, i * 3, 2, i, 0);
		gpu_batched[i + device_count] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, tupled_mask_batched, 0, 1, i, 0);
		const int test_seq_len_batched = ccv_cnnp_dataframe_extract_tuple(test_batched_data, 0, i * 3 + 2, 0);
		const int test_tupled_mask_batched = ccv_cnnp_dataframe_one_squared(test_batched_data, COLUMN_ID_LIST(test_seq_len_batched), 0, max_length, 0);
		test_gpu_batched[i] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, 0, i * 3, 2, i, 0);
		test_gpu_batched[i + device_count] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, test_tupled_mask_batched, 0, 1, i, 0);
	}
	ccv_cnnp_dataframe_iter_t* const iter = ccv_cnnp_dataframe_iter_new(batched_data, gpu_batched, device_count * 2);
	ccv_nnc_dynamic_graph_t* const dynamic_graph = ccv_nnc_dynamic_graph_new();
	ccv_nnc_tensor_variable_t vocab_vec[device_count];
	ccv_nnc_tensor_variable_t seq_vec[device_count];
	// Initialize both embedding tables on device 0 with uniform(-1, 1), then
	// broadcast to the other devices; gradients are not needed for the init.
	ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 1);
	ccv_nnc_tensor_param_t vocab_params = GPU_TENSOR_NCHW(000, 32F, vocab_size, embedding_size);
	vocab_vec[0] = ccv_nnc_tensor_variable_new(dynamic_graph, vocab_params);
	ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(vocab_vec[0]), 0, 0);
	ccv_nnc_tensor_param_t seq_params = GPU_TENSOR_NCHW(000, 32F, max_length, embedding_size);
	seq_vec[0] = ccv_nnc_tensor_variable_new(dynamic_graph, seq_params);
	ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(seq_vec[0]), 0, 0);
	for (i = 1; i < device_count; i++)
	{
		CCV_TENSOR_SET_DEVICE_ID(vocab_params.type, i);
		vocab_vec[i] = ccv_nnc_tensor_variable_new(dynamic_graph, vocab_params);
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(vocab_vec[0]), TENSOR_VARIABLE_LIST(vocab_vec[i]), 0, 0);
		CCV_TENSOR_SET_DEVICE_ID(seq_params.type, i);
		seq_vec[i] = ccv_nnc_tensor_variable_new(dynamic_graph, seq_params);
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(seq_vec[0]), TENSOR_VARIABLE_LIST(seq_vec[i]), 0, 0);
	}
	ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 0);
	// Position indices 0..max_length-1, repeated per batch row, uploaded once
	// per device as a graph constant for the position-embedding lookup.
	ccv_nnc_tensor_t* const seq_indices_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32S, batch_size * max_length), 0);
	for (i = 0; i < batch_size; i++)
		for (j = 0; j < max_length; j++)
			seq_indices_cpu->data.i32[i * max_length + j] = j;
	ccv_nnc_tensor_variable_t seq_indices[device_count];
	for (i = 0; i < device_count; i++)
	{
		ccv_nnc_tensor_param_t seq_params = GPU_TENSOR_NCHW(000, 32S, batch_size * max_length);
		CCV_TENSOR_SET_DEVICE_ID(seq_params.type, i);
		seq_indices[i] = ccv_nnc_tensor_constant_new(dynamic_graph, seq_params);
		ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(seq_indices_cpu), TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, seq_indices[i], 0)), 0);
	}
	ccv_nnc_tensor_free(seq_indices_cpu);
	classifier_transformer_params_t classifier_transformer_params = {
		.layers = 2,
		.h = 8,
		.ff = 4,
		.dropout = 0.1,
	};
	ccv_cnnp_model_t* const transformer = ccv_cnnp_dynamic_new(_dynamic_classifier_transformer, &classifier_transformer_params, 0);
	ccv_cnnp_model_set_data_parallel(transformer, device_count);
	const int epoch_end = (ccv_cnnp_dataframe_row_count(train_data) + device_count * batch_size - 1) / (device_count * batch_size);
	ccv_cnnp_dataframe_shuffle(train_data);
	ccv_nnc_cmd_t adam = CMD_ADAM_FORWARD(1, 0.0001, 0.9, 0.98, 0, 1e-9);
	const int aux_size = ccv_nnc_minimizer_saved_aux_size(adam);
	// Adam saved-aux slots for the two embedding tables on every device.
	ccv_nnc_tensor_variable_t saved_auxs[device_count * aux_size * 2];
	for (i = 0; i < device_count; i++)
	{
		for (j = 0; j < aux_size; j++)
		{
			ccv_nnc_tensor_param_t saved_aux_params = GPU_TENSOR_NCHW(000, 32F, vocab_size, embedding_size);
			CCV_TENSOR_SET_DEVICE_ID(saved_aux_params.type, i);
			saved_auxs[i * aux_size * 2 + j] = ccv_nnc_tensor_variable_new(dynamic_graph);
		}
		for (j = 0; j < aux_size; j++)
		{
			ccv_nnc_tensor_param_t saved_aux_params = GPU_TENSOR_NCHW(000, 32F, max_length, embedding_size);
			CCV_TENSOR_SET_DEVICE_ID(saved_aux_params.type, i);
			saved_auxs[i* aux_size * 2 + aux_size + j] = ccv_nnc_tensor_variable_new(dynamic_graph);
		}
	}
	ccv_nnc_tensor_t* const out_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, batch_size, 2), 0);
	ccv_nnc_tensor_t* const fit_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, batch_size, 2), 0);
	ccv_nnc_tensor_t** tensor[device_count * 2];
	int epoch = 0;
	ccv_nnc_stream_context_t* const stream = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
	// Training loop: i counts mini-batches, epoch counts passes over train_data.
	for (i = 0; epoch < epoch_limit; i++)
	{
		// Linear learning-rate warmup over the first 10000 examples, scaled by device count.
		float learn_rate = 0.0001 * ccv_min(i / (10000. / batch_size), 1) * device_count;
		adam = CMD_ADAM_FORWARD(i + 1, learn_rate, 0.9, 0.98, 0, 1e-9);
		ccv_cnnp_dataframe_iter_next(iter, (void**)tensor, device_count, stream);
		ccv_nnc_tensor_t word_indices_tensor[device_count];
		ccv_nnc_tensor_t mask_tensor[device_count];
		ccv_nnc_tensor_variable_t word_indices[device_count];
		ccv_nnc_tensor_variable_t word_vec[device_count];
		ccv_nnc_tensor_variable_t pos_vec[device_count];
		ccv_nnc_tensor_variable_t select_vec[device_count];
		ccv_nnc_tensor_variable_t vec[device_count * 2];
		ccv_nnc_tensor_variable_t out[device_count];
		for (j = 0; j < device_count; j++)
		{
			ccv_nnc_tensor_param_t word_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * max_length);
			CCV_TENSOR_SET_DEVICE_ID(word_indices_params.type, j);
			word_indices_tensor[j] = ccv_nnc_tensor(tensor[j][0]->data.f32, word_indices_params, 0);
			word_indices[j] = ccv_nnc_tensor_variable_new(dynamic_graph, word_indices_params);
			ccv_nnc_tensor_variable_set(dynamic_graph, word_indices[j], &word_indices_tensor[j]);
			ccv_nnc_tensor_param_t pre_vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size * max_length, embedding_size);
			CCV_TENSOR_SET_DEVICE_ID(pre_vec_params.type, j);
			word_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
			pos_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
			select_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
			ccv_nnc_tensor_param_t vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size, max_length, embedding_size);
			CCV_TENSOR_SET_DEVICE_ID(vec_params.type, j);
			// The model input is a (batch, length, embedding) alias over the flat word+position sum.
			vec[j * 2] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, select_vec[j], ccv_nnc_no_ofs, DIM_ALLOC(), vec_params);
			out[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
		}
		ccv_nnc_tensor_variable_t tvin[device_count * 2];
		// Gather word embeddings by token index and position embeddings by
		// position index, then sum the two into select_vec.
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = word_indices[j];
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, word_vec, device_count, device_count, stream);
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = seq_vec[j], tvin[j * 2 + 1] = seq_indices[j];
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, pos_vec, device_count, device_count, stream);
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = word_vec[j], tvin[j * 2 + 1] = pos_vec[j];
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_ADD_FORWARD(1, 1), ccv_nnc_no_hint, 0, tvin, device_count * 2, select_vec, device_count, device_count, stream);
		ccv_cnnp_dataframe_iter_peek(iter, (void**)(tensor + device_count), device_count, device_count, stream);
		for (j = 0; j < device_count; j++)
		{
			ccv_nnc_tensor_param_t mask_params = GPU_TENSOR_NCHW(000, 32S, batch_size, max_length, max_length);
			CCV_TENSOR_SET_DEVICE_ID(mask_params.type, j);
			mask_tensor[j] = ccv_nnc_tensor(tensor[j + device_count][0]->data.i32, mask_params, 0);
			vec[j * 2 + 1] = ccv_nnc_tensor_constant_new(dynamic_graph, mask_params);
			ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 2 + 1], &mask_tensor[j]);
		}
		// Forward pass through the transformer (third argument 0 = training mode).
		ccv_nnc_dynamic_graph_evaluate(dynamic_graph, transformer, 0, vec, device_count * 2, out, device_count, 0, stream);
		ccv_nnc_tensor_variable_t softmax[device_count];
		ccv_nnc_tensor_variable_t fit[device_count];
		ccv_nnc_tensor_variable_t vocab_vec_grad[device_count];
		ccv_nnc_tensor_variable_t seq_vec_grad[device_count];
		for (j = 0; j < device_count; j++)
		{
			softmax[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
			fit[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
			ccv_nnc_tensor_variable_set(dynamic_graph, fit[j], tensor[j][1]);
			vocab_vec_grad[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
			seq_vec_grad[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
		}
		ccv_nnc_tensor_variable_t tvout[device_count * 2];
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = out[j], tvin[j * 2 + 1] = fit[j], tvout[j * 2] = 0, tvout[j * 2 + 1] = softmax[j];
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_SOFTMAX_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, tvout, device_count * 2, device_count, stream);
		// Backprop the loss into the two embedding tables; the transformer's
		// own parameters are updated through its minimizer below.
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = seq_vec[j], tvout[j * 2] = vocab_vec_grad[j], tvout[j * 2 + 1] = seq_vec_grad[j];
		ccv_nnc_dynamic_graph_backward(dynamic_graph, softmax, device_count, 0, tvin, device_count * 2, tvout, device_count * 2, stream);
		ccv_cnnp_model_set_minimizer(transformer, adam, 0, 0, 0);
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = vocab_vec_grad[j], tvin[j * 2 + 1] = seq_vec_grad[j], tvout[j * 2] = vocab_vec[j], tvout[j * 2 + 1] = seq_vec[j];
		ccv_nnc_dynamic_graph_apply_gradients(dynamic_graph, adam, tvin, device_count * 2, tvout, device_count * 2, saved_auxs, device_count, stream);
		ccv_nnc_stream_context_wait(stream);
		// Release this batch's variables so the graph doesn't grow unboundedly.
		for (j = 0; j < device_count; j++)
		{
			ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2]);
			ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2 + 1]);
			ccv_nnc_tensor_variable_free(dynamic_graph, select_vec[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, word_vec[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, word_indices[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, out[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, fit[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, pos_vec[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, softmax[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, vocab_vec_grad[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, seq_vec_grad[j]);
		}
		if ((i + 1) % epoch_end == 0)
		{
			++epoch;
			ccv_cnnp_dataframe_shuffle(train_data);
			ccv_cnnp_dataframe_iter_set_cursor(iter, 0);
		}
	}
	ccv_nnc_stream_context_free(stream);
	// Evaluation: one pass over the test set (gradients off, evaluate with
	// is_test = 1), counting argmax agreements between prediction and label.
	int correct = 0;
	ccv_cnnp_dataframe_iter_t* const test_iter = ccv_cnnp_dataframe_iter_new(test_batched_data, test_gpu_batched, device_count * 2);
	int k;
	ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 1);
	const int row_count = ccv_cnnp_dataframe_row_count(test_data);
	for (k = 0; k < row_count; k += batch_size * device_count)
	{
		ccv_cnnp_dataframe_iter_next(test_iter, (void**)tensor, device_count, 0);
		ccv_nnc_tensor_t word_indices_tensor[device_count];
		ccv_nnc_tensor_t mask_tensor[device_count];
		ccv_nnc_tensor_variable_t word_indices[device_count];
		ccv_nnc_tensor_variable_t word_vec[device_count];
		ccv_nnc_tensor_variable_t pos_vec[device_count];
		ccv_nnc_tensor_variable_t select_vec[device_count];
		ccv_nnc_tensor_variable_t vec[device_count * 2];
		ccv_nnc_tensor_variable_t out[device_count];
		for (j = 0; j < device_count; j++)
		{
			ccv_nnc_tensor_param_t word_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * max_length);
			CCV_TENSOR_SET_DEVICE_ID(word_indices_params.type, j);
			word_indices_tensor[j] = ccv_nnc_tensor(tensor[j][0]->data.f32, word_indices_params, 0);
			word_indices[j] = ccv_nnc_tensor_variable_new(dynamic_graph, word_indices_params);
			ccv_nnc_tensor_variable_set(dynamic_graph, word_indices[j], &word_indices_tensor[j]);
			ccv_nnc_tensor_param_t pre_vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size * max_length, embedding_size);
			CCV_TENSOR_SET_DEVICE_ID(pre_vec_params.type, j);
			word_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
			pos_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
			select_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
			ccv_nnc_tensor_param_t vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size, max_length, embedding_size);
			CCV_TENSOR_SET_DEVICE_ID(vec_params.type, j);
			vec[j * 2] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, select_vec[j], ccv_nnc_no_ofs, DIM_ALLOC(), vec_params);
			out[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
		}
		ccv_nnc_tensor_variable_t tvin[device_count * 2];
		// Same embedding lookup + sum as the training loop, but on the default stream.
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = word_indices[j];
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, word_vec, device_count, device_count, 0);
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = seq_vec[j], tvin[j * 2 + 1] = seq_indices[j];
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, pos_vec, device_count, device_count, 0);
		for (j = 0; j < device_count; j++)
			tvin[j * 2] = word_vec[j], tvin[j * 2 + 1] = pos_vec[j];
		ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_ADD_FORWARD(1, 1), ccv_nnc_no_hint, 0, tvin, device_count * 2, select_vec, device_count, device_count, 0);
		ccv_cnnp_dataframe_iter_peek(test_iter, (void**)(tensor + device_count), device_count, device_count, 0);
		for (j = 0; j < device_count; j++)
		{
			ccv_nnc_tensor_param_t mask_params = GPU_TENSOR_NCHW(000, 32S, batch_size, max_length, max_length);
			CCV_TENSOR_SET_DEVICE_ID(mask_params.type, j);
			mask_tensor[j] = ccv_nnc_tensor(tensor[j + device_count][0]->data.i32, mask_params, 0);
			vec[j * 2 + 1] = ccv_nnc_tensor_constant_new(dynamic_graph, mask_params);
			ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 2 + 1], &mask_tensor[j]);
		}
		// Forward pass in test mode (third argument 1).
		ccv_nnc_dynamic_graph_evaluate(dynamic_graph, transformer, 1, vec, device_count * 2, out, device_count, 0, 0);
		int d;
		for (d = 0; d < device_count; d++)
		{
			ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, out[d], 0)), TENSOR_LIST(out_cpu), 0);
			ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor[d][1]), TENSOR_LIST(fit_cpu), 0);
			// The last batch may be partial: clamp to the rows actually remaining.
			for (j = 0; j < ccv_min(row_count - k - d * batch_size, batch_size); j++)
			{
				const int truth = (fit_cpu->data.f32[j * 2] < fit_cpu->data.f32[j * 2 + 1]);
				const int prediction = (out_cpu->data.f32[j * 2] < out_cpu->data.f32[j * 2 + 1]);
				if (truth == prediction)
					++correct;
			}
		}
		for (j = 0; j < device_count; j++)
		{
			ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2]);
			ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2 + 1]);
			ccv_nnc_tensor_variable_free(dynamic_graph, select_vec[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, word_vec[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, word_indices[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, out[j]);
			ccv_nnc_tensor_variable_free(dynamic_graph, pos_vec[j]);
		}
	}
	ccv_cnnp_dataframe_iter_free(test_iter);
	ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 0);
	ccv_cnnp_model_free(transformer);
	ccv_cnnp_dataframe_iter_free(iter);
	ccv_cnnp_dataframe_free(batched_data);
	ccv_cnnp_dataframe_free(test_batched_data);
	ccv_nnc_dynamic_graph_free(dynamic_graph);
	ccv_nnc_tensor_free(out_cpu);
	ccv_nnc_tensor_free(fit_cpu);
	return correct;
}
532
533
// End-to-end test: trains the fixed-length transformer classifier (train_imdb_fix)
// on the IMDB review dataset and, in a full (non-coverage) run, requires > 80%
// test-set accuracy. Skips cleanly when the required GPU backends or the dataset
// files are not available on this machine.
TEST_CASE("train a categorical transformer classifier on imdb reviews to 80% with mix of dynamic graph and cnnp model")
{
  // Requires CUBLAS GEMM and CUDNN average-pool kernels (forward and backward).
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GEMM_FORWARD, CCV_NNC_BACKEND_GPU_CUBLAS) &&
      ccv_nnc_cmd_ok(CCV_NNC_GEMM_BACKWARD, CCV_NNC_BACKEND_GPU_CUBLAS) &&
      ccv_nnc_cmd_ok(CCV_NNC_AVERAGE_POOL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
      ccv_nnc_cmd_ok(CCV_NNC_AVERAGE_POOL_BACKWARD, CCV_NNC_BACKEND_GPU_CUDNN));
  // Hard-coded dataset locations; the test is skipped below if any is missing.
  const char* const train_list = "/fast/Data/IMDB_Movie_Reviews/aclImdb/train.txt";
  const char* const test_list = "/fast/Data/IMDB_Movie_Reviews/aclImdb/test.txt";
  const char* const vocab_file = "/fast/Data/IMDB_Movie_Reviews/aclImdb/imdb.vocab";
  const char* const base_dir = "/fast/Data/IMDB_Movie_Reviews/aclImdb/";
  // Probe for existence only; handles are closed immediately.
  FILE* train_open = fopen(train_list, "rb");
  FILE* test_open = fopen(test_list, "rb");
  FILE* vocab_open = fopen(vocab_file, "rb");
  if (train_open)
    fclose(train_open);
  if (test_open)
    fclose(test_open);
  if (vocab_open)
    fclose(vocab_open);
  if (!train_open || !test_open || !vocab_open)
    { GUARD_ELSE_RETURN(0); } // Dataset not present: skip the test.
  khash_t(vocab_map)* vocab;
  int vocab_size;
  _vocab_init(vocab_file, &vocab, &vocab_size);
  const int max_length = 512; // Token cap per review, matching _text_to_tensor_index.
  ccv_array_t* train_set;
  ccv_cnnp_dataframe_t* train_data;
  ccv_array_t* test_set;
  ccv_cnnp_dataframe_t* test_data;
  if (!ccv_is_coverage())
  {
    // Full run: load the entire dataset (limit 0 = unlimited), train 10 epochs,
    // and assert the accuracy bar.
    train_set = _array_from_disk_new(train_list, base_dir, vocab, vocab_size, max_length, 0);
    train_data = ccv_cnnp_dataframe_from_array_new(train_set);
    test_set = _array_from_disk_new(test_list, base_dir, vocab, vocab_size, max_length, 0);
    test_data = ccv_cnnp_dataframe_from_array_new(test_set);
    const int correct = train_imdb_fix(10, vocab_size, 64, max_length, 128, train_data, test_data);
    REQUIRE((float)correct / test_set->rnum > 0.80, "%f should be larger than 80%%", (float)correct / test_set->rnum);
  } else {
    // Coverage run: cap both sets at 128 rows and train a single epoch purely to
    // exercise the code paths; no accuracy assertion.
    train_set = _array_from_disk_new(train_list, base_dir, vocab, vocab_size, max_length, 128);
    train_data = ccv_cnnp_dataframe_from_array_new(train_set);
    test_set = _array_from_disk_new(test_list, base_dir, vocab, vocab_size, max_length, 128);
    test_data = ccv_cnnp_dataframe_from_array_new(test_set);
    train_imdb_fix(1, vocab_size, 64, max_length, 128, train_data, test_data);
  }
  // Dataframes are freed before the backing arrays/tensors they wrap.
  ccv_cnnp_dataframe_free(train_data);
  ccv_cnnp_dataframe_free(test_data);
  int i;
  for (i = 0; i < train_set->rnum; i++)
  {
    ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(train_set, i))->tensor);
    ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(train_set, i))->mask);
  }
  ccv_array_free(train_set);
  for (i = 0; i < test_set->rnum; i++)
  {
    ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(test_set, i))->tensor);
    ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(test_set, i))->mask);
  }
  ccv_array_free(test_set);
  _vocab_destroy(vocab);
}
594
595
// Trains a binary transformer classifier over IMDB reviews using a mix of
// dynamic-graph ops (embedding lookups, positional embeddings, Adam updates on the
// embedding tables) and a cnnp model (the transformer itself), then evaluates on
// the test set. Unlike the "fix" variant, each batch is truncated to its longest
// sequence (ccv_cnnp_dataframe_truncate), so per-iteration tensor shapes vary.
//
// Parameters:
//   epoch_limit    - number of passes over train_data.
//   vocab_size     - rows of the word-embedding table.
//   batch_size     - per-device batch size.
//   max_length     - maximum sequence length (upper bound before truncation).
//   embedding_size - embedding/model width.
//   train_data     - dataframe of ccv_nnc_text_t training rows.
//   test_data      - dataframe of ccv_nnc_text_t test rows.
// Returns: count of correct test-set predictions.
static int train_imdb_flex(const int epoch_limit, const int vocab_size, const int batch_size, const int max_length, const int embedding_size, ccv_cnnp_dataframe_t* const train_data, ccv_cnnp_dataframe_t* const test_data)
{
  // Derive dataframe columns: token tensor, scalar label widened to 32F, and mask.
  const int tensor_idx = ccv_cnnp_dataframe_extract_value(train_data, 0, offsetof(ccv_nnc_text_t, tensor), 0);
  const int one_hot_idx = ccv_cnnp_dataframe_copy_scalar(train_data, 0, offsetof(ccv_nnc_text_t, c), CCV_32S, CCV_32F, CCV_TENSOR_FORMAT_NCHW, 0);
  const int mask_idx = ccv_cnnp_dataframe_extract_value(train_data, 0, offsetof(ccv_nnc_text_t, mask), 0);
  const int device_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
  // Batch the three columns, splitting each batch across device_count GPUs
  // (3 sub-columns per device in the combined dataframe).
  ccv_cnnp_dataframe_t* const batched_data = ccv_cnnp_dataframe_combine_new(train_data, COLUMN_ID_LIST(tensor_idx, one_hot_idx, mask_idx), batch_size, device_count, CCV_TENSOR_FORMAT_NCHW);
  const int test_tensor_idx = ccv_cnnp_dataframe_extract_value(test_data, 0, offsetof(ccv_nnc_text_t, tensor), 0);
  const int test_one_hot_idx = ccv_cnnp_dataframe_copy_scalar(test_data, 0, offsetof(ccv_nnc_text_t, c), CCV_32S, CCV_32F, CCV_TENSOR_FORMAT_NCHW, 0);
  const int test_mask_idx = ccv_cnnp_dataframe_extract_value(test_data, 0, offsetof(ccv_nnc_text_t, mask), 0);
  ccv_cnnp_dataframe_t* const test_batched_data = ccv_cnnp_dataframe_combine_new(test_data, COLUMN_ID_LIST(test_tensor_idx, test_one_hot_idx, test_mask_idx), batch_size, device_count, CCV_TENSOR_FORMAT_NCHW);
  // gpu_batched layout: [0..dc) token data, [dc..2dc) squared mask, [2dc..3dc) labels.
  int gpu_batched[device_count * 3];
  int seq_len_batched[device_count];
  int data_batched[device_count];
  int test_gpu_batched[device_count * 3];
  int test_seq_len_batched[device_count];
  int test_data_batched[device_count];
  int i, j;
  for (i = 0; i < device_count; i++)
  {
    // Per device: column i*3 is the token tensor tuple, i*3+2 the mask/seq-len tuple.
    seq_len_batched[i] = ccv_cnnp_dataframe_extract_tuple(batched_data, 0, i * 3 + 2, 0);
    data_batched[i] = ccv_cnnp_dataframe_extract_tuple(batched_data, 0, i * 3, 0);
    test_seq_len_batched[i] = ccv_cnnp_dataframe_extract_tuple(test_batched_data, 0, i * 3 + 2, 0);
    test_data_batched[i] = ccv_cnnp_dataframe_extract_tuple(test_batched_data, 0, i * 3, 0);
  }
  // Build (len x len) attention masks and truncate token data to the batch's
  // actual sequence lengths so shorter batches cost less compute.
  const int mask_batched = ccv_cnnp_dataframe_one_squared(batched_data, seq_len_batched, device_count, 1, max_length, 0);
  const int trunc_data_batched = ccv_cnnp_dataframe_truncate(batched_data, data_batched, device_count, seq_len_batched, device_count, 0);
  const int test_mask_batched = ccv_cnnp_dataframe_one_squared(test_batched_data, test_seq_len_batched, device_count, 1, max_length, 0);
  const int test_trunc_data_batched = ccv_cnnp_dataframe_truncate(test_batched_data, test_data_batched, device_count, test_seq_len_batched, device_count, 0);
  for (i = 0; i < device_count; i++)
  {
    gpu_batched[i] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, trunc_data_batched, i, 1, i, 0);
    gpu_batched[i + device_count] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, mask_batched, i, 1, i, 0);
    gpu_batched[i + device_count * 2] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, 0, i * 3 + 1, 1, i, 0);
    test_gpu_batched[i] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, test_trunc_data_batched, i, 1, i, 0);
    test_gpu_batched[i + device_count] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, test_mask_batched, i, 1, i, 0);
    test_gpu_batched[i + device_count * 2] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, 0, i * 3 + 1, 1, i, 0);
  }
  ccv_cnnp_dataframe_iter_t* const iter = ccv_cnnp_dataframe_iter_new(batched_data, gpu_batched, device_count * 3);
  ccv_nnc_dynamic_graph_t* const dynamic_graph = ccv_nnc_dynamic_graph_new();
  ccv_nnc_tensor_variable_t vocab_vec[device_count]; // Word-embedding table, replicated per device.
  ccv_nnc_tensor_variable_t seq_vec[device_count];   // Positional-embedding table, replicated per device.
  // Initialization below should not be taped for autograd.
  ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 1);
  ccv_nnc_tensor_param_t vocab_params = GPU_TENSOR_NCHW(000, 32F, vocab_size, embedding_size);
  vocab_vec[0] = ccv_nnc_tensor_variable_new(dynamic_graph, vocab_params);
  ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(vocab_vec[0]), 0, 0);
  ccv_nnc_tensor_param_t seq_params = GPU_TENSOR_NCHW(000, 32F, max_length, embedding_size);
  seq_vec[0] = ccv_nnc_tensor_variable_new(dynamic_graph, seq_params);
  ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(seq_vec[0]), 0, 0);
  // Broadcast device 0's freshly initialized tables to all other devices.
  for (i = 1; i < device_count; i++)
  {
    CCV_TENSOR_SET_DEVICE_ID(vocab_params.type, i);
    vocab_vec[i] = ccv_nnc_tensor_variable_new(dynamic_graph, vocab_params);
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(vocab_vec[0]), TENSOR_VARIABLE_LIST(vocab_vec[i]), 0, 0);
    CCV_TENSOR_SET_DEVICE_ID(seq_params.type, i);
    seq_vec[i] = ccv_nnc_tensor_variable_new(dynamic_graph, seq_params);
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(seq_vec[0]), TENSOR_VARIABLE_LIST(seq_vec[i]), 0, 0);
  }
  ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 0);
  // CPU staging buffer of position indices [0..batch_length) per row; sized for
  // the maximum possible batch, aliased down to the actual length each iteration.
  ccv_nnc_tensor_t* const seq_indices_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32S, batch_size * max_length), 0);
  ccv_nnc_tensor_variable_t seq_indices[device_count];
  for (i = 0; i < device_count; i++)
  {
    ccv_nnc_tensor_param_t seq_params = GPU_TENSOR_NCHW(000, 32S, batch_size * max_length);
    CCV_TENSOR_SET_DEVICE_ID(seq_params.type, i);
    seq_indices[i] = ccv_nnc_tensor_constant_new(dynamic_graph, seq_params); // constant: no gradient flows into indices
  }
  classifier_transformer_params_t classifier_transformer_params = {
    .layers = 2,
    .h = 8,
    .ff = 4,
    .dropout = 0.1,
  };
  // The transformer is built lazily from the inputs' shapes (dynamic model),
  // which is what lets each batch have a different sequence length.
  ccv_cnnp_model_t* const transformer = ccv_cnnp_dynamic_new(_dynamic_binary_classifier_transformer, &classifier_transformer_params, 0);
  ccv_cnnp_model_set_data_parallel(transformer, device_count);
  const int epoch_end = (ccv_cnnp_dataframe_row_count(train_data) + device_count * batch_size - 1) / (device_count * batch_size);
  ccv_cnnp_dataframe_shuffle(train_data);
  ccv_nnc_cmd_t adam = CMD_ADAM_FORWARD(1, 0.0001, 0.9, 0.98, 0, 1e-9);
  const int aux_size = ccv_nnc_minimizer_saved_aux_size(adam);
  // Adam moment buffers for the two embedding tables on every device.
  ccv_nnc_tensor_variable_t saved_auxs[device_count * aux_size * 2];
  for (i = 0; i < device_count; i++)
  {
    for (j = 0; j < aux_size; j++)
    {
      ccv_nnc_tensor_param_t saved_aux_params = GPU_TENSOR_NCHW(000, 32F, vocab_size, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(saved_aux_params.type, i);
      saved_auxs[i * aux_size * 2 + j] = ccv_nnc_tensor_variable_new(dynamic_graph, saved_aux_params);
    }
    for (j = 0; j < aux_size; j++)
    {
      ccv_nnc_tensor_param_t saved_aux_params = GPU_TENSOR_NCHW(000, 32F, max_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(saved_aux_params.type, i);
      saved_auxs[i* aux_size * 2 + aux_size + j] = ccv_nnc_tensor_variable_new(dynamic_graph, saved_aux_params);
    }
  }
  ccv_nnc_tensor_t* const out_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, batch_size, 1), 0);
  ccv_nnc_tensor_t* const fit_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, batch_size, 1), 0);
  ccv_nnc_tensor_t** tensor[device_count * 3]; // Per-column tensor lists fetched from the iterator.
  int epoch = 0;
  ccv_nnc_stream_context_t* const stream = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
  // ---- Training loop: i counts iterations, epoch advances every epoch_end iterations.
  for (i = 0; epoch < epoch_limit; i++)
  {
    // Linear learning-rate warmup over the first ~10000 examples, scaled by device count.
    float learn_rate = 0.0001 * ccv_min(i / (10000. / batch_size), 1) * device_count;
    adam = CMD_ADAM_FORWARD(i + 1, learn_rate, 0.9, 0.98, 0, 1e-9);
    // Fetch only the token-data columns now; mask and labels are peeked later so
    // their transfer overlaps with the embedding computation on `stream`.
    ccv_cnnp_dataframe_iter_next(iter, (void**)tensor, device_count, stream);
    ccv_nnc_tensor_t word_indices_tensor[device_count];
    ccv_nnc_tensor_t mask_tensor[device_count];
    ccv_nnc_tensor_variable_t word_indices[device_count];
    ccv_nnc_tensor_variable_t word_vec[device_count];
    ccv_nnc_tensor_variable_t pos_vec[device_count];
    ccv_nnc_tensor_variable_t select_vec[device_count];
    ccv_nnc_tensor_variable_t vec[device_count * 2]; // Interleaved (embedding alias, mask) model inputs.
    ccv_nnc_tensor_variable_t out[device_count];
    ccv_nnc_tensor_variable_t seq_indices_alias[device_count];
    int batch_length = 0;
    for (j = 0; j < device_count; j++)
    {
      // Actual (truncated) sequence length of this batch.
      batch_length = tensor[j][0]->info.dim[1];
      ccv_nnc_tensor_param_t word_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * batch_length);
      CCV_TENSOR_SET_DEVICE_ID(word_indices_params.type, j);
      // Reinterpret the batched token tensor as a flat 32S index vector (no copy).
      word_indices_tensor[j] = ccv_nnc_tensor(tensor[j][0]->data.f32, word_indices_params, 0);
      word_indices[j] = ccv_nnc_tensor_variable_new(dynamic_graph, word_indices_params);
      ccv_nnc_tensor_variable_set(dynamic_graph, word_indices[j], &word_indices_tensor[j]);
      ccv_nnc_tensor_param_t pre_vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size * batch_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(pre_vec_params.type, j);
      word_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
      pos_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
      select_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      ccv_nnc_tensor_param_t vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size, batch_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(vec_params.type, j);
      // View the flat (batch*len, emb) sum as (batch, len, emb) for the model input.
      vec[j * 2] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, select_vec[j], ccv_nnc_no_ofs, DIM_ALLOC(), vec_params);
      out[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      ccv_nnc_tensor_param_t seq_params = GPU_TENSOR_NCHW(000, 32S, batch_size * batch_length);
      CCV_TENSOR_SET_DEVICE_ID(seq_params.type, j);
      // Alias the constant index buffer down to this batch's length.
      seq_indices_alias[j] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, seq_indices[j], ccv_nnc_no_ofs, DIM_ALLOC(), seq_params);
    }
    // Fill position indices 0..batch_length-1 for each row, then upload per device.
    for (j = 0; j < batch_size; j++)
    {
      int k;
      for (k = 0; k < batch_length; k++)
        seq_indices_cpu->data.i32[j * batch_length + k] = k;
    }
    for (j = 0; j < device_count; j++)
      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(seq_indices_cpu), TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, seq_indices[j])), 0);
    ccv_nnc_tensor_variable_t tvin[device_count * 2];
    // word_vec = vocab_vec[word_indices] (per device, parallel = device_count).
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = word_indices[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, word_vec, device_count, device_count, stream);
    // pos_vec = seq_vec[seq_indices].
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = seq_vec[j], tvin[j * 2 + 1] = seq_indices_alias[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, pos_vec, device_count, device_count, stream);
    // select_vec = word_vec + pos_vec.
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = word_vec[j], tvin[j * 2 + 1] = pos_vec[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_ADD_FORWARD(1, 1), ccv_nnc_no_hint, 0, tvin, device_count * 2, select_vec, device_count, device_count, stream);
    // Now peek the mask + label columns (device_count..3*device_count) of the same row.
    ccv_cnnp_dataframe_iter_peek(iter, (void**)(tensor + device_count), device_count, device_count * 2, stream);
    for (j = 0; j < device_count; j++)
    {
      ccv_nnc_tensor_param_t mask_params = GPU_TENSOR_NCHW(000, 32S, batch_size, batch_length, batch_length);
      CCV_TENSOR_SET_DEVICE_ID(mask_params.type, j);
      mask_tensor[j] = ccv_nnc_tensor(tensor[j + device_count][0]->data.i32, mask_params, 0);
      vec[j * 2 + 1] = ccv_nnc_tensor_constant_new(dynamic_graph, mask_params);
      ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 2 + 1], &mask_tensor[j]);
    }
    // Forward through the transformer in training mode (is_test = 0).
    ccv_nnc_dynamic_graph_evaluate(dynamic_graph, transformer, 0, vec, device_count * 2, out, device_count, 0, stream);
    ccv_nnc_tensor_variable_t sigmoid[device_count];
    ccv_nnc_tensor_variable_t fit[device_count];
    ccv_nnc_tensor_variable_t vocab_vec_grad[device_count];
    ccv_nnc_tensor_variable_t seq_vec_grad[device_count];
    for (j = 0; j < device_count; j++)
    {
      sigmoid[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      fit[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      ccv_nnc_tensor_variable_set(dynamic_graph, fit[j], tensor[j + device_count * 2][0]);
      vocab_vec_grad[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      seq_vec_grad[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
    }
    ccv_nnc_tensor_variable_t tvout[device_count * 2];
    // Sigmoid binary cross-entropy loss; first output slot unused (0), second is the loss.
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = out[j], tvin[j * 2 + 1] = fit[j], tvout[j * 2] = 0, tvout[j * 2 + 1] = sigmoid[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, tvout, device_count * 2, device_count, stream);
    // Backprop loss into the two embedding tables (model parameters are handled
    // internally by the cnnp model / its minimizer).
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = seq_vec[j], tvout[j * 2] = vocab_vec_grad[j], tvout[j * 2 + 1] = seq_vec_grad[j];
    ccv_nnc_dynamic_graph_backward(dynamic_graph, sigmoid, device_count, 0, tvin, device_count * 2, tvout, device_count * 2, stream);
    ccv_cnnp_model_set_minimizer(transformer, adam, 0, 0, 0);
    // Apply Adam to the embedding tables with the warmed-up learning rate.
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = vocab_vec_grad[j], tvin[j * 2 + 1] = seq_vec_grad[j], tvout[j * 2] = vocab_vec[j], tvout[j * 2 + 1] = seq_vec[j];
    ccv_nnc_dynamic_graph_apply_gradients(dynamic_graph, adam, tvin, device_count * 2, tvout, device_count * 2, saved_auxs, device_count, stream);
    // Synchronize before freeing the per-iteration variables the stream may still use.
    ccv_nnc_stream_context_wait(stream);
    for (j = 0; j < device_count; j++)
    {
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2]);
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2 + 1]);
      ccv_nnc_tensor_variable_free(dynamic_graph, select_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, word_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, word_indices[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, out[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, fit[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, pos_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, sigmoid[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, vocab_vec_grad[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, seq_vec_grad[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, seq_indices_alias[j]);
    }
    // End of an epoch: reshuffle and rewind the iterator.
    if ((i + 1) % epoch_end == 0)
    {
      ++epoch;
      ccv_cnnp_dataframe_shuffle(train_data);
      ccv_cnnp_dataframe_iter_set_cursor(iter, 0);
    }
  }
  ccv_nnc_stream_context_free(stream);
  // ---- Evaluation: count correct predictions over the whole test set.
  int correct = 0;
  ccv_cnnp_dataframe_iter_t* const test_iter = ccv_cnnp_dataframe_iter_new(test_batched_data, test_gpu_batched, device_count * 3);
  int k;
  ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 1); // Inference only from here on.
  const int row_count = ccv_cnnp_dataframe_row_count(test_data);
  for (k = 0; k < row_count; k += batch_size * device_count)
  {
    // Fetch all three columns at once (no compute/transfer overlap needed here).
    ccv_cnnp_dataframe_iter_next(test_iter, (void**)tensor, device_count * 3, 0);
    ccv_nnc_tensor_t word_indices_tensor[device_count];
    ccv_nnc_tensor_t mask_tensor[device_count];
    ccv_nnc_tensor_variable_t word_indices[device_count];
    ccv_nnc_tensor_variable_t word_vec[device_count];
    ccv_nnc_tensor_variable_t pos_vec[device_count];
    ccv_nnc_tensor_variable_t select_vec[device_count];
    ccv_nnc_tensor_variable_t vec[device_count * 2];
    ccv_nnc_tensor_variable_t out[device_count];
    ccv_nnc_tensor_variable_t seq_indices_alias[device_count];
    int batch_length = 0;
    // Same per-device input wiring as the training loop above.
    for (j = 0; j < device_count; j++)
    {
      batch_length = tensor[j][0]->info.dim[1];
      ccv_nnc_tensor_param_t word_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * batch_length);
      CCV_TENSOR_SET_DEVICE_ID(word_indices_params.type, j);
      word_indices_tensor[j] = ccv_nnc_tensor(tensor[j][0]->data.f32, word_indices_params, 0);
      word_indices[j] = ccv_nnc_tensor_variable_new(dynamic_graph, word_indices_params);
      ccv_nnc_tensor_variable_set(dynamic_graph, word_indices[j], &word_indices_tensor[j]);
      ccv_nnc_tensor_param_t pre_vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size * batch_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(pre_vec_params.type, j);
      word_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
      pos_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
      select_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      ccv_nnc_tensor_param_t vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size, batch_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(vec_params.type, j);
      vec[j * 2] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, select_vec[j], ccv_nnc_no_ofs, DIM_ALLOC(), vec_params);
      out[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      ccv_nnc_tensor_param_t seq_params = GPU_TENSOR_NCHW(000, 32S, batch_size * batch_length);
      CCV_TENSOR_SET_DEVICE_ID(seq_params.type, j);
      seq_indices_alias[j] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, seq_indices[j], ccv_nnc_no_ofs, DIM_ALLOC(), seq_params);
    }
    for (j = 0; j < batch_size; j++)
    {
      int k;
      for (k = 0; k < batch_length; k++)
        seq_indices_cpu->data.i32[j * batch_length + k] = k;
    }
    for (j = 0; j < device_count; j++)
      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(seq_indices_cpu), TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, seq_indices[j])), 0);
    ccv_nnc_tensor_variable_t tvin[device_count * 2];
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = word_indices[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, word_vec, device_count, device_count, 0);
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = seq_vec[j], tvin[j * 2 + 1] = seq_indices_alias[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, pos_vec, device_count, device_count, 0);
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = word_vec[j], tvin[j * 2 + 1] = pos_vec[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_ADD_FORWARD(1, 1), ccv_nnc_no_hint, 0, tvin, device_count * 2, select_vec, device_count, device_count, 0);
    for (j = 0; j < device_count; j++)
    {
      ccv_nnc_tensor_param_t mask_params = GPU_TENSOR_NCHW(000, 32S, batch_size, batch_length, batch_length);
      CCV_TENSOR_SET_DEVICE_ID(mask_params.type, j);
      mask_tensor[j] = ccv_nnc_tensor(tensor[j + device_count][0]->data.i32, mask_params, 0);
      vec[j * 2 + 1] = ccv_nnc_tensor_constant_new(dynamic_graph, mask_params);
      ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 2 + 1], &mask_tensor[j]);
    }
    // Forward in test mode (is_test = 1).
    ccv_nnc_dynamic_graph_evaluate(dynamic_graph, transformer, 1, vec, device_count * 2, out, device_count, 0, 0);
    int d;
    for (d = 0; d < device_count; d++)
    {
      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, out[d], 0)), TENSOR_LIST(out_cpu), 0);
      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor[d + device_count * 2][0]), TENSOR_LIST(fit_cpu), 0);
      // Last batch may be partial: only score the remaining rows.
      for (j = 0; j < ccv_min(row_count - k - d * batch_size, batch_size); j++)
      {
        // Single-logit binary decision: label > 0.5 vs. logit > 0.
        const int truth = (fit_cpu->data.f32[j] > 0.5);
        const int prediction = (out_cpu->data.f32[j] > 0);
        if (truth == prediction)
          ++correct;
      }
    }
    for (j = 0; j < device_count; j++)
    {
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2]);
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2 + 1]);
      ccv_nnc_tensor_variable_free(dynamic_graph, select_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, word_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, word_indices[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, out[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, pos_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, seq_indices_alias[j]);
    }
  }
  // Teardown in reverse dependency order.
  ccv_nnc_tensor_free(seq_indices_cpu);
  ccv_cnnp_dataframe_iter_free(test_iter);
  ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 0);
  ccv_cnnp_model_free(transformer);
  ccv_cnnp_dataframe_iter_free(iter);
  ccv_cnnp_dataframe_free(batched_data);
  ccv_cnnp_dataframe_free(test_batched_data);
  ccv_nnc_dynamic_graph_free(dynamic_graph);
  ccv_nnc_tensor_free(out_cpu);
  ccv_nnc_tensor_free(fit_cpu);
  return correct;
}
909
910
// End-to-end test: trains the variable-length transformer classifier
// (train_imdb_flex, which truncates each batch to its longest sequence) on the
// IMDB review dataset and, in a full run, requires > 80% test-set accuracy.
// Structure mirrors the categorical test case above; only the trainer differs.
TEST_CASE("train a binary transformer classifier on imdb reviews to 80% with mix of dynamic graph and cnnp model and dynamic inputs")
{
  // Requires CUBLAS GEMM and CUDNN average-pool kernels (forward and backward).
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GEMM_FORWARD, CCV_NNC_BACKEND_GPU_CUBLAS) &&
      ccv_nnc_cmd_ok(CCV_NNC_GEMM_BACKWARD, CCV_NNC_BACKEND_GPU_CUBLAS) &&
      ccv_nnc_cmd_ok(CCV_NNC_AVERAGE_POOL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
      ccv_nnc_cmd_ok(CCV_NNC_AVERAGE_POOL_BACKWARD, CCV_NNC_BACKEND_GPU_CUDNN));
  // Hard-coded dataset locations; the test is skipped below if any is missing.
  const char* const train_list = "/fast/Data/IMDB_Movie_Reviews/aclImdb/train.txt";
  const char* const test_list = "/fast/Data/IMDB_Movie_Reviews/aclImdb/test.txt";
  const char* const vocab_file = "/fast/Data/IMDB_Movie_Reviews/aclImdb/imdb.vocab";
  const char* const base_dir = "/fast/Data/IMDB_Movie_Reviews/aclImdb/";
  // Probe for existence only; handles are closed immediately.
  FILE* train_open = fopen(train_list, "rb");
  FILE* test_open = fopen(test_list, "rb");
  FILE* vocab_open = fopen(vocab_file, "rb");
  if (train_open)
    fclose(train_open);
  if (test_open)
    fclose(test_open);
  if (vocab_open)
    fclose(vocab_open);
  if (!train_open || !test_open || !vocab_open)
    { GUARD_ELSE_RETURN(0); } // Dataset not present: skip the test.
  khash_t(vocab_map)* vocab;
  int vocab_size;
  _vocab_init(vocab_file, &vocab, &vocab_size);
  const int max_length = 512; // Token cap per review, matching _text_to_tensor_index.
  ccv_array_t* train_set;
  ccv_cnnp_dataframe_t* train_data;
  ccv_array_t* test_set;
  ccv_cnnp_dataframe_t* test_data;
  if (!ccv_is_coverage())
  {
    // Full run: whole dataset (limit 0 = unlimited), 10 epochs, assert accuracy bar.
    train_set = _array_from_disk_new(train_list, base_dir, vocab, vocab_size, max_length, 0);
    train_data = ccv_cnnp_dataframe_from_array_new(train_set);
    test_set = _array_from_disk_new(test_list, base_dir, vocab, vocab_size, max_length, 0);
    test_data = ccv_cnnp_dataframe_from_array_new(test_set);
    const int correct = train_imdb_flex(10, vocab_size, 64, max_length, 128, train_data, test_data);
    REQUIRE((float)correct / test_set->rnum > 0.80, "%f should be larger than 80%%", (float)correct / test_set->rnum);
  } else {
    // Coverage run: cap both sets at 128 rows, one epoch, no accuracy assertion.
    train_set = _array_from_disk_new(train_list, base_dir, vocab, vocab_size, max_length, 128);
    train_data = ccv_cnnp_dataframe_from_array_new(train_set);
    test_set = _array_from_disk_new(test_list, base_dir, vocab, vocab_size, max_length, 128);
    test_data = ccv_cnnp_dataframe_from_array_new(test_set);
    train_imdb_flex(1, vocab_size, 64, max_length, 128, train_data, test_data);
  }
  // Dataframes are freed before the backing arrays/tensors they wrap.
  ccv_cnnp_dataframe_free(train_data);
  ccv_cnnp_dataframe_free(test_data);
  int i;
  for (i = 0; i < train_set->rnum; i++)
  {
    ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(train_set, i))->tensor);
    ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(train_set, i))->mask);
  }
  ccv_array_free(train_set);
  for (i = 0; i < test_set->rnum; i++)
  {
    ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(test_set, i))->tensor);
    ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(test_set, i))->mask);
  }
  ccv_array_free(test_set);
  _vocab_destroy(vocab);
}
971
972
#include "case_main.h"