Coverage Report

Created: 2024-08-18 16:21

/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/imdb.tests.c
Line
Count
Source (jump to first uncovered line)
1
#include "case.h"
2
#include "ccv_case.h"
3
#include "ccv_nnc_case.h"
4
#include <ccv.h>
5
#include <ccv_internal.h>
6
#include <nnc/ccv_nnc.h>
7
#include <nnc/ccv_nnc_easy.h>
8
#include <3rdparty/dsfmt/dSFMT.h>
9
#include <ctype.h>
10
#include <3rdparty/khash/khash.h>
11
12
// Test setup hook for this file: its only action is to call ccv_nnc_init().
TEST_SETUP()
13
{
14
  ccv_nnc_init();
15
}
16
17
// Instantiate the khash table type "vocab_map": C-string keys mapped to int values (used below as word -> vocabulary index).
KHASH_MAP_INIT_STR(vocab_map, int)
18
19
/**
 * Read a whitespace-separated text file and convert it into a fixed-length
 * tensor of vocabulary indices.
 *
 * Each word is lower-cased, split further on a fixed punctuation set, and every
 * resulting token that exists in vocab is appended as its mapped index. Tokens
 * missing from vocab are dropped. At most max_length indices are written. When
 * the text is shorter than max_length, one end marker (vocab_size - 2) follows
 * the last token and the remainder is filled with the pad marker
 * (vocab_size - 1).
 *
 * @param filename   Path of the text file to read.
 * @param vocab      Word -> index table.
 * @param vocab_size Total index count; the last two indices are the end / pad markers.
 * @param max_length Number of int32 slots in the returned tensor.
 * @return A newly allocated CPU 32S tensor of max_length elements; the caller frees it.
 */
static CCV_WARN_UNUSED(ccv_nnc_tensor_t*) _text_to_tensor_index(const char* const filename, const khash_t(vocab_map)* const vocab, const int vocab_size, const int max_length)
{
  // Characters that split a word into several tokens (and are dropped themselves).
  static const char delimiters[] = ".,<>/~`@#$%^&*+\\\"";
  const int end_flag = vocab_size - 2;
  const int pad_flag = vocab_size - 1;
  char* const word = (char*)ccmalloc(1024);
  ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, max_length), 0);
  FILE* const file = fopen(filename, "r");
  assert(file && "text file doesn't exist");
  int t = 0;
  // If assertions are compiled out, an unreadable file is treated as empty text
  // rather than passing a null FILE* to fscanf / fclose.
  if (file)
  {
    while (t < max_length && fscanf(file, "%1023s", word) == 1)
    {
      int j;
      // <ctype.h> functions require a value representable as unsigned char.
      for (j = 0; word[j]; j++)
        word[j] = (char)tolower((unsigned char)word[j]);
      char* saveptr;
      const char* token;
      for (token = strtok_r(word, delimiters, &saveptr); token && t < max_length; token = strtok_r(0, delimiters, &saveptr))
      {
        const khiter_t k = kh_get(vocab_map, vocab, token);
        if (k != kh_end(vocab)) // Out-of-vocabulary tokens are skipped.
          tensor->data.i32[t++] = kh_val(vocab, k);
      }
    }
    fclose(file);
  }
  if (t < max_length)
  {
    tensor->data.i32[t] = end_flag;
    for (++t; t < max_length; t++)
      tensor->data.i32[t] = pad_flag;
  }
  ccfree(word);
  return tensor;
}
56
57
// One labelled text sample, as built by _array_from_disk_new.
typedef struct {
58
  ccv_nnc_tensor_t* tensor; // Vocabulary indices produced by _text_to_tensor_index.
59
  ccv_nnc_tensor_t* mask; // Single-element 32S tensor holding the sequence length.
60
  int c; // Integer label read from the list file.
61
} ccv_nnc_text_t;
62
63
/**
 * Load labelled text samples described by a list file.
 *
 * Each record of the list file is "<int label> <relative path>". The path is
 * joined to base_dir (when given) with a '/', the file is converted with
 * _text_to_tensor_index, and the sample is appended to the result together
 * with a one-element tensor holding its length. The length is the index of the
 * first pad marker found at a position greater than zero, or max_length when
 * there is none.
 *
 * @param list       Path of the list file.
 * @param base_dir   Directory prepended to every listed path, or 0 for none.
 * @param vocab      Word -> index table forwarded to _text_to_tensor_index.
 * @param vocab_size Total index count; vocab_size - 1 is the pad marker.
 * @param max_length Sequence length of every sample tensor.
 * @param limit      Stop after this many samples when > 0.
 * @return A new ccv_array_t of ccv_nnc_text_t (empty if the list cannot be opened).
 */
static ccv_array_t* _array_from_disk_new(const char* const list, const char* const base_dir, const khash_t(vocab_map)* const vocab, const int vocab_size, const int max_length, const int limit)
{
  enum { PATH_SIZE = 1024 };
  FILE *r = fopen(list, "r");
  assert(r && "list doesn't exists");
  const int pad_flag = vocab_size - 1;
  ccv_array_t* categorizeds = ccv_array_new(sizeof(ccv_nnc_text_t), 64, 0);
  if (!r) // Only reachable when assertions are compiled out.
    return categorizeds;
  int c;
  char* file = (char*)ccmalloc(PATH_SIZE);
  char* filename = (char*)ccmalloc(PATH_SIZE);
  // Require both fields: a malformed record makes fscanf return 0 without
  // consuming input, which would spin forever under a "!= EOF" test.
  while (fscanf(r, "%d %1023s", &c, file) == 2)
  {
    // snprintf always NUL-terminates and never writes past the buffer.
    const int written = (base_dir != 0) ? snprintf(filename, PATH_SIZE, "%s/%s", base_dir, file) : snprintf(filename, PATH_SIZE, "%s", file);
    if (written < 0 || written >= PATH_SIZE)
      continue; // The joined path does not fit; a truncated path would name the wrong file.
    ccv_nnc_tensor_t* const tensor = _text_to_tensor_index(filename, vocab, vocab_size, max_length);
    // Index 0 is deliberately not considered, matching a length of 0 meaning "not found".
    int length = max_length;
    int i;
    for (i = 1; i < max_length; i++)
      if (tensor->data.i32[i] == pad_flag)
      {
        length = i;
        break;
      }
    ccv_nnc_tensor_t* const mask = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 1), 0);
    mask->data.i32[0] = length;
    ccv_nnc_text_t categorized = {
      .tensor = tensor,
      .mask = mask,
      .c = c
    };
    ccv_array_push(categorizeds, &categorized);
    if (limit > 0 && categorizeds->rnum >= limit)
      break;
  }
  ccfree(filename);
  ccfree(file);
  fclose(r);
  return categorizeds;
}
103
104
static ccv_cnnp_model_t* _self_attention_new(const int k, const int h, const int b, const int t, const float dropout)
105
4
{
106
4
  const ccv_cnnp_model_io_t x = ccv_cnnp_input();
107
4
  ccv_cnnp_model_io_t mask = ccv_cnnp_input();
108
4
  ccv_cnnp_model_io_t multiheads = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b * t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(x));
109
4
  ccv_cnnp_model_t* const tokeys = ccv_cnnp_dense(k * h, 1, 0, 1, "tokeys");
110
4
  ccv_cnnp_model_t* const toqueries = ccv_cnnp_dense(k * h, 1, 0, 1, "toqueries");
111
4
  ccv_cnnp_model_t* const tovalues = ccv_cnnp_dense(k * h, 1, 0, 1, "tovalues");
112
4
  ccv_cnnp_model_io_t keys = ccv_cnnp_model_apply(tokeys, MODEL_IO_LIST(multiheads));
113
4
  ccv_cnnp_model_io_t queries = ccv_cnnp_model_apply(toqueries, MODEL_IO_LIST(multiheads));
114
4
  ccv_cnnp_model_io_t values = ccv_cnnp_model_apply(tovalues, MODEL_IO_LIST(multiheads));
115
4
  keys = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(t, b, h, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(keys));
116
4
  queries = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(t, b, h, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(queries));
117
4
  values = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(t, b, h, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(values));
118
4
  keys = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 2, 0), MODEL_IO_LIST(keys));
119
4
  queries = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 2, 0), MODEL_IO_LIST(queries));
120
4
  values = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 2, 0), MODEL_IO_LIST(values));
121
4
  keys = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b * h, t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(keys));
122
4
  queries = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b * h, t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(queries));
123
4
  values = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b * h, t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(values));
124
4
  ccv_cnnp_model_io_t dot = ccv_cnnp_model_apply(ccv_cnnp_matmul(NO_TRANSPOSE, TRANSPOSE(1, 2), 0, 0), MODEL_IO_LIST(queries, keys));
125
4
  const float scale = 1. / sqrt(k);
126
4
  dot = ccv_cnnp_model_apply(ccv_cnnp_scalar_mul(scale, 0), MODEL_IO_LIST(dot));
127
4
  dot = ccv_cnnp_model_apply(ccv_cnnp_masked_fill(0, -1e9, 0), MODEL_IO_LIST(dot, mask));
128
4
  dot = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b * h * t, t), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(dot));
129
4
  dot = ccv_cnnp_model_apply(ccv_cnnp_softmax(0), MODEL_IO_LIST(dot));
130
4
  if (dropout > 0)
131
4
    dot = ccv_cnnp_model_apply(ccv_cnnp_dropout(dropout, 0, 0), MODEL_IO_LIST(dot));
132
4
  dot = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b * h, t, t), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(dot));
133
4
  ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(ccv_cnnp_matmul(NO_TRANSPOSE, NO_TRANSPOSE, 0, 0), MODEL_IO_LIST(dot, values));
134
4
  out = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(h, b, t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
135
4
  out = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 2, 0), MODEL_IO_LIST(out));
136
4
  out = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b * t, h * k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
137
4
  ccv_cnnp_model_t* const unifyheads = ccv_cnnp_dense(k, 0, 0, 1, "unifyheads");
138
4
  out = ccv_cnnp_model_apply(unifyheads, MODEL_IO_LIST(out));
139
4
  out = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(t, b, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
140
4
  return ccv_cnnp_model_new(MODEL_IO_LIST(x, mask), MODEL_IO_LIST(out), 1, "self-attention");
141
4
}
142
143
static ccv_cnnp_model_t* _transformer_block_new(const int k, const int h, const int b, const int t, const int ff, const float dropout)
144
4
{
145
4
  ccv_cnnp_model_io_t const x = ccv_cnnp_input();
146
4
  ccv_cnnp_model_io_t const mask = ccv_cnnp_input();
147
4
  ccv_cnnp_model_t* const self_attention = _self_attention_new(k, h, b, t, dropout);
148
4
  ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(self_attention, MODEL_IO_LIST(x, mask));
149
4
  out = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(x, out));
150
4
  ccv_cnnp_model_io_t first = ccv_cnnp_model_apply(ccv_cnnp_layer_norm(1e-5, DIM_ALLOC(2), 1, 1, 1, 0), MODEL_IO_LIST(out));
151
4
  if (dropout)
152
4
    out = ccv_cnnp_model_apply(ccv_cnnp_dropout(dropout, 0, 0), MODEL_IO_LIST(first));
153
0
  else
154
0
    out = first;
155
4
  out = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b * t, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
156
4
  out = ccv_cnnp_model_apply(ccv_cnnp_dense(ff, 0, 0, 1, 0), MODEL_IO_LIST(out));
157
4
  out = ccv_cnnp_model_apply(ccv_cnnp_relu(0), MODEL_IO_LIST(out));
158
4
  out = ccv_cnnp_model_apply(ccv_cnnp_dense(k, 0, 0, 1, 0), MODEL_IO_LIST(out));
159
4
  out = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(t, b, k), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
160
4
  out = ccv_cnnp_model_apply(ccv_cnnp_sum(0), MODEL_IO_LIST(first, out));
161
4
  out = ccv_cnnp_model_apply(ccv_cnnp_layer_norm(1e-5, DIM_ALLOC(2), 1, 1, 1, 0), MODEL_IO_LIST(out));
162
4
  if (dropout > 0)
163
4
    out = ccv_cnnp_model_apply(ccv_cnnp_dropout(dropout, 0, 0), MODEL_IO_LIST(out));
164
4
  return ccv_cnnp_model_new(MODEL_IO_LIST(x, mask), MODEL_IO_LIST(out), 1, "transformer");
165
4
}
166
167
static ccv_cnnp_model_t* _classifier_transformer_new(const int layers, const int k, const int h, const int b, const int t, const int ff, const float dropout)
168
1
{
169
1
  ccv_cnnp_model_io_t const x = ccv_cnnp_input();
170
1
  ccv_cnnp_model_io_t const mask = ccv_cnnp_input();
171
1
  ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 1, 0), MODEL_IO_LIST(x));
172
1
  int i;
173
3
  for (i = 0; i < layers; 
i++2
)
174
2
    out = ccv_cnnp_model_apply(_transformer_block_new(k, h, b, t, ff, dropout), MODEL_IO_LIST(out, mask));
175
1
  out = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 1, 0), MODEL_IO_LIST(out)); // t, b, k -> b, t, k
176
1
  out = ccv_cnnp_model_apply(ccv_cnnp_transpose(1, 2, 0), MODEL_IO_LIST(out)); // b, t, k -> b, k, t
177
1
  out = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b, k, t, 1), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
178
1
  out = ccv_cnnp_model_apply(ccv_cnnp_average_pool(DIM_ALLOC(0, 0), ccv_nnc_no_hint, 0), MODEL_IO_LIST(out));
179
  // Last layer, get it to 2.
180
1
  out = ccv_cnnp_model_apply(ccv_cnnp_flatten(0), MODEL_IO_LIST(out));
181
1
  out = ccv_cnnp_model_apply(ccv_cnnp_dense(2, 0, 0, 1, 0), MODEL_IO_LIST(out));
182
1
  return ccv_cnnp_model_new(MODEL_IO_LIST(x, mask), MODEL_IO_LIST(out), 1, "classifier");
183
1
}
184
185
// Hyper-parameters handed (as the context pointer) to the dynamic classifier builders below.
typedef struct {
186
  int layers; // Number of stacked transformer blocks.
187
  int h; // Passed through as the `h` argument of the classifier builders.
188
  int ff; // Multiplier: the builders use ff * k as the feed-forward width.
189
  float dropout; // Dropout rate forwarded to the classifier builders.
190
} classifier_transformer_params_t;
191
192
static ccv_cnnp_model_t* _dynamic_classifier_transformer(const ccv_nnc_tensor_param_t* const inputs, const int input_size, void* const context)
193
1
{
194
1
  const classifier_transformer_params_t* const params = (classifier_transformer_params_t*)context;
195
1
  const int b = inputs[0].dim[0];
196
1
  const int t = inputs[0].dim[1];
197
1
  const int k = inputs[0].dim[2];
198
1
  const int ff = params->ff * k;
199
1
  return _classifier_transformer_new(params->layers, k, params->h, b, t, ff, params->dropout);
200
1
}
201
202
static ccv_cnnp_model_t* _binary_classifier_transformer_new(const int layers, const int k, const int h, const int b, const int t, const int ff, const float dropout)
203
1
{
204
1
  ccv_cnnp_model_io_t const x = ccv_cnnp_input();
205
1
  ccv_cnnp_model_io_t const mask = ccv_cnnp_input();
206
1
  ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 1, 0), MODEL_IO_LIST(x));
207
1
  int i;
208
3
  for (i = 0; i < layers; 
i++2
)
209
2
    out = ccv_cnnp_model_apply(_transformer_block_new(k, h, b, t, ff, dropout), MODEL_IO_LIST(out, mask));
210
1
  out = ccv_cnnp_model_apply(ccv_cnnp_transpose(0, 1, 0), MODEL_IO_LIST(out)); // t, b, k -> b, t, k
211
1
  out = ccv_cnnp_model_apply(ccv_cnnp_transpose(1, 2, 0), MODEL_IO_LIST(out)); // b, t, k -> b, k, t
212
1
  out = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(b, k, t, 1), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
213
1
  out = ccv_cnnp_model_apply(ccv_cnnp_average_pool(DIM_ALLOC(0, 0), ccv_nnc_no_hint, 0), MODEL_IO_LIST(out));
214
  // Last layer, get it to 1.
215
1
  out = ccv_cnnp_model_apply(ccv_cnnp_flatten(0), MODEL_IO_LIST(out));
216
1
  out = ccv_cnnp_model_apply(ccv_cnnp_dense(1, 0, 0, 1, 0), MODEL_IO_LIST(out));
217
1
  return ccv_cnnp_model_new(MODEL_IO_LIST(x, mask), MODEL_IO_LIST(out), 1, "classifier");
218
1
}
219
220
static ccv_cnnp_model_t* _dynamic_binary_classifier_transformer(const ccv_nnc_tensor_param_t* const inputs, const int input_size, void* const context)
221
1
{
222
1
  const classifier_transformer_params_t* const params = (classifier_transformer_params_t*)context;
223
1
  const int b = inputs[0].dim[0];
224
1
  const int t = inputs[0].dim[1];
225
1
  const int k = inputs[0].dim[2];
226
1
  const int ff = params->ff * k;
227
1
  return _binary_classifier_transformer_new(params->layers, k, params->h, b, t, ff, params->dropout);
228
1
}
229
230
/**
 * Load a vocabulary file into a word -> index hash table.
 *
 * The file is read as whitespace-separated words; the n-th word read (starting
 * at 0) is mapped to index n. When a word appears more than once, the index of
 * its last occurrence wins and the counter still advances.
 *
 * @param vocab_file     Path of the vocabulary file.
 * @param vocab_ref      Receives the new table. Its keys are heap strings that
 *                       _vocab_destroy releases.
 * @param vocab_size_ref Receives the number of words read (0 if the file cannot be opened).
 */
static void _vocab_init(const char* const vocab_file, khash_t(vocab_map)** const vocab_ref, int* const vocab_size_ref)
{
  FILE* const vocab_ptr = fopen(vocab_file, "r");
  assert(vocab_ptr && "vocab file doesn't exist");
  khash_t(vocab_map)* const vocab = kh_init(vocab_map);
  int i = 0, ret;
  // With assertions compiled out, an unreadable file yields an empty vocabulary
  // rather than passing a null FILE* to fscanf / fclose.
  if (vocab_ptr)
  {
    char* const word = (char*)ccmalloc(1024);
    for (; fscanf(vocab_ptr, "%1023s", word) == 1; i++)
    {
      char* const key = strdup(word);
      const khiter_t k = kh_put(vocab_map, vocab, key, &ret);
      // ret == 0: the word is already in the table, which keeps its original
      // key pointer, so the fresh copy would otherwise leak.
      if (ret == 0)
        free(key);
      kh_val(vocab, k) = i;
    }
    ccfree(word);
    fclose(vocab_ptr);
  }
  *vocab_ref = vocab;
  *vocab_size_ref = i;
}
246
247
/**
 * Release a vocabulary table: free every key string stored in an occupied
 * bucket, then destroy the table itself.
 *
 * @param vocab Table whose keys were allocated on the heap.
 */
static void _vocab_destroy(khash_t(vocab_map)* const vocab)
{
  khiter_t bucket = kh_begin(vocab);
  while (bucket != kh_end(vocab))
  {
    // Only occupied buckets carry a key that needs freeing.
    if (kh_exist(vocab, bucket))
      free((void*)kh_key(vocab, bucket));
    bucket++;
  }
  kh_destroy(vocab_map, vocab);
}
255
256
/**
 * Train the two-class transformer classifier with a mix of dynamic-graph
 * operations (word / position embedding lookup and their Adam updates) and a
 * cnnp model (the transformer), then evaluate it on the test data.
 *
 * Work is spread over every GPU reported by ccv_nnc_device_count. Training runs
 * until epoch_limit epochs have completed; an epoch is ceil(rows / (devices *
 * batch_size)) iterations.
 *
 * @param epoch_limit    Number of epochs to train.
 * @param vocab_size     Row count of the word embedding table.
 * @param batch_size     Samples per device per iteration.
 * @param max_length     Sequence length of every sample.
 * @param embedding_size Width of the word / position embeddings.
 * @param train_data     Dataframe of ccv_nnc_text_t used for training (shuffled in place).
 * @param test_data      Dataframe of ccv_nnc_text_t used for evaluation.
 * @return Number of test rows whose predicted class equals the labelled class.
 */
static int train_imdb_fix(const int epoch_limit, const int vocab_size, const int batch_size, const int max_length, const int embedding_size, ccv_cnnp_dataframe_t* const train_data, ccv_cnnp_dataframe_t* const test_data)
{
  // Derived columns: token indices, one-hot label, sequence length - batched per device.
  const int tensor_idx = ccv_cnnp_dataframe_extract_value(train_data, 0, offsetof(ccv_nnc_text_t, tensor), 0);
  const int one_hot_idx = ccv_cnnp_dataframe_one_hot(train_data, 0, offsetof(ccv_nnc_text_t, c), 2, 1, 0, CCV_32F, CCV_TENSOR_FORMAT_NCHW, 0);
  const int mask_idx = ccv_cnnp_dataframe_extract_value(train_data, 0, offsetof(ccv_nnc_text_t, mask), 0);
  const int device_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
  ccv_cnnp_dataframe_t* const batched_data = ccv_cnnp_dataframe_combine_new(train_data, COLUMN_ID_LIST(tensor_idx, one_hot_idx, mask_idx), batch_size, device_count, CCV_TENSOR_FORMAT_NCHW);
  const int test_tensor_idx = ccv_cnnp_dataframe_extract_value(test_data, 0, offsetof(ccv_nnc_text_t, tensor), 0);
  const int test_one_hot_idx = ccv_cnnp_dataframe_one_hot(test_data, 0, offsetof(ccv_nnc_text_t, c), 2, 1, 0, CCV_32F, CCV_TENSOR_FORMAT_NCHW, 0);
  const int test_mask_idx = ccv_cnnp_dataframe_extract_value(test_data, 0, offsetof(ccv_nnc_text_t, mask), 0);
  ccv_cnnp_dataframe_t* const test_batched_data = ccv_cnnp_dataframe_combine_new(test_data, COLUMN_ID_LIST(test_tensor_idx, test_one_hot_idx, test_mask_idx), batch_size, device_count, CCV_TENSOR_FORMAT_NCHW);
  // Column layout: [0, device_count) are the (indices, label) GPU copies,
  // [device_count, 2 * device_count) are the squared attention masks.
  int gpu_batched[device_count * 2];
  int test_gpu_batched[device_count * 2];
  int i, j;
  for (i = 0; i < device_count; i++)
  {
    const int seq_len_batched = ccv_cnnp_dataframe_extract_tuple(batched_data, 0, i * 3 + 2, 0);
    const int tupled_mask_batched = ccv_cnnp_dataframe_one_squared(batched_data, COLUMN_ID_LIST(seq_len_batched), 0, max_length, 0);
    gpu_batched[i] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, 0, i * 3, 2, i, 0);
    gpu_batched[i + device_count] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, tupled_mask_batched, 0, 1, i, 0);
    const int test_seq_len_batched = ccv_cnnp_dataframe_extract_tuple(test_batched_data, 0, i * 3 + 2, 0);
    const int test_tupled_mask_batched = ccv_cnnp_dataframe_one_squared(test_batched_data, COLUMN_ID_LIST(test_seq_len_batched), 0, max_length, 0);
    test_gpu_batched[i] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, 0, i * 3, 2, i, 0);
    test_gpu_batched[i + device_count] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, test_tupled_mask_batched, 0, 1, i, 0);
  }
  ccv_cnnp_dataframe_iter_t* const iter = ccv_cnnp_dataframe_iter_new(batched_data, gpu_batched, device_count * 2);
  ccv_nnc_dynamic_graph_t* const dynamic_graph = ccv_nnc_dynamic_graph_new();
  // Word and position embedding tables: randomly initialized on device 0 and
  // copied to the other devices, all with gradient tracking switched off.
  ccv_nnc_tensor_variable_t vocab_vec[device_count];
  ccv_nnc_tensor_variable_t seq_vec[device_count];
  ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 1);
  ccv_nnc_tensor_param_t vocab_params = GPU_TENSOR_NCHW(000, 32F, vocab_size, embedding_size);
  vocab_vec[0] = ccv_nnc_tensor_variable_new(dynamic_graph, vocab_params);
  ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(vocab_vec[0]), 0, 0);
  ccv_nnc_tensor_param_t seq_params = GPU_TENSOR_NCHW(000, 32F, max_length, embedding_size);
  seq_vec[0] = ccv_nnc_tensor_variable_new(dynamic_graph, seq_params);
  ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(seq_vec[0]), 0, 0);
  for (i = 1; i < device_count; i++)
  {
    CCV_TENSOR_SET_DEVICE_ID(vocab_params.type, i);
    vocab_vec[i] = ccv_nnc_tensor_variable_new(dynamic_graph, vocab_params);
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(vocab_vec[0]), TENSOR_VARIABLE_LIST(vocab_vec[i]), 0, 0);
    CCV_TENSOR_SET_DEVICE_ID(seq_params.type, i);
    seq_vec[i] = ccv_nnc_tensor_variable_new(dynamic_graph, seq_params);
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(seq_vec[0]), TENSOR_VARIABLE_LIST(seq_vec[i]), 0, 0);
  }
  ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 0);
  // Position indices 0..max_length-1 repeated for every row of the batch.
  ccv_nnc_tensor_t* const seq_indices_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32S, batch_size * max_length), 0);
  for (i = 0; i < batch_size; i++)
    for (j = 0; j < max_length; j++)
      seq_indices_cpu->data.i32[i * max_length + j] = j;
  ccv_nnc_tensor_variable_t seq_indices[device_count];
  for (i = 0; i < device_count; i++)
  {
    // Named distinctly so it does not shadow the position-table seq_params above.
    ccv_nnc_tensor_param_t seq_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * max_length);
    CCV_TENSOR_SET_DEVICE_ID(seq_indices_params.type, i);
    seq_indices[i] = ccv_nnc_tensor_constant_new(dynamic_graph, seq_indices_params);
    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(seq_indices_cpu), TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, seq_indices[i], 0)), 0);
  }
  ccv_nnc_tensor_free(seq_indices_cpu);
  classifier_transformer_params_t classifier_transformer_params = {
    .layers = 2,
    .h = 8,
    .ff = 4,
    .dropout = 0.1,
  };
  ccv_cnnp_model_t* const transformer = ccv_cnnp_dynamic_new(_dynamic_classifier_transformer, &classifier_transformer_params, 0);
  ccv_cnnp_model_set_data_parallel(transformer, device_count);
  // Iterations per epoch, rounded up.
  const int epoch_end = (ccv_cnnp_dataframe_row_count(train_data) + device_count * batch_size - 1) / (device_count * batch_size);
  ccv_cnnp_dataframe_shuffle(train_data);
  ccv_nnc_cmd_t adam = CMD_ADAM_FORWARD(1, 0.0001, 0.9, 0.98, 0, 1e-9, 0);
  const int aux_size = ccv_nnc_minimizer_saved_aux_size(adam);
  // Optimizer state: per device, aux_size slots for the word table followed by
  // aux_size slots for the position table. The variables are created without
  // explicit parameters, so no per-slot tensor parameters are needed here.
  const int saved_aux_count = device_count * aux_size * 2;
  ccv_nnc_tensor_variable_t saved_auxs[saved_aux_count];
  for (i = 0; i < saved_aux_count; i++)
    saved_auxs[i] = ccv_nnc_tensor_variable_new(dynamic_graph);
  ccv_nnc_tensor_t* const out_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, batch_size, 2), 0);
  ccv_nnc_tensor_t* const fit_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, batch_size, 2), 0);
  ccv_nnc_tensor_t** tensor[device_count * 2];
  int epoch = 0;
  ccv_nnc_stream_context_t* const stream = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
  for (i = 0; epoch < epoch_limit; i++)
  {
    // Linear warm-up of the learning rate, scaled by the number of devices.
    float learn_rate = 0.0001 * ccv_min(i / (10000. / batch_size), 1) * device_count;
    adam = CMD_ADAM_FORWARD(i + 1, learn_rate, 0.9, 0.98, 0, 1e-9, 0);
    ccv_cnnp_dataframe_iter_next(iter, (void**)tensor, device_count, stream);
    ccv_nnc_tensor_t word_indices_tensor[device_count];
    ccv_nnc_tensor_t mask_tensor[device_count];
    ccv_nnc_tensor_variable_t word_indices[device_count];
    ccv_nnc_tensor_variable_t word_vec[device_count];
    ccv_nnc_tensor_variable_t pos_vec[device_count];
    ccv_nnc_tensor_variable_t select_vec[device_count];
    ccv_nnc_tensor_variable_t vec[device_count * 2];
    ccv_nnc_tensor_variable_t out[device_count];
    for (j = 0; j < device_count; j++)
    {
      ccv_nnc_tensor_param_t word_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * max_length);
      CCV_TENSOR_SET_DEVICE_ID(word_indices_params.type, j);
      // The indices are 32S, so use the i32 view of the data pointer (as the mask does).
      word_indices_tensor[j] = ccv_nnc_tensor(tensor[j][0]->data.i32, word_indices_params, 0);
      word_indices[j] = ccv_nnc_tensor_variable_new(dynamic_graph, word_indices_params);
      ccv_nnc_tensor_variable_set(dynamic_graph, word_indices[j], &word_indices_tensor[j]);
      ccv_nnc_tensor_param_t pre_vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size * max_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(pre_vec_params.type, j);
      word_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
      pos_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
      select_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      out[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
    }
    // Embedding lookup: word vectors + position vectors.
    ccv_nnc_tensor_variable_t tvin[device_count * 2];
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = word_indices[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, word_vec, device_count, device_count, stream);
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = seq_vec[j], tvin[j * 2 + 1] = seq_indices[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, pos_vec, device_count, device_count, stream);
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = word_vec[j], tvin[j * 2 + 1] = pos_vec[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_ADD_FORWARD(1, 1), ccv_nnc_no_hint, 0, tvin, device_count * 2, select_vec, device_count, device_count, stream);
    // Fetch the mask columns of the current row without advancing the iterator.
    ccv_cnnp_dataframe_iter_peek(iter, (void**)(tensor + device_count), device_count, device_count, stream);
    for (j = 0; j < device_count; j++)
    {
      ccv_nnc_tensor_param_t vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size, max_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(vec_params.type, j);
      vec[j * 2] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, select_vec[j], ccv_nnc_no_ofs, DIM_ALLOC(), vec_params);
      ccv_nnc_tensor_param_t mask_params = GPU_TENSOR_NCHW(000, 32S, batch_size, max_length, max_length);
      CCV_TENSOR_SET_DEVICE_ID(mask_params.type, j);
      mask_tensor[j] = ccv_nnc_tensor(tensor[j + device_count][0]->data.i32, mask_params, 0);
      vec[j * 2 + 1] = ccv_nnc_tensor_constant_new(dynamic_graph, mask_params);
      ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 2 + 1], &mask_tensor[j]);
    }
    ccv_nnc_dynamic_graph_evaluate(dynamic_graph, transformer, 0, vec, device_count * 2, out, device_count, 0, stream);
    ccv_nnc_tensor_variable_t softmax[device_count];
    ccv_nnc_tensor_variable_t fit[device_count];
    ccv_nnc_tensor_variable_t vocab_vec_grad[device_count];
    ccv_nnc_tensor_variable_t seq_vec_grad[device_count];
    for (j = 0; j < device_count; j++)
    {
      softmax[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      fit[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      ccv_nnc_tensor_variable_set(dynamic_graph, fit[j], tensor[j][1]);
      vocab_vec_grad[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      seq_vec_grad[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
    }
    ccv_nnc_tensor_variable_t tvout[device_count * 2];
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = out[j], tvin[j * 2 + 1] = fit[j], tvout[j * 2] = 0, tvout[j * 2 + 1] = softmax[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_SOFTMAX_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, tvout, device_count * 2, device_count, stream);
    // Back-propagate to the two embedding tables, then update model and tables with Adam.
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = seq_vec[j], tvout[j * 2] = vocab_vec_grad[j], tvout[j * 2 + 1] = seq_vec_grad[j];
    ccv_nnc_dynamic_graph_backward(dynamic_graph, softmax, device_count, 0, tvin, device_count * 2, tvout, device_count * 2, stream);
    ccv_cnnp_model_set_minimizer(transformer, adam, 0, 0, 0);
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = vocab_vec_grad[j], tvin[j * 2 + 1] = seq_vec_grad[j], tvout[j * 2] = vocab_vec[j], tvout[j * 2 + 1] = seq_vec[j];
    ccv_nnc_dynamic_graph_apply_gradients(dynamic_graph, adam, tvin, device_count * 2, tvout, device_count * 2, saved_auxs, device_count, stream);
    ccv_nnc_stream_context_wait(stream);
    // Release every per-iteration variable.
    for (j = 0; j < device_count; j++)
    {
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2]);
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2 + 1]);
      ccv_nnc_tensor_variable_free(dynamic_graph, select_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, word_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, word_indices[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, out[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, fit[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, pos_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, softmax[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, vocab_vec_grad[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, seq_vec_grad[j]);
    }
    if ((i + 1) % epoch_end == 0)
    {
      ++epoch;
      ccv_cnnp_dataframe_shuffle(train_data);
      ccv_cnnp_dataframe_iter_set_cursor(iter, 0);
    }
  }
  ccv_nnc_stream_context_free(stream);
  // Evaluation: same forward pass without gradients and without a stream.
  int correct = 0;
  ccv_cnnp_dataframe_iter_t* const test_iter = ccv_cnnp_dataframe_iter_new(test_batched_data, test_gpu_batched, device_count * 2);
  int k;
  ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 1);
  const int row_count = ccv_cnnp_dataframe_row_count(test_data);
  for (k = 0; k < row_count; k += batch_size * device_count)
  {
    ccv_cnnp_dataframe_iter_next(test_iter, (void**)tensor, device_count, 0);
    ccv_nnc_tensor_t word_indices_tensor[device_count];
    ccv_nnc_tensor_t mask_tensor[device_count];
    ccv_nnc_tensor_variable_t word_indices[device_count];
    ccv_nnc_tensor_variable_t word_vec[device_count];
    ccv_nnc_tensor_variable_t pos_vec[device_count];
    ccv_nnc_tensor_variable_t select_vec[device_count];
    ccv_nnc_tensor_variable_t vec[device_count * 2];
    ccv_nnc_tensor_variable_t out[device_count];
    for (j = 0; j < device_count; j++)
    {
      ccv_nnc_tensor_param_t word_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * max_length);
      CCV_TENSOR_SET_DEVICE_ID(word_indices_params.type, j);
      word_indices_tensor[j] = ccv_nnc_tensor(tensor[j][0]->data.i32, word_indices_params, 0);
      word_indices[j] = ccv_nnc_tensor_variable_new(dynamic_graph, word_indices_params);
      ccv_nnc_tensor_variable_set(dynamic_graph, word_indices[j], &word_indices_tensor[j]);
      ccv_nnc_tensor_param_t pre_vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size * max_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(pre_vec_params.type, j);
      word_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
      pos_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
      select_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      out[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
    }
    ccv_nnc_tensor_variable_t tvin[device_count * 2];
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = word_indices[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, word_vec, device_count, device_count, 0);
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = seq_vec[j], tvin[j * 2 + 1] = seq_indices[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, pos_vec, device_count, device_count, 0);
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = word_vec[j], tvin[j * 2 + 1] = pos_vec[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_ADD_FORWARD(1, 1), ccv_nnc_no_hint, 0, tvin, device_count * 2, select_vec, device_count, device_count, 0);
    ccv_cnnp_dataframe_iter_peek(test_iter, (void**)(tensor + device_count), device_count, device_count, 0);
    for (j = 0; j < device_count; j++)
    {
      ccv_nnc_tensor_param_t vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size, max_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(vec_params.type, j);
      vec[j * 2] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, select_vec[j], ccv_nnc_no_ofs, DIM_ALLOC(), vec_params);
      ccv_nnc_tensor_param_t mask_params = GPU_TENSOR_NCHW(000, 32S, batch_size, max_length, max_length);
      CCV_TENSOR_SET_DEVICE_ID(mask_params.type, j);
      mask_tensor[j] = ccv_nnc_tensor(tensor[j + device_count][0]->data.i32, mask_params, 0);
      vec[j * 2 + 1] = ccv_nnc_tensor_constant_new(dynamic_graph, mask_params);
      ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 2 + 1], &mask_tensor[j]);
    }
    ccv_nnc_dynamic_graph_evaluate(dynamic_graph, transformer, 1, vec, device_count * 2, out, device_count, 0, 0);
    int d;
    for (d = 0; d < device_count; d++)
    {
      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, out[d], 0)), TENSOR_LIST(out_cpu), 0);
      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor[d][1]), TENSOR_LIST(fit_cpu), 0);
      // Only count rows that actually exist in the (possibly partial) last batch.
      for (j = 0; j < ccv_min(row_count - k - d * batch_size, batch_size); j++)
      {
        const int truth = (fit_cpu->data.f32[j * 2] < fit_cpu->data.f32[j * 2 + 1]);
        const int prediction = (out_cpu->data.f32[j * 2] < out_cpu->data.f32[j * 2 + 1]);
        if (truth == prediction)
          ++correct;
      }
    }
    for (j = 0; j < device_count; j++)
    {
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2]);
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2 + 1]);
      ccv_nnc_tensor_variable_free(dynamic_graph, select_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, word_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, word_indices[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, out[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, pos_vec[j]);
    }
  }
  ccv_cnnp_dataframe_iter_free(test_iter);
  ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 0);
  ccv_cnnp_model_free(transformer);
  ccv_cnnp_dataframe_iter_free(iter);
  ccv_cnnp_dataframe_free(batched_data);
  ccv_cnnp_dataframe_free(test_batched_data);
  ccv_nnc_dynamic_graph_free(dynamic_graph);
  ccv_nnc_tensor_free(out_cpu);
  ccv_nnc_tensor_free(fit_cpu);
  return correct;
}
532
533
/*
 * End-to-end IMDB run that drives train_imdb_fix().
 *
 * The case is skipped (GUARD_ELSE_RETURN) when the cuBLAS GEMM or cuDNN
 * average-pool commands are unavailable, or when any of the dataset files
 * cannot be opened. When ccv_is_coverage() is false it trains for 10 epochs
 * and requires more than 80% of the test rows to be classified correctly;
 * otherwise it loads the arrays with a last argument of 128 (presumably a
 * row limit -- confirm against _array_from_disk_new) and runs one epoch
 * without asserting accuracy.
 */
TEST_CASE("train a categorical transformer classifier on imdb reviews to 80% with mix of dynamic graph and cnnp model")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GEMM_FORWARD, CCV_NNC_BACKEND_GPU_CUBLAS) &&
      ccv_nnc_cmd_ok(CCV_NNC_GEMM_BACKWARD, CCV_NNC_BACKEND_GPU_CUBLAS) &&
      ccv_nnc_cmd_ok(CCV_NNC_AVERAGE_POOL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
      ccv_nnc_cmd_ok(CCV_NNC_AVERAGE_POOL_BACKWARD, CCV_NNC_BACKEND_GPU_CUDNN));
  const char* const train_list = "/fast/Data/IMDB_Movie_Reviews/aclImdb/train.txt";
  const char* const test_list = "/fast/Data/IMDB_Movie_Reviews/aclImdb/test.txt";
  const char* const vocab_file = "/fast/Data/IMDB_Movie_Reviews/aclImdb/imdb.vocab";
  const char* const base_dir = "/fast/Data/IMDB_Movie_Reviews/aclImdb/";
  // Probe that the dataset is present. The outcome of each fopen() is
  // recorded before the stream is closed, because the value of a FILE
  // pointer is indeterminate once fclose() has been called on it.
  FILE* const train_open = fopen(train_list, "rb");
  FILE* const test_open = fopen(test_list, "rb");
  FILE* const vocab_open = fopen(vocab_file, "rb");
  const int has_train = (train_open != 0);
  const int has_test = (test_open != 0);
  const int has_vocab = (vocab_open != 0);
  if (has_train)
    fclose(train_open);
  if (has_test)
    fclose(test_open);
  if (has_vocab)
    fclose(vocab_open);
  if (!has_train || !has_test || !has_vocab)
    { GUARD_ELSE_RETURN(0); }
  khash_t(vocab_map)* vocab;
  int vocab_size;
  _vocab_init(vocab_file, &vocab, &vocab_size);
  const int max_length = 512;
  ccv_array_t* train_set;
  ccv_cnnp_dataframe_t* train_data;
  ccv_array_t* test_set;
  ccv_cnnp_dataframe_t* test_data;
  if (!ccv_is_coverage())
  {
    // Full run: 10 epochs, batch size 64, embedding size 128.
    train_set = _array_from_disk_new(train_list, base_dir, vocab, vocab_size, max_length, 0);
    train_data = ccv_cnnp_dataframe_from_array_new(train_set);
    test_set = _array_from_disk_new(test_list, base_dir, vocab, vocab_size, max_length, 0);
    test_data = ccv_cnnp_dataframe_from_array_new(test_set);
    const int correct = train_imdb_fix(10, vocab_size, 64, max_length, 128, train_data, test_data);
    REQUIRE((float)correct / test_set->rnum > 0.80, "%f should be larger than 80%%", (float)correct / test_set->rnum);
  } else {
    // Coverage run: a single epoch, result not checked.
    train_set = _array_from_disk_new(train_list, base_dir, vocab, vocab_size, max_length, 128);
    train_data = ccv_cnnp_dataframe_from_array_new(train_set);
    test_set = _array_from_disk_new(test_list, base_dir, vocab, vocab_size, max_length, 128);
    test_data = ccv_cnnp_dataframe_from_array_new(test_set);
    train_imdb_fix(1, vocab_size, 64, max_length, 128, train_data, test_data);
  }
  ccv_cnnp_dataframe_free(train_data);
  ccv_cnnp_dataframe_free(test_data);
  // Each array element owns two tensors (tensor and mask) that must be
  // released before the array itself.
  int i;
  for (i = 0; i < train_set->rnum; i++)
  {
    ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(train_set, i))->tensor);
    ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(train_set, i))->mask);
  }
  ccv_array_free(train_set);
  for (i = 0; i < test_set->rnum; i++)
  {
    ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(test_set, i))->tensor);
    ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(test_set, i))->mask);
  }
  ccv_array_free(test_set);
  _vocab_destroy(vocab);
}
594
595
/*
 * Train the dynamic binary transformer classifier on train_data for
 * epoch_limit epochs, then evaluate it on test_data.
 *
 * Batches are truncated per batch, so the sequence length (batch_length) is
 * read from each batch's tensor rather than being fixed at max_length. The
 * word and position embedding tables live in the dynamic graph and are
 * updated with Adam; the transformer itself is a cnnp model evaluated
 * through the dynamic graph and run data-parallel over every GPU reported by
 * ccv_nnc_device_count().
 *
 * Iterator column layout (dc = device_count), for both train and test:
 *   [0, dc)        truncated word-index tensors, one per device
 *   [dc, 2*dc)     mask tensors, one per device
 *   [2*dc, 3*dc)   label tensors, one per device
 *
 * @param epoch_limit    Number of passes over train_data.
 * @param vocab_size     Row count of the word embedding table.
 * @param batch_size     Rows per device per step.
 * @param max_length     Row count of the position embedding table and the
 *                       capacity used for the position-index buffers.
 * @param embedding_size Column count of both embedding tables.
 * @param train_data     Training dataframe of ccv_nnc_text_t rows; shuffled
 *                       in place before training and after each epoch.
 * @param test_data      Test dataframe of ccv_nnc_text_t rows.
 * @return Number of test rows where (label > 0.5) equals (output > 0).
 */
static int train_imdb_flex(const int epoch_limit, const int vocab_size, const int batch_size, const int max_length, const int embedding_size, ccv_cnnp_dataframe_t* const train_data, ccv_cnnp_dataframe_t* const test_data)
{
  const int tensor_idx = ccv_cnnp_dataframe_extract_value(train_data, 0, offsetof(ccv_nnc_text_t, tensor), 0);
  const int one_hot_idx = ccv_cnnp_dataframe_copy_scalar(train_data, 0, offsetof(ccv_nnc_text_t, c), CCV_32S, CCV_32F, CCV_TENSOR_FORMAT_NCHW, 0);
  const int mask_idx = ccv_cnnp_dataframe_extract_value(train_data, 0, offsetof(ccv_nnc_text_t, mask), 0);
  const int device_count = ccv_nnc_device_count(CCV_STREAM_CONTEXT_GPU);
  ccv_cnnp_dataframe_t* const batched_data = ccv_cnnp_dataframe_combine_new(train_data, COLUMN_ID_LIST(tensor_idx, one_hot_idx, mask_idx), batch_size, device_count, CCV_TENSOR_FORMAT_NCHW);
  const int test_tensor_idx = ccv_cnnp_dataframe_extract_value(test_data, 0, offsetof(ccv_nnc_text_t, tensor), 0);
  const int test_one_hot_idx = ccv_cnnp_dataframe_copy_scalar(test_data, 0, offsetof(ccv_nnc_text_t, c), CCV_32S, CCV_32F, CCV_TENSOR_FORMAT_NCHW, 0);
  const int test_mask_idx = ccv_cnnp_dataframe_extract_value(test_data, 0, offsetof(ccv_nnc_text_t, mask), 0);
  ccv_cnnp_dataframe_t* const test_batched_data = ccv_cnnp_dataframe_combine_new(test_data, COLUMN_ID_LIST(test_tensor_idx, test_one_hot_idx, test_mask_idx), batch_size, device_count, CCV_TENSOR_FORMAT_NCHW);
  int gpu_batched[device_count * 3];
  int seq_len_batched[device_count];
  int data_batched[device_count];
  int test_gpu_batched[device_count * 3];
  int test_seq_len_batched[device_count];
  int test_data_batched[device_count];
  int i, j;
  // The combined tuple holds three entries per device: tensor (i * 3),
  // label (i * 3 + 1) and mask (i * 3 + 2).
  for (i = 0; i < device_count; i++)
  {
    seq_len_batched[i] = ccv_cnnp_dataframe_extract_tuple(batched_data, 0, i * 3 + 2, 0);
    data_batched[i] = ccv_cnnp_dataframe_extract_tuple(batched_data, 0, i * 3, 0);
    test_seq_len_batched[i] = ccv_cnnp_dataframe_extract_tuple(test_batched_data, 0, i * 3 + 2, 0);
    test_data_batched[i] = ccv_cnnp_dataframe_extract_tuple(test_batched_data, 0, i * 3, 0);
  }
  const int mask_batched = ccv_cnnp_dataframe_one_squared(batched_data, seq_len_batched, device_count, 1, max_length, 0);
  const int trunc_data_batched = ccv_cnnp_dataframe_truncate(batched_data, data_batched, device_count, seq_len_batched, device_count, 0);
  const int test_mask_batched = ccv_cnnp_dataframe_one_squared(test_batched_data, test_seq_len_batched, device_count, 1, max_length, 0);
  const int test_trunc_data_batched = ccv_cnnp_dataframe_truncate(test_batched_data, test_data_batched, device_count, test_seq_len_batched, device_count, 0);
  for (i = 0; i < device_count; i++)
  {
    gpu_batched[i] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, trunc_data_batched, i, 1, i, 0);
    gpu_batched[i + device_count] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, mask_batched, i, 1, i, 0);
    gpu_batched[i + device_count * 2] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, 0, i * 3 + 1, 1, i, 0);
    test_gpu_batched[i] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, test_trunc_data_batched, i, 1, i, 0);
    test_gpu_batched[i + device_count] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, test_mask_batched, i, 1, i, 0);
    test_gpu_batched[i + device_count * 2] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, 0, i * 3 + 1, 1, i, 0);
  }
  ccv_cnnp_dataframe_iter_t* const iter = ccv_cnnp_dataframe_iter_new(batched_data, gpu_batched, device_count * 3);
  ccv_nnc_dynamic_graph_t* const dynamic_graph = ccv_nnc_dynamic_graph_new();
  ccv_nnc_tensor_variable_t vocab_vec[device_count];
  ccv_nnc_tensor_variable_t seq_vec[device_count];
  // Initialise both embedding tables on device 0 with uniform noise in
  // [-1, 1] and copy them to the other devices, all with gradients disabled.
  ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 1);
  ccv_nnc_tensor_param_t vocab_params = GPU_TENSOR_NCHW(000, 32F, vocab_size, embedding_size);
  vocab_vec[0] = ccv_nnc_tensor_variable_new(dynamic_graph, vocab_params);
  ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(vocab_vec[0]), 0, 0);
  ccv_nnc_tensor_param_t seq_params = GPU_TENSOR_NCHW(000, 32F, max_length, embedding_size);
  seq_vec[0] = ccv_nnc_tensor_variable_new(dynamic_graph, seq_params);
  ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(), TENSOR_VARIABLE_LIST(seq_vec[0]), 0, 0);
  for (i = 1; i < device_count; i++)
  {
    CCV_TENSOR_SET_DEVICE_ID(vocab_params.type, i);
    vocab_vec[i] = ccv_nnc_tensor_variable_new(dynamic_graph, vocab_params);
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(vocab_vec[0]), TENSOR_VARIABLE_LIST(vocab_vec[i]), 0, 0);
    CCV_TENSOR_SET_DEVICE_ID(seq_params.type, i);
    seq_vec[i] = ccv_nnc_tensor_variable_new(dynamic_graph, seq_params);
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(seq_vec[0]), TENSOR_VARIABLE_LIST(seq_vec[i]), 0, 0);
  }
  ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 0);
  // Position-index buffers are sized for the longest possible batch
  // (batch_size * max_length); each step only fills and aliases the first
  // batch_size * batch_length entries.
  ccv_nnc_tensor_t* const seq_indices_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32S, batch_size * max_length), 0);
  ccv_nnc_tensor_variable_t seq_indices[device_count];
  for (i = 0; i < device_count; i++)
  {
    ccv_nnc_tensor_param_t seq_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * max_length);
    CCV_TENSOR_SET_DEVICE_ID(seq_indices_params.type, i);
    seq_indices[i] = ccv_nnc_tensor_constant_new(dynamic_graph, seq_indices_params);
  }
  classifier_transformer_params_t classifier_transformer_params = {
    .layers = 2,
    .h = 8,
    .ff = 4,
    .dropout = 0.1,
  };
  ccv_cnnp_model_t* const transformer = ccv_cnnp_dynamic_new(_dynamic_binary_classifier_transformer, &classifier_transformer_params, 0);
  ccv_cnnp_model_set_data_parallel(transformer, device_count);
  // Steps per epoch: row count divided by the global batch, rounded up.
  const int epoch_end = (ccv_cnnp_dataframe_row_count(train_data) + device_count * batch_size - 1) / (device_count * batch_size);
  ccv_cnnp_dataframe_shuffle(train_data);
  ccv_nnc_cmd_t adam = CMD_ADAM_FORWARD(1, 0.0001, 0.9, 0.98, 0, 1e-9, 0);
  const int aux_size = ccv_nnc_minimizer_saved_aux_size(adam);
  // Per device: aux_size optimizer slots shaped like the word table followed
  // by aux_size slots shaped like the position table.
  ccv_nnc_tensor_variable_t saved_auxs[device_count * aux_size * 2];
  for (i = 0; i < device_count; i++)
  {
    for (j = 0; j < aux_size; j++)
    {
      ccv_nnc_tensor_param_t saved_aux_params = GPU_TENSOR_NCHW(000, 32F, vocab_size, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(saved_aux_params.type, i);
      saved_auxs[i * aux_size * 2 + j] = ccv_nnc_tensor_variable_new(dynamic_graph, saved_aux_params);
    }
    for (j = 0; j < aux_size; j++)
    {
      ccv_nnc_tensor_param_t saved_aux_params = GPU_TENSOR_NCHW(000, 32F, max_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(saved_aux_params.type, i);
      saved_auxs[i * aux_size * 2 + aux_size + j] = ccv_nnc_tensor_variable_new(dynamic_graph, saved_aux_params);
    }
  }
  ccv_nnc_tensor_t* const out_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, batch_size, 1), 0);
  ccv_nnc_tensor_t* const fit_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, batch_size, 1), 0);
  ccv_nnc_tensor_t** tensor[device_count * 3];
  int epoch = 0;
  ccv_nnc_stream_context_t* const stream = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
  // Training loop: i counts steps, epoch advances every epoch_end steps.
  for (i = 0; epoch < epoch_limit; i++)
  {
    // Linear warm-up over the first 10000 / batch_size steps, scaled by the
    // number of devices.
    float learn_rate = 0.0001 * ccv_min(i / (10000. / batch_size), 1) * device_count;
    adam = CMD_ADAM_FORWARD(i + 1, learn_rate, 0.9, 0.98, 0, 1e-9, 0);
    // Fetch only the word-index columns here; masks and labels are peeked
    // further down.
    ccv_cnnp_dataframe_iter_next(iter, (void**)tensor, device_count, stream);
    ccv_nnc_tensor_t word_indices_tensor[device_count];
    ccv_nnc_tensor_t mask_tensor[device_count];
    ccv_nnc_tensor_variable_t word_indices[device_count];
    ccv_nnc_tensor_variable_t word_vec[device_count];
    ccv_nnc_tensor_variable_t pos_vec[device_count];
    ccv_nnc_tensor_variable_t select_vec[device_count];
    ccv_nnc_tensor_variable_t vec[device_count * 2];
    ccv_nnc_tensor_variable_t out[device_count];
    ccv_nnc_tensor_variable_t seq_indices_alias[device_count];
    int batch_length = 0;
    for (j = 0; j < device_count; j++)
    {
      // The value from the last device is the one used after this loop.
      batch_length = tensor[j][0]->info.dim[1];
      ccv_nnc_tensor_param_t word_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * batch_length);
      CCV_TENSOR_SET_DEVICE_ID(word_indices_params.type, j);
      // Re-wrap the batched indices as a flat int32 vector (no copy).
      word_indices_tensor[j] = ccv_nnc_tensor(tensor[j][0]->data.i32, word_indices_params, 0);
      word_indices[j] = ccv_nnc_tensor_variable_new(dynamic_graph, word_indices_params);
      ccv_nnc_tensor_variable_set(dynamic_graph, word_indices[j], &word_indices_tensor[j]);
      ccv_nnc_tensor_param_t pre_vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size * batch_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(pre_vec_params.type, j);
      word_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
      pos_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
      select_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      out[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      ccv_nnc_tensor_param_t alias_params = GPU_TENSOR_NCHW(000, 32S, batch_size * batch_length);
      CCV_TENSOR_SET_DEVICE_ID(alias_params.type, j);
      seq_indices_alias[j] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, seq_indices[j], ccv_nnc_no_ofs, DIM_ALLOC(), alias_params);
    }
    // Position indices 0..batch_length-1, repeated for every row.
    for (j = 0; j < batch_size; j++)
    {
      int pos;
      for (pos = 0; pos < batch_length; pos++)
        seq_indices_cpu->data.i32[j * batch_length + pos] = pos;
    }
    for (j = 0; j < device_count; j++)
      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(seq_indices_cpu), TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, seq_indices[j])), 0);
    ccv_nnc_tensor_variable_t tvin[device_count * 2];
    // word_vec = vocab_vec[word_indices]
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = word_indices[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, word_vec, device_count, device_count, stream);
    // pos_vec = seq_vec[position indices]
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = seq_vec[j], tvin[j * 2 + 1] = seq_indices_alias[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, pos_vec, device_count, device_count, stream);
    // select_vec = word_vec + pos_vec
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = word_vec[j], tvin[j * 2 + 1] = pos_vec[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_ADD_FORWARD(1, 1), ccv_nnc_no_hint, 0, tvin, device_count * 2, select_vec, device_count, device_count, stream);
    // Masks and labels of the current row land in tensor[dc .. 3 * dc).
    ccv_cnnp_dataframe_iter_peek(iter, (void**)(tensor + device_count), device_count, device_count * 2, stream);
    for (j = 0; j < device_count; j++)
    {
      ccv_nnc_tensor_param_t vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size, batch_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(vec_params.type, j);
      vec[j * 2] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, select_vec[j], ccv_nnc_no_ofs, DIM_ALLOC(), vec_params);
      ccv_nnc_tensor_param_t mask_params = GPU_TENSOR_NCHW(000, 32S, batch_size, batch_length, batch_length);
      CCV_TENSOR_SET_DEVICE_ID(mask_params.type, j);
      mask_tensor[j] = ccv_nnc_tensor(tensor[j + device_count][0]->data.i32, mask_params, 0);
      vec[j * 2 + 1] = ccv_nnc_tensor_constant_new(dynamic_graph, mask_params);
      ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 2 + 1], &mask_tensor[j]);
    }
    ccv_nnc_dynamic_graph_evaluate(dynamic_graph, transformer, 0, vec, device_count * 2, out, device_count, 0, stream);
    ccv_nnc_tensor_variable_t sigmoid[device_count];
    ccv_nnc_tensor_variable_t fit[device_count];
    ccv_nnc_tensor_variable_t vocab_vec_grad[device_count];
    ccv_nnc_tensor_variable_t seq_vec_grad[device_count];
    for (j = 0; j < device_count; j++)
    {
      sigmoid[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      fit[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      ccv_nnc_tensor_variable_set(dynamic_graph, fit[j], tensor[j + device_count * 2][0]);
      vocab_vec_grad[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      seq_vec_grad[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
    }
    ccv_nnc_tensor_variable_t tvout[device_count * 2];
    // Loss: the first output slot of each pair is left empty (0), only the
    // second one (sigmoid[j]) is captured.
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = out[j], tvin[j * 2 + 1] = fit[j], tvout[j * 2] = 0, tvout[j * 2 + 1] = sigmoid[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, tvout, device_count * 2, device_count, stream);
    // Gradients with respect to the two embedding tables.
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = seq_vec[j], tvout[j * 2] = vocab_vec_grad[j], tvout[j * 2 + 1] = seq_vec_grad[j];
    ccv_nnc_dynamic_graph_backward(dynamic_graph, sigmoid, device_count, 0, tvin, device_count * 2, tvout, device_count * 2, stream);
    ccv_cnnp_model_set_minimizer(transformer, adam, 0, 0, 0);
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = vocab_vec_grad[j], tvin[j * 2 + 1] = seq_vec_grad[j], tvout[j * 2] = vocab_vec[j], tvout[j * 2 + 1] = seq_vec[j];
    ccv_nnc_dynamic_graph_apply_gradients(dynamic_graph, adam, tvin, device_count * 2, tvout, device_count * 2, saved_auxs, device_count, stream);
    // Everything above was queued on stream; wait before releasing the
    // per-step variables.
    ccv_nnc_stream_context_wait(stream);
    for (j = 0; j < device_count; j++)
    {
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2]);
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2 + 1]);
      ccv_nnc_tensor_variable_free(dynamic_graph, select_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, word_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, word_indices[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, out[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, fit[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, pos_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, sigmoid[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, vocab_vec_grad[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, seq_vec_grad[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, seq_indices_alias[j]);
    }
    if ((i + 1) % epoch_end == 0)
    {
      ++epoch;
      ccv_cnnp_dataframe_shuffle(train_data);
      ccv_cnnp_dataframe_iter_set_cursor(iter, 0);
    }
  }
  ccv_nnc_stream_context_free(stream);
  int correct = 0;
  ccv_cnnp_dataframe_iter_t* const test_iter = ccv_cnnp_dataframe_iter_new(test_batched_data, test_gpu_batched, device_count * 3);
  int k;
  ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 1);
  const int row_count = ccv_cnnp_dataframe_row_count(test_data);
  // Evaluation loop: k is the index of the first test row of the current
  // global batch.
  for (k = 0; k < row_count; k += batch_size * device_count)
  {
    ccv_cnnp_dataframe_iter_next(test_iter, (void**)tensor, device_count * 3, 0);
    ccv_nnc_tensor_t word_indices_tensor[device_count];
    ccv_nnc_tensor_t mask_tensor[device_count];
    ccv_nnc_tensor_variable_t word_indices[device_count];
    ccv_nnc_tensor_variable_t word_vec[device_count];
    ccv_nnc_tensor_variable_t pos_vec[device_count];
    ccv_nnc_tensor_variable_t select_vec[device_count];
    ccv_nnc_tensor_variable_t vec[device_count * 2];
    ccv_nnc_tensor_variable_t out[device_count];
    ccv_nnc_tensor_variable_t seq_indices_alias[device_count];
    int batch_length = 0;
    for (j = 0; j < device_count; j++)
    {
      batch_length = tensor[j][0]->info.dim[1];
      ccv_nnc_tensor_param_t word_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * batch_length);
      CCV_TENSOR_SET_DEVICE_ID(word_indices_params.type, j);
      word_indices_tensor[j] = ccv_nnc_tensor(tensor[j][0]->data.i32, word_indices_params, 0);
      word_indices[j] = ccv_nnc_tensor_variable_new(dynamic_graph, word_indices_params);
      ccv_nnc_tensor_variable_set(dynamic_graph, word_indices[j], &word_indices_tensor[j]);
      ccv_nnc_tensor_param_t pre_vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size * batch_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(pre_vec_params.type, j);
      word_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
      pos_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
      select_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      out[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      ccv_nnc_tensor_param_t alias_params = GPU_TENSOR_NCHW(000, 32S, batch_size * batch_length);
      CCV_TENSOR_SET_DEVICE_ID(alias_params.type, j);
      seq_indices_alias[j] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, seq_indices[j], ccv_nnc_no_ofs, DIM_ALLOC(), alias_params);
    }
    // A separate name is used for the position counter so that it does not
    // hide the batch cursor k, which is read again when scoring below.
    for (j = 0; j < batch_size; j++)
    {
      int pos;
      for (pos = 0; pos < batch_length; pos++)
        seq_indices_cpu->data.i32[j * batch_length + pos] = pos;
    }
    for (j = 0; j < device_count; j++)
      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(seq_indices_cpu), TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, seq_indices[j])), 0);
    ccv_nnc_tensor_variable_t tvin[device_count * 2];
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = word_indices[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, word_vec, device_count, device_count, 0);
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = seq_vec[j], tvin[j * 2 + 1] = seq_indices_alias[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, device_count * 2, pos_vec, device_count, device_count, 0);
    for (j = 0; j < device_count; j++)
      tvin[j * 2] = word_vec[j], tvin[j * 2 + 1] = pos_vec[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_ADD_FORWARD(1, 1), ccv_nnc_no_hint, 0, tvin, device_count * 2, select_vec, device_count, device_count, 0);
    for (j = 0; j < device_count; j++)
    {
      ccv_nnc_tensor_param_t vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size, batch_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(vec_params.type, j);
      vec[j * 2] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, select_vec[j], ccv_nnc_no_ofs, DIM_ALLOC(), vec_params);
      ccv_nnc_tensor_param_t mask_params = GPU_TENSOR_NCHW(000, 32S, batch_size, batch_length, batch_length);
      CCV_TENSOR_SET_DEVICE_ID(mask_params.type, j);
      mask_tensor[j] = ccv_nnc_tensor(tensor[j + device_count][0]->data.i32, mask_params, 0);
      vec[j * 2 + 1] = ccv_nnc_tensor_constant_new(dynamic_graph, mask_params);
      ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 2 + 1], &mask_tensor[j]);
    }
    ccv_nnc_dynamic_graph_evaluate(dynamic_graph, transformer, 1, vec, device_count * 2, out, device_count, 0, 0);
    int d;
    for (d = 0; d < device_count; d++)
    {
      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, out[d], 0)), TENSOR_LIST(out_cpu), 0);
      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor[d + device_count * 2][0]), TENSOR_LIST(fit_cpu), 0);
      // Only score rows that exist: the last global batch may be short.
      for (j = 0; j < ccv_min(row_count - k - d * batch_size, batch_size); j++)
      {
        const int truth = (fit_cpu->data.f32[j] > 0.5);
        const int prediction = (out_cpu->data.f32[j] > 0);
        if (truth == prediction)
          ++correct;
      }
    }
    for (j = 0; j < device_count; j++)
    {
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2]);
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 2 + 1]);
      ccv_nnc_tensor_variable_free(dynamic_graph, select_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, word_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, word_indices[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, out[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, pos_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, seq_indices_alias[j]);
    }
  }
  ccv_nnc_tensor_free(seq_indices_cpu);
  ccv_cnnp_dataframe_iter_free(test_iter);
  ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 0);
  ccv_cnnp_model_free(transformer);
  ccv_cnnp_dataframe_iter_free(iter);
  ccv_cnnp_dataframe_free(batched_data);
  ccv_cnnp_dataframe_free(test_batched_data);
  ccv_nnc_dynamic_graph_free(dynamic_graph);
  ccv_nnc_tensor_free(out_cpu);
  ccv_nnc_tensor_free(fit_cpu);
  return correct;
}
909
910
/*
 * End-to-end IMDB run that drives train_imdb_flex().
 *
 * The case is skipped (GUARD_ELSE_RETURN) when the cuBLAS GEMM or cuDNN
 * average-pool commands are unavailable, or when any of the dataset files
 * cannot be opened. When ccv_is_coverage() is false it trains for 10 epochs
 * and requires more than 80% of the test rows to be classified correctly;
 * otherwise it loads the arrays with a last argument of 128 (presumably a
 * row limit -- confirm against _array_from_disk_new) and runs one epoch
 * without asserting accuracy.
 */
TEST_CASE("train a binary transformer classifier on imdb reviews to 80% with mix of dynamic graph and cnnp model and dynamic inputs")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GEMM_FORWARD, CCV_NNC_BACKEND_GPU_CUBLAS) &&
      ccv_nnc_cmd_ok(CCV_NNC_GEMM_BACKWARD, CCV_NNC_BACKEND_GPU_CUBLAS) &&
      ccv_nnc_cmd_ok(CCV_NNC_AVERAGE_POOL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
      ccv_nnc_cmd_ok(CCV_NNC_AVERAGE_POOL_BACKWARD, CCV_NNC_BACKEND_GPU_CUDNN));
  const char* const train_list = "/fast/Data/IMDB_Movie_Reviews/aclImdb/train.txt";
  const char* const test_list = "/fast/Data/IMDB_Movie_Reviews/aclImdb/test.txt";
  const char* const vocab_file = "/fast/Data/IMDB_Movie_Reviews/aclImdb/imdb.vocab";
  const char* const base_dir = "/fast/Data/IMDB_Movie_Reviews/aclImdb/";
  // Probe that the dataset is present. The outcome of each fopen() is
  // recorded before the stream is closed, because the value of a FILE
  // pointer is indeterminate once fclose() has been called on it.
  FILE* const train_open = fopen(train_list, "rb");
  FILE* const test_open = fopen(test_list, "rb");
  FILE* const vocab_open = fopen(vocab_file, "rb");
  const int has_train = (train_open != 0);
  const int has_test = (test_open != 0);
  const int has_vocab = (vocab_open != 0);
  if (has_train)
    fclose(train_open);
  if (has_test)
    fclose(test_open);
  if (has_vocab)
    fclose(vocab_open);
  if (!has_train || !has_test || !has_vocab)
    { GUARD_ELSE_RETURN(0); }
  khash_t(vocab_map)* vocab;
  int vocab_size;
  _vocab_init(vocab_file, &vocab, &vocab_size);
  const int max_length = 512;
  ccv_array_t* train_set;
  ccv_cnnp_dataframe_t* train_data;
  ccv_array_t* test_set;
  ccv_cnnp_dataframe_t* test_data;
  if (!ccv_is_coverage())
  {
    // Full run: 10 epochs, batch size 64, embedding size 128.
    train_set = _array_from_disk_new(train_list, base_dir, vocab, vocab_size, max_length, 0);
    train_data = ccv_cnnp_dataframe_from_array_new(train_set);
    test_set = _array_from_disk_new(test_list, base_dir, vocab, vocab_size, max_length, 0);
    test_data = ccv_cnnp_dataframe_from_array_new(test_set);
    const int correct = train_imdb_flex(10, vocab_size, 64, max_length, 128, train_data, test_data);
    REQUIRE((float)correct / test_set->rnum > 0.80, "%f should be larger than 80%%", (float)correct / test_set->rnum);
  } else {
    // Coverage run: a single epoch, result not checked.
    train_set = _array_from_disk_new(train_list, base_dir, vocab, vocab_size, max_length, 128);
    train_data = ccv_cnnp_dataframe_from_array_new(train_set);
    test_set = _array_from_disk_new(test_list, base_dir, vocab, vocab_size, max_length, 128);
    test_data = ccv_cnnp_dataframe_from_array_new(test_set);
    train_imdb_flex(1, vocab_size, 64, max_length, 128, train_data, test_data);
  }
  ccv_cnnp_dataframe_free(train_data);
  ccv_cnnp_dataframe_free(test_data);
  // Each array element owns two tensors (tensor and mask) that must be
  // released before the array itself.
  int i;
  for (i = 0; i < train_set->rnum; i++)
  {
    ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(train_set, i))->tensor);
    ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(train_set, i))->mask);
  }
  ccv_array_free(train_set);
  for (i = 0; i < test_set->rnum; i++)
  {
    ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(test_set, i))->tensor);
    ccv_nnc_tensor_free(((ccv_nnc_text_t*)ccv_array_get(test_set, i))->mask);
  }
  ccv_array_free(test_set);
  _vocab_destroy(vocab);
}
971
972
static ccv_cnnp_model_t* _classifier_lstm_new(const int batch_size, const int batch_length, const int num_layers, const int hidden_size, const float dropout)
973
1
{
974
1
  ccv_cnnp_model_io_t const x = ccv_cnnp_input();
975
1
  ccv_cnnp_model_io_t const mask = ccv_cnnp_input();
976
1
  ccv_cnnp_model_io_t const index = ccv_cnnp_input();
977
1
  ccv_cnnp_model_io_t out = ccv_cnnp_model_apply(ccv_cnnp_lstm(1, hidden_size, 0, num_layers, 1, 1, 0, dropout, 1, 0), MODEL_IO_LIST(x, mask));
978
1
  out = ccv_cnnp_model_apply(ccv_cnnp_reshape(0, DIM_ALLOC(batch_size * batch_length, 128), DIM_ALLOC(), DIM_ALLOC(), 0), MODEL_IO_LIST(out));
979
1
  out = ccv_cnnp_model_apply(ccv_cnnp_index_select(0), MODEL_IO_LIST(out, index));
980
  // Last layer, get it to 1.
981
1
  out = ccv_cnnp_model_apply(ccv_cnnp_flatten(0), MODEL_IO_LIST(out));
982
1
  out = ccv_cnnp_model_apply(ccv_cnnp_dense(1, 0, 0, 1, 0), MODEL_IO_LIST(out));
983
1
  return ccv_cnnp_model_new(MODEL_IO_LIST(x, mask, index), MODEL_IO_LIST(out), 1, "classifier");
984
1
}
985
986
/* Hyper-parameters handed to _dynamic_classifier_lstm through its context pointer. */
typedef struct {
  int num_layers;  /* forwarded as num_layers to _classifier_lstm_new */
  int hidden_size; /* forwarded as hidden_size to _classifier_lstm_new */
  float dropout;   /* forwarded as dropout to _classifier_lstm_new */
} classifier_lstm_params_t;
991
992
static ccv_cnnp_model_t* _dynamic_classifier_lstm(const ccv_nnc_tensor_param_t* const inputs, const int input_size, void* const context)
993
1
{
994
1
  const classifier_lstm_params_t* const params = (classifier_lstm_params_t*)context;
995
1
  const int batch_size = inputs[0].dim[0];
996
1
  const int batch_length = inputs[0].dim[1];
997
1
  return _classifier_lstm_new(batch_size, batch_length, params->num_layers, params->hidden_size, params->dropout);
998
1
}
999
1000
/**
 * Dataframe map callback: turns a tensor of per-row 32-bit integer values
 * (used by the caller as sequence lengths) into flat indices.
 *
 * For every element j of the input, the output element is
 * max(longest * j + input[j] - 1, 0), where longest is the largest input
 * value (never less than 0). The output tensor has the same parameters as the
 * input; an existing output in data[i] is resized, otherwise a new one is
 * allocated, and the result is stored back into data[i].
 *
 * @param column_data Source columns; only column 0 is read.
 * @param column_size Number of source columns (not consulted here).
 * @param batch_size Number of rows to process.
 * @param data Per-row output slots, read and written.
 * @param context Unused.
 * @param stream_context Unused.
 */
static void _ccv_cnnp_mask_to_index(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context)
{
  int row;
  for (row = 0; row < batch_size; row++)
  {
    ccv_nnc_tensor_t* const lengths = (ccv_nnc_tensor_t*)column_data[0][row];
    ccv_nnc_tensor_param_t params = lengths->info;
    ccv_nnc_tensor_t* indices = (ccv_nnc_tensor_t*)data[row];
    // Reuse the previously produced tensor when there is one.
    if (indices)
      indices = ccv_nnc_tensor_resize(indices, params);
    else
      indices = ccv_nnc_tensor_new(0, params, 0);
    const int count = params.dim[0];
    int longest = 0;
    int k;
    // First pass: find the largest value.
    for (k = 0; k < count; k++)
    {
      const int length = lengths->data.i32[k];
      if (length > longest)
        longest = length;
    }
    // Second pass: offset of the last valid step of each row, clamped at 0.
    for (k = 0; k < count; k++)
    {
      const int last = longest * k + lengths->data.i32[k] - 1;
      indices->data.i32[k] = (last > 0) ? last : 0;
    }
    data[row] = indices;
  }
}
1018
1019
static void _ccv_cnnp_tensor_deinit(void* const data, void* const context)
1020
2
{
1021
2
  ccv_nnc_tensor_free((ccv_nnc_tensor_t*)data);
1022
2
}
1023
1024
/**
 * Train the LSTM classifier on train_data and count correct predictions on
 * test_data.
 *
 * The word embedding table (vocab_vec) lives in a dynamic graph and is updated
 * through ccv_nnc_dynamic_graph_apply_gradients, while the LSTM itself is a
 * cnnp model built lazily by _dynamic_classifier_lstm and evaluated inside the
 * same dynamic graph.
 *
 * @param epoch_limit Number of epochs to train for.
 * @param vocab_size Number of rows of the embedding table.
 * @param batch_size Number of samples per batch.
 * @param max_length Maximum sequence length (only used to size the second
 *        half of the saved auxiliary tensors here).
 * @param embedding_size Number of columns of the embedding table.
 * @param train_data Dataframe of ccv_nnc_text_t used for training; shuffled
 *        by this function, not freed.
 * @param test_data Dataframe of ccv_nnc_text_t used for evaluation; shuffled
 *        by this function, not freed.
 * @return Number of test samples whose prediction matched the label.
 */
static int train_imdb_lstm(const int epoch_limit, const int vocab_size, const int batch_size, const int max_length, const int embedding_size, ccv_cnnp_dataframe_t* const train_data, ccv_cnnp_dataframe_t* const test_data)
{
  // Derive (tensor, label, mask) columns and batch them; same for the test set.
  const int tensor_idx = ccv_cnnp_dataframe_extract_value(train_data, 0, offsetof(ccv_nnc_text_t, tensor), 0);
  const int one_hot_idx = ccv_cnnp_dataframe_copy_scalar(train_data, 0, offsetof(ccv_nnc_text_t, c), CCV_32S, CCV_32F, CCV_TENSOR_FORMAT_NCHW, 0);
  const int mask_idx = ccv_cnnp_dataframe_extract_value(train_data, 0, offsetof(ccv_nnc_text_t, mask), 0);
  ccv_cnnp_dataframe_t* const batched_data = ccv_cnnp_dataframe_combine_new(train_data, COLUMN_ID_LIST(tensor_idx, one_hot_idx, mask_idx), batch_size, 1, CCV_TENSOR_FORMAT_NCHW);
  const int test_tensor_idx = ccv_cnnp_dataframe_extract_value(test_data, 0, offsetof(ccv_nnc_text_t, tensor), 0);
  const int test_one_hot_idx = ccv_cnnp_dataframe_copy_scalar(test_data, 0, offsetof(ccv_nnc_text_t, c), CCV_32S, CCV_32F, CCV_TENSOR_FORMAT_NCHW, 0);
  const int test_mask_idx = ccv_cnnp_dataframe_extract_value(test_data, 0, offsetof(ccv_nnc_text_t, mask), 0);
  ccv_cnnp_dataframe_t* const test_batched_data = ccv_cnnp_dataframe_combine_new(test_data, COLUMN_ID_LIST(test_tensor_idx, test_one_hot_idx, test_mask_idx), batch_size, 1, CCV_TENSOR_FORMAT_NCHW);
  int gpu_batched[4];
  int seq_len_batched[1];
  int index_batched[1];
  int data_batched[1];
  int test_gpu_batched[4];
  int test_seq_len_batched[1];
  int test_index_batched[1];
  int test_data_batched[1];
  int i, j;
  // The loops over "i < 1" / "j < 1" below iterate over a single device.
  for (i = 0; i < 1; i++)
  {
    // Tuple slot i * 3 + 2 is the mask column, slot i * 3 is the word tensor.
    seq_len_batched[i] = ccv_cnnp_dataframe_extract_tuple(batched_data, 0, i * 3 + 2, 0);
    index_batched[i] = ccv_cnnp_dataframe_map(batched_data, _ccv_cnnp_mask_to_index, CCV_STREAM_CONTEXT_CPU, _ccv_cnnp_tensor_deinit, COLUMN_ID_LIST(seq_len_batched[i]), 0, 0, 0);
    index_batched[i] = ccv_cnnp_dataframe_make_tuple(batched_data, COLUMN_ID_LIST(index_batched[i]), 0);
    data_batched[i] = ccv_cnnp_dataframe_extract_tuple(batched_data, 0, i * 3, 0);
    test_seq_len_batched[i] = ccv_cnnp_dataframe_extract_tuple(test_batched_data, 0, i * 3 + 2, 0);
    test_index_batched[i] = ccv_cnnp_dataframe_map(test_batched_data, _ccv_cnnp_mask_to_index, CCV_STREAM_CONTEXT_CPU, _ccv_cnnp_tensor_deinit, COLUMN_ID_LIST(test_seq_len_batched[i]), 0, 0, 0);
    test_index_batched[i] = ccv_cnnp_dataframe_make_tuple(test_batched_data, COLUMN_ID_LIST(test_index_batched[i]), 0);
    test_data_batched[i] = ccv_cnnp_dataframe_extract_tuple(test_batched_data, 0, i * 3, 0);
  }
  // Truncate the word tensors according to the per-batch sequence lengths.
  const int trunc_data_batched = ccv_cnnp_dataframe_truncate(batched_data, data_batched, 1, seq_len_batched, 1, 0);
  const int test_trunc_data_batched = ccv_cnnp_dataframe_truncate(test_batched_data, test_data_batched, 1, test_seq_len_batched, 1, 0);
  for (i = 0; i < 1; i++)
  {
    // Slots per device: [0] truncated words (GPU), [1] labels (GPU),
    // [2] mask (left as extracted), [3] index (GPU).
    gpu_batched[i * 4] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, trunc_data_batched, i, 1, i, 0);
    gpu_batched[i * 4 + 1] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, 0, i * 3 + 1, 1, i, 0);
    gpu_batched[i * 4 + 2] = ccv_cnnp_dataframe_extract_tuple(batched_data, 0, i * 3 + 2, 0);
    gpu_batched[i * 4 + 3] = ccv_cnnp_dataframe_copy_to_gpu(batched_data, index_batched[i], 0, 1, i, 0);
    test_gpu_batched[i * 4] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, test_trunc_data_batched, i, 1, i, 0);
    test_gpu_batched[i * 4 + 1] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, 0, i * 3 + 1, 1, i, 0);
    test_gpu_batched[i * 4 + 2] = ccv_cnnp_dataframe_extract_tuple(test_batched_data, 0, i * 3 + 2, 0);
    test_gpu_batched[i * 4 + 3] = ccv_cnnp_dataframe_copy_to_gpu(test_batched_data, test_index_batched[i], 0, 1, i, 0);
  }
  ccv_cnnp_dataframe_iter_t* const iter = ccv_cnnp_dataframe_iter_new(batched_data, gpu_batched, 4);
  ccv_nnc_dynamic_graph_t* const dynamic_graph = ccv_nnc_dynamic_graph_new();
  // Randomly initialize the embedding table on CPU, then copy it to the device.
  ccv_nnc_tensor_t* const vocab_vec_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, vocab_size, embedding_size), 0);
  ccv_nnc_cmd_exec(CMD_RANDOM_UNIFORM_FORWARD(-1, 1), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(vocab_vec_cpu), 0);
  ccv_nnc_tensor_variable_t vocab_vec[1];
  for (i = 0; i < 1; i++)
  {
    ccv_nnc_tensor_param_t vocab_params = GPU_TENSOR_NCHW(000, 32F, vocab_size, embedding_size);
    CCV_TENSOR_SET_DEVICE_ID(vocab_params.type, i);
    vocab_vec[i] = ccv_nnc_tensor_variable_new(dynamic_graph, vocab_params);
    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(vocab_vec_cpu), TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, vocab_vec[i])), 0);
  }
  ccv_nnc_tensor_free(vocab_vec_cpu);
  classifier_lstm_params_t classifier_lstm_params = {
    .num_layers = 2,
    .hidden_size = 128,
    .dropout = 0.2,
  };
  ccv_cnnp_model_t* const lstm = ccv_cnnp_dynamic_new(_dynamic_classifier_lstm, &classifier_lstm_params, 0);
  // Number of iterations that make up one epoch (row count rounded up to batches).
  const int epoch_end = (ccv_cnnp_dataframe_row_count(train_data) + batch_size - 1) / batch_size;
  ccv_cnnp_dataframe_shuffle(train_data);
  ccv_nnc_cmd_t optim = CMD_LAMB_FORWARD(1, 0.001, 0.9, 0.999, 0, 1e-6);
  const int aux_size = ccv_nnc_minimizer_saved_aux_size(optim);
  ccv_nnc_tensor_variable_t saved_auxs[aux_size * 2];
  for (i = 0; i < 1; i++)
  {
    for (j = 0; j < aux_size; j++)
    {
      ccv_nnc_tensor_param_t saved_aux_params = GPU_TENSOR_NCHW(000, 32F, vocab_size, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(saved_aux_params.type, i);
      saved_auxs[i * aux_size * 2 + j] = ccv_nnc_tensor_variable_new(dynamic_graph, saved_aux_params);
    }
    for (j = 0; j < aux_size; j++)
    {
      ccv_nnc_tensor_param_t saved_aux_params = GPU_TENSOR_NCHW(000, 32F, max_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(saved_aux_params.type, i);
      saved_auxs[i* aux_size * 2 + aux_size + j] = ccv_nnc_tensor_variable_new(dynamic_graph, saved_aux_params);
    }
  }
  // Host-side staging buffers for predictions and labels during evaluation.
  ccv_nnc_tensor_t* const out_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, batch_size, 1), 0);
  ccv_nnc_tensor_t* const fit_cpu = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, batch_size, 1), 0);
  ccv_nnc_tensor_t** tensor[4];
  int epoch = 0;
  // Training: i counts iterations, epoch advances every epoch_end iterations.
  for (i = 0; epoch < epoch_limit; i++)
  {
    float learn_rate = 0.001;
    optim = CMD_LAMB_FORWARD(i + 1, learn_rate, 0.9, 0.999, 0, 1e-6);
    int status = ccv_cnnp_dataframe_iter_next(iter, (void**)tensor, 4, 0);
    assert(status == 0);
    ccv_nnc_tensor_t word_indices_tensor[1];
    ccv_nnc_tensor_t mask_tensor[1];
    ccv_nnc_tensor_t index_tensor[1];
    ccv_nnc_tensor_variable_t word_indices[1];
    ccv_nnc_tensor_variable_t word_vec[1];
    ccv_nnc_tensor_variable_t vec[1 * 3];
    ccv_nnc_tensor_variable_t out[1];
    for (j = 0; j < 1; j++)
    {
      // The sequence length of this batch comes from the truncated word tensor.
      const int batch_length = tensor[j * 4][0]->info.dim[1];
      ccv_nnc_tensor_param_t word_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * batch_length);
      CCV_TENSOR_SET_DEVICE_ID(word_indices_params.type, j);
      // View the batched word tensor as a flat list of indices.
      word_indices_tensor[j] = ccv_nnc_tensor(tensor[j * 4][0]->data.f32, word_indices_params, 0);
      word_indices[j] = ccv_nnc_tensor_variable_new(dynamic_graph, word_indices_params);
      ccv_nnc_tensor_variable_set(dynamic_graph, word_indices[j], &word_indices_tensor[j]);
      ccv_nnc_tensor_param_t pre_vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size * batch_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(pre_vec_params.type, j);
      word_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
      // vec[j * 3] aliases word_vec as (batch_size, batch_length, embedding_size).
      ccv_nnc_tensor_param_t vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size, batch_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(vec_params.type, j);
      vec[j * 3] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, word_vec[j], ccv_nnc_no_ofs, DIM_ALLOC(), vec_params);
      ccv_nnc_tensor_param_t mask_params = CPU_TENSOR_NCHW(32S, batch_size);
      assert(((ccv_nnc_tensor_t*)tensor[j * 4 + 2])->info.dim[0] == batch_size);
      CCV_TENSOR_SET_DEVICE_ID(mask_params.type, j);
      ccv_nnc_tensor_param_t index_params = GPU_TENSOR_NCHW(000, 32S, batch_size);
      assert(tensor[j * 4 + 3][0]->info.dim[0] == batch_size);
      CCV_TENSOR_SET_DEVICE_ID(index_params.type, j);
      mask_tensor[j] = ccv_nnc_tensor(((ccv_nnc_tensor_t*)tensor[j * 4 + 2])->data.i32, mask_params, 0);
      index_tensor[j] = ccv_nnc_tensor(tensor[j * 4 + 3][0]->data.i32, index_params, 0);
      vec[j * 3 + 1] = ccv_nnc_tensor_constant_new(dynamic_graph, mask_params);
      vec[j * 3 + 2] = ccv_nnc_tensor_constant_new(dynamic_graph, index_params);
      ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 3 + 1], &mask_tensor[j]);
      ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 3 + 2], &index_tensor[j]);
      out[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
    }
    ccv_nnc_tensor_variable_t tvin[1 * 2];
    for (j = 0; j < 1; j++)
      tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = word_indices[j];
    // Embedding lookup, then the LSTM model (evaluated in training mode).
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, 2, word_vec, 1, 1, 0);
    ccv_nnc_dynamic_graph_evaluate(dynamic_graph, lstm, 0, vec, 3, out, 1, 0, 0);
    ccv_nnc_tensor_variable_t sigmoid[1];
    ccv_nnc_tensor_variable_t fit[1];
    ccv_nnc_tensor_variable_t vocab_vec_grad[1];
    for (j = 0; j < 1; j++)
    {
      sigmoid[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      fit[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
      ccv_nnc_tensor_variable_set(dynamic_graph, fit[j], tensor[j * 4 + 1][0]);
      vocab_vec_grad[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
    }
    ccv_nnc_tensor_variable_t tvout[2];
    for (j = 0; j < 1; j++)
      tvin[j * 2] = out[j], tvin[j * 2 + 1] = fit[j], tvout[j * 2] = 0, tvout[j * 2 + 1] = sigmoid[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, tvin, 2, tvout, 2, 1, 0);
    for (j = 0; j < 1; j++)
      tvin[j] = vocab_vec[j], tvout[j] = vocab_vec_grad[j];
    // Back-propagate to obtain the gradient of the embedding table.
    ccv_nnc_dynamic_graph_backward(dynamic_graph, sigmoid, 1, 0, tvin, 1, tvout, 1, 0);
    ccv_cnnp_model_set_minimizer(lstm, optim, 0, 0, 0);
    for (j = 0; j < 1; j++)
      tvin[j] = vocab_vec_grad[j], tvout[j] = vocab_vec[j];
    ccv_nnc_dynamic_graph_apply_gradients(dynamic_graph, optim, tvin, 1, tvout, 1, saved_auxs, 1, 0);
    // Release the per-iteration variables.
    for (j = 0; j < 1; j++)
    {
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 3]);
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 3 + 1]);
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 3 + 2]);
      ccv_nnc_tensor_variable_free(dynamic_graph, word_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, word_indices[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, out[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, fit[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, sigmoid[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, vocab_vec_grad[j]);
    }
    if ((i + 1) % epoch_end == 0)
    {
      // End of an epoch: reshuffle and restart the iterator.
      ++epoch;
      ccv_cnnp_dataframe_shuffle(train_data);
      ccv_cnnp_dataframe_iter_set_cursor(iter, 0);
    }
  }
  // Evaluation: no gradients are recorded, the model runs in test mode.
  int correct = 0;
  ccv_cnnp_dataframe_iter_t* const test_iter = ccv_cnnp_dataframe_iter_new(test_batched_data, test_gpu_batched, 4);
  int k;
  ccv_cnnp_dataframe_shuffle(test_data);
  ccv_nnc_dynamic_graph_set_no_grad(dynamic_graph, 1);
  const int row_count = ccv_cnnp_dataframe_row_count(test_data);
  for (k = 0; k < row_count; k += batch_size)
  {
    ccv_cnnp_dataframe_iter_next(test_iter, (void**)tensor, 4, 0);
    ccv_nnc_tensor_t word_indices_tensor[1];
    ccv_nnc_tensor_t mask_tensor[1];
    ccv_nnc_tensor_t index_tensor[1];
    ccv_nnc_tensor_variable_t word_indices[1];
    ccv_nnc_tensor_variable_t word_vec[1];
    ccv_nnc_tensor_variable_t vec[3];
    ccv_nnc_tensor_variable_t out[1];
    for (j = 0; j < 1; j++)
    {
      const int batch_length = tensor[j * 4][0]->info.dim[1];
      ccv_nnc_tensor_param_t word_indices_params = GPU_TENSOR_NCHW(000, 32S, batch_size * batch_length);
      CCV_TENSOR_SET_DEVICE_ID(word_indices_params.type, j);
      word_indices_tensor[j] = ccv_nnc_tensor(tensor[j * 4][0]->data.f32, word_indices_params, 0);
      word_indices[j] = ccv_nnc_tensor_variable_new(dynamic_graph, word_indices_params);
      ccv_nnc_tensor_variable_set(dynamic_graph, word_indices[j], &word_indices_tensor[j]);
      ccv_nnc_tensor_param_t pre_vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size * batch_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(pre_vec_params.type, j);
      word_vec[j] = ccv_nnc_tensor_variable_new(dynamic_graph, pre_vec_params);
      ccv_nnc_tensor_param_t vec_params = GPU_TENSOR_NCHW(000, 32F, batch_size, batch_length, embedding_size);
      CCV_TENSOR_SET_DEVICE_ID(vec_params.type, j);
      vec[j * 3] = ccv_nnc_tensor_variable_alias_new(dynamic_graph, word_vec[j], ccv_nnc_no_ofs, DIM_ALLOC(), vec_params);
      ccv_nnc_tensor_param_t mask_params = CPU_TENSOR_NCHW(32S, batch_size);
      CCV_TENSOR_SET_DEVICE_ID(mask_params.type, j);
      assert(((ccv_nnc_tensor_t*)tensor[j * 4 + 2])->info.dim[0] == batch_size);
      ccv_nnc_tensor_param_t index_params = GPU_TENSOR_NCHW(000, 32S, batch_size);
      CCV_TENSOR_SET_DEVICE_ID(index_params.type, j);
      assert(tensor[j * 4 + 3][0]->info.dim[0] == batch_size);
      mask_tensor[j] = ccv_nnc_tensor(((ccv_nnc_tensor_t*)tensor[j * 4 + 2])->data.i32, mask_params, 0);
      index_tensor[j] = ccv_nnc_tensor(tensor[j * 4 + 3][0]->data.i32, index_params, 0);
      vec[j * 3 + 1] = ccv_nnc_tensor_constant_new(dynamic_graph, mask_params);
      ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 3 + 1], &mask_tensor[j]);
      vec[j * 3 + 2] = ccv_nnc_tensor_constant_new(dynamic_graph, index_params);
      ccv_nnc_tensor_variable_set(dynamic_graph, vec[j * 3 + 2], &index_tensor[j]);
      out[j] = ccv_nnc_tensor_variable_new(dynamic_graph);
    }
    ccv_nnc_tensor_variable_t tvin[2];
    for (j = 0; j < 1; j++)
      tvin[j * 2] = vocab_vec[j], tvin[j * 2 + 1] = word_indices[j];
    ccv_nnc_dynamic_graph_exec(dynamic_graph, CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, tvin, 2, word_vec, 1, 1, 0);
    ccv_nnc_dynamic_graph_evaluate(dynamic_graph, lstm, 1, vec, 3, out, 1, 0, 0);
    int d;
    for (d = 0; d < 1; d++)
    {
      // Bring predictions and labels back to the host for comparison.
      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ccv_nnc_tensor_from_variable(dynamic_graph, out[d])), TENSOR_LIST(out_cpu), 0);
      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor[d * 4 + 1][0]), TENSOR_LIST(fit_cpu), 0);
      // Only count the rows that actually exist in a trailing partial batch.
      for (j = 0; j < ccv_min(row_count - k - d * batch_size, batch_size); j++)
      {
        const int truth = (fit_cpu->data.f32[j] > 0.5);
        // The output is compared against 0 (pre-sigmoid value).
        const int prediction = (out_cpu->data.f32[j] > 0);
        if (truth == prediction)
          ++correct;
      }
    }
    for (j = 0; j < 1; j++)
    {
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 3]);
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 3 + 1]);
      ccv_nnc_tensor_variable_free(dynamic_graph, vec[j * 3 + 2]);
      ccv_nnc_tensor_variable_free(dynamic_graph, word_vec[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, word_indices[j]);
      ccv_nnc_tensor_variable_free(dynamic_graph, out[j]);
    }
  }
  ccv_cnnp_dataframe_iter_free(test_iter);
  ccv_cnnp_model_free(lstm);
  ccv_cnnp_dataframe_iter_free(iter);
  ccv_cnnp_dataframe_free(batched_data);
  ccv_cnnp_dataframe_free(test_batched_data);
  ccv_nnc_dynamic_graph_free(dynamic_graph);
  ccv_nnc_tensor_free(out_cpu);
  // fit_cpu is allocated alongside out_cpu and must be released as well.
  ccv_nnc_tensor_free(fit_cpu);
  return correct;
}
1277
1278
/*
 * Integration test: trains the LSTM classifier through train_imdb_lstm on the
 * IMDB data set. The test is skipped when the required GPU commands or the
 * data files are unavailable. Under coverage runs a reduced configuration is
 * used and the accuracy is not checked.
 */
TEST_CASE("train a binary LSTM classifier on imdb reviews to 80% with mix of dynamic graph and cnnp model and dynamic inputs")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_GEMM_FORWARD, CCV_NNC_BACKEND_GPU_CUBLAS) &&
      ccv_nnc_cmd_ok(CCV_NNC_GEMM_BACKWARD, CCV_NNC_BACKEND_GPU_CUBLAS) &&
      ccv_nnc_cmd_ok(CCV_NNC_AVERAGE_POOL_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) &&
      ccv_nnc_cmd_ok(CCV_NNC_AVERAGE_POOL_BACKWARD, CCV_NNC_BACKEND_GPU_CUDNN));
  const char* const train_list = "/fast/Data/IMDB_Movie_Reviews/aclImdb/train.txt";
  const char* const test_list = "/fast/Data/IMDB_Movie_Reviews/aclImdb/test.txt";
  const char* const vocab_file = "/fast/Data/IMDB_Movie_Reviews/aclImdb/imdb.vocab";
  const char* const base_dir = "/fast/Data/IMDB_Movie_Reviews/aclImdb/";
  // Probe that every required file can be opened; skip the test otherwise.
  const char* const required_files[3] = { train_list, test_list, vocab_file };
  int all_present = 1;
  int f;
  for (f = 0; f < 3; f++)
  {
    FILE* const probe = fopen(required_files[f], "rb");
    if (probe)
      fclose(probe);
    else
      all_present = 0;
  }
  if (!all_present)
    { GUARD_ELSE_RETURN(0); }
  khash_t(vocab_map)* vocab;
  int vocab_size;
  _vocab_init(vocab_file, &vocab, &vocab_size);
  const int max_length = 512;
  // Coverage runs load with a limit argument of 128 and train for one epoch;
  // regular runs pass 0 as the limit argument and train for three epochs.
  const int coverage = ccv_is_coverage();
  const int sample_limit = coverage ? 128 : 0;
  const int epoch_limit = coverage ? 1 : 3;
  ccv_array_t* const train_set = _array_from_disk_new(train_list, base_dir, vocab, vocab_size, max_length, sample_limit);
  ccv_cnnp_dataframe_t* const train_data = ccv_cnnp_dataframe_from_array_new(train_set);
  ccv_array_t* const test_set = _array_from_disk_new(test_list, base_dir, vocab, vocab_size, max_length, sample_limit);
  ccv_cnnp_dataframe_t* const test_data = ccv_cnnp_dataframe_from_array_new(test_set);
  const int correct = train_imdb_lstm(epoch_limit, vocab_size, 64, max_length, 128, train_data, test_data);
  if (!coverage)
  {
    // Accuracy is only asserted for the full run.
    REQUIRE((float)correct / test_set->rnum > 0.80, "%f should be larger than 80%%", (float)correct / test_set->rnum);
  }
  ccv_cnnp_dataframe_free(train_data);
  ccv_cnnp_dataframe_free(test_data);
  // Release the tensors held by each record, then the arrays themselves
  // (train set first, test set second).
  ccv_array_t* const sets[2] = { train_set, test_set };
  int s, i;
  for (s = 0; s < 2; s++)
  {
    for (i = 0; i < sets[s]->rnum; i++)
    {
      ccv_nnc_text_t* const text = (ccv_nnc_text_t*)ccv_array_get(sets[s], i);
      ccv_nnc_tensor_free(text->tensor);
      ccv_nnc_tensor_free(text->mask);
    }
    ccv_array_free(sets[s]);
  }
  _vocab_destroy(vocab);
}
1339
1340
#include "case_main.h"