Coverage Report

Created: 2021-04-07 21:56

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/ccv_nnc_tensor_tape.c
Line
Count
Source (jump to first uncovered line)
1
/**********************************************************
2
 * C-based/Cached/Core Computer Vision Library
3
 * Liu Liu, 2010-02-01
4
 **********************************************************/
5
6
/**********************************************************
7
 * CCV - Neural Network Collection
8
 **********************************************************/
9
10
#include "_ccv_nnc_tensor_tape.h"
11
#include "_ccv_nnc_graph.h"
12
#include "ccv_nnc_internal.h"
13
#include "ccv_nnc_easy.h"
14
#ifdef HAVE_CUDA
15
#include "gpu/ccv_nnc_compat.h"
16
#endif
17
18
ccv_nnc_tensor_tape_t* ccv_nnc_tensor_tape_new(void)
19
4
{
20
4
  ccv_nnc_tensor_tape_t* tape = (ccv_nnc_tensor_tape_t*)ccmalloc(sizeof(ccv_nnc_tensor_tape_t));
21
4
  tape->tensor_data = ccv_array_new(sizeof(ccv_nnc_tape_tensor_data_array_t), 0, 0);
22
4
  tape->exec_data = ccv_array_new(sizeof(ccv_nnc_tape_exec_data_array_t), 0, 0);
23
4
  return tape;
24
4
}
25
26
// Unwrap a multi-view tensor into the concrete tensor view selected by the current
// while_count of each enclosing graph (outermost first in graphs[]). Walks one graph
// level per outer iteration; the inner while loop keeps unwrapping as long as the
// multiview is anchored at that graph (or its pair).
static ccv_nnc_tensor_t* _ccv_nnc_tensor_from_tensor_multiview(const ccv_nnc_graph_t* const* const graphs, const int graph_size, ccv_nnc_tensor_multiview_t* const mv)
{
	int i;
	ccv_nnc_tensor_t* tensor = (ccv_nnc_tensor_t*)mv;
	for (i = 0; CCV_IS_TENSOR_MULTIVIEW(tensor) && i < graph_size; i++)
	{
		const int count = (int)graphs[i]->while_count;
		while (CCV_IS_TENSOR_MULTIVIEW(tensor) &&
			 (((ccv_nnc_tensor_multiview_t*)tensor)->anchor == (intptr_t)graphs[i] ||
			((ccv_nnc_tensor_multiview_t*)tensor)->anchor == (intptr_t)graphs[i]->pair))
		{
			// NOTE(review): this local shadows the parameter `mv` — intentional here,
			// it is the multiview currently being unwrapped.
			ccv_nnc_tensor_multiview_t* mv = (ccv_nnc_tensor_multiview_t*)tensor;
			const int off = mv->kind;
			const int mod = mv->repeat;
			// If reached the root.
			// Select view: the first `off` views are fixed; beyond that the views
			// repeat with period `mod`.
			tensor = CCV_NNC_MULTIVIEW_DATA(mv)[count >= off ? ((count - off) % mod) + off : count]; // Unwrap.
		}
	}
	return tensor;
}
46
47
62
// A tape tensor data array "position" is an int encoded as (index << 1) | 1; the set
// low bit distinguishes an encoded position stored in tensor->alias_ref from a real
// pointer (pointers are at least 2-byte aligned).
#define CCV_NNC_IS_TAPE_TENSOR_DATA_ARRAY_POS(ptr) ((uintptr_t)(ptr) & 1)
// Bit 0 of a taped data pointer marks "borrowed, not allocated by the tape"
// (set in _ccv_nnc_tensor_from_tape); such pointers are skipped when freeing.
#define CCV_NUMERIC_DATA_NO_ALLOC(data) ((uintptr_t)(data.u8) & 1)
// Align integer to 16-bytes.
// (Rounds an int count up to a multiple of 4; with 4-byte ints that is 16 bytes.)
#define ALIGN_16(x) (((x) + 3) & -4)
51
52
// Simple allocator from ccv_array_t.
53
static void _ccv_nnc_tape_tensor_data_array_pos_new(ccv_array_t* const tensor_data, int* const pos_ref, ccv_nnc_tape_tensor_data_array_t** const tape_tensor_data_ref)
{
	// Append one zeroed element to the array and hand back both the element and its
	// encoded position ((index << 1) | 1 — the tag bit marks it as a position,
	// not a pointer; see CCV_NNC_IS_TAPE_TENSOR_DATA_ARRAY_POS).
	const int index = tensor_data->rnum;
	ccv_array_resize(tensor_data, index + 1);
	*pos_ref = (index << 1) | 1;
	ccv_nnc_tape_tensor_data_array_t* const new_array = (ccv_nnc_tape_tensor_data_array_t*)ccv_array_get(tensor_data, index);
	memset(new_array, 0, sizeof(ccv_nnc_tape_tensor_data_array_t));
	*tape_tensor_data_ref = new_array;
}
62
63
// Decode an encoded position (see _ccv_nnc_tape_tensor_data_array_pos_new) back into
// a pointer to the corresponding element of tensor_data.
static ccv_nnc_tape_tensor_data_array_t* _ccv_nnc_tape_tensor_data_array_get(const ccv_array_t* const tensor_data, const int pos)
{
	// pos is (index << 1) | 1; shifting off the tag bit recovers the index.
	// NOTE(review): `<` would be the tighter bound — a valid index is always < rnum.
	assert((pos >> 1) <= tensor_data->rnum);
	return (ccv_nnc_tape_tensor_data_array_t*)ccv_array_get(tensor_data, pos >> 1);
}
68
69
// Migrate taped tensor data from the old (smaller) multi-dimensional layout into the
// new (grown) layout that occupies the same buffer (see the resize caller). Recurses
// one dimension (`offset`) at a time and iterates indices from the highest downward,
// which is safe because old_data <= new_data — nothing is overwritten before it is
// read. old_data may be 0 for slots that did not exist in the old layout.
static void _ccv_nnc_tape_tensor_data_move(ccv_nnc_tape_tensor_data_t* const old_data, ccv_nnc_tape_tensor_data_t* const new_data, const int offset, const ccv_nnc_graph_t* const* const graphs, const int graph_size, const int* const dim, const int dim_count)
{
	int i;
	if (offset == ccv_max(dim_count, graph_size) - 1)
	{
		// Innermost dimension: element-wise copy / zero-fill.
		const int data_dim = offset < dim_count ? dim[offset] - 1 : 0;
		const int graph_dim = offset < graph_size ? graphs[offset]->while_count + 1 : 0;
		assert(old_data <= new_data);
		// Do the actual copy or set.
		if (!old_data)
			for (i = ccv_max(data_dim, graph_dim); i >= 0; i--)
				new_data[i].data.u8 = 0;
		else {
			// Slots beyond the old extent are zeroed ...
			for (i = graph_dim; i > data_dim; i--)
				new_data[i].data.u8 = 0;
			// ... and pre-existing slots are moved, back to front.
			for (i = data_dim; i >= 0; i--)
				new_data[i] = old_data[i];
		}
	} else {
		// Element stride of one index at this level in the old layout ...
		int old_data_step = 1;
		for (i = offset + 1; i < dim_count; i++)
			old_data_step *= dim[i];
		const int new_dim_count = ccv_max(graph_size, dim_count);
		// ... and in the new layout, where each dimension i grows to
		// max(old dim[i], while_count + 2).
		int new_data_step = 1;
		for (i = offset + 1; i < new_dim_count; i++)
		{
			int old_dim = (i < dim_count) ? dim[i] : 1;
			int graph_dim = (i < graph_size) ? (int)(graphs[i]->while_count + 2) : 1;
			new_data_step *= ccv_max(old_dim, graph_dim);
		}
		const int data_dim = offset < dim_count ? dim[offset] - 1 : 0;
		const int graph_dim = offset < graph_size ? graphs[offset]->while_count + 1 : 0;
		// Recurse highest index first; pass 0 for old_data when the slot had no
		// counterpart in the old layout so the leaf zero-fills it.
		for (i = ccv_max(data_dim, graph_dim); i >= 0; i--)
			_ccv_nnc_tape_tensor_data_move((old_data && offset < dim_count && i < dim[offset]) ? old_data + i * old_data_step : 0, new_data + i * new_data_step, offset + 1, graphs, graph_size, dim, dim_count);
	}
}
105
106
// Grow a tape tensor data array so it can index the current while_counts of all
// enclosing graphs (each level needs while_count + 2 slots). The dim vector and the
// data elements share one allocation: ALIGN_16(dim_count) ints followed by the
// ccv_nnc_tape_tensor_data_t elements.
static void _ccv_nnc_tape_tensor_data_array_resize(ccv_nnc_tape_tensor_data_array_t* const data_array, const ccv_nnc_graph_t* const* const graphs, const int graph_size)
{
	const int new_dim_count = ccv_max(graph_size, data_array->dim_count);
	int i;
	// Total element count of the grown layout.
	int size = 1;
	for (i = 0; i < new_dim_count; i++)
	{
		int old_dim = (i < data_array->dim_count) ? data_array->dim[i] : 1;
		int graph_dim = (i < graph_size) ? (int)(graphs[i]->while_count + 2) : 1;
		size *= ccv_max(old_dim, graph_dim);
	}
	// ccrealloc aborts on OOM (project convention), so overwriting the pointer is safe.
	data_array->dim = ccrealloc(data_array->dim, sizeof(int) * ALIGN_16(new_dim_count) + sizeof(ccv_nnc_tape_tensor_data_t) * size);
	ccv_nnc_tape_tensor_data_t* const old_data = (ccv_nnc_tape_tensor_data_t*)(data_array->dim + ALIGN_16(data_array->dim_count));
	ccv_nnc_tape_tensor_data_t* const new_data = (ccv_nnc_tape_tensor_data_t*)(data_array->dim + ALIGN_16(new_dim_count));
	// Note that both old_data and new_data occupies the same memory region, since the resize operation
	// is mono-increasing, we just need to move the data from the end to the beginning to avoid data
	// overwrite issues.
	assert(graph_size > 0);
	assert(data_array->dim_count > 0);
	_ccv_nnc_tape_tensor_data_move(old_data, new_data, 0, graphs, graph_size, data_array->dim, data_array->dim_count);
	data_array->data = new_data;
	// We are done, update the dim.
	// (Must happen after the move, which still reads the old dims.)
	for (i = 0; i < new_dim_count; i++)
	{
		int old_dim = (i < data_array->dim_count) ? data_array->dim[i] : 1;
		int graph_dim = (i < graph_size) ? (int)(graphs[i]->while_count + 2) : 1;
		data_array->dim[i] = ccv_max(old_dim, graph_dim);
	}
	data_array->dim_count = new_dim_count;
}
136
137
// Point a tape-allocated tensor's data at the slot for the current while_counts of
// its enclosing graphs, allocating backing storage on demand.
// tensor_data: tape->tensor_data, array of ccv_nnc_tape_tensor_data_array_t.
// tensor: the tensor to redirect; tensor->data.u8 is rewritten in place.
// flags: currently unused — kept for interface compatibility with the call sites in
//        ccv_nnc_tensor_tape_io.
// graphs / graph_size: chain of enclosing graphs, outermost first; graph_size > 0.
// create_if_missing: when 0 (inputs), a missing slot borrows tensor_ref->data.u8 and
//   tags it with bit 0 so it is never freed (see CCV_NUMERIC_DATA_NO_ALLOC); when 1
//   (outputs), fresh memory is allocated for the slot.
static void _ccv_nnc_tensor_from_tape(ccv_array_t* const tensor_data, ccv_nnc_tensor_t* const tensor, const int flags, const ccv_nnc_graph_t* const* const graphs, const int graph_size, const int create_if_missing)
{
	assert(graph_size > 0);
	ccv_nnc_tensor_t* tensor_ref = tensor;
	// Follow the alias chain until it ends or reaches an encoded data array position.
	while (tensor_ref->alias_ref && !CCV_NNC_IS_TAPE_TENSOR_DATA_ARRAY_POS(tensor_ref->alias_ref))
	{
		// Fix: advance from tensor_ref, not tensor. The original read tensor->alias_ref
		// here, which never makes progress past the first link when the chain is longer
		// than one (the loop condition tests tensor_ref). Identical behavior for the
		// covered single-link case.
		tensor_ref = (ccv_nnc_tensor_t*)tensor_ref->alias_ref;
		if (CCV_IS_TENSOR_MULTIVIEW(tensor_ref))
			tensor_ref = _ccv_nnc_tensor_from_tensor_multiview(graphs, graph_size, (ccv_nnc_tensor_multiview_t*)tensor_ref);
	}
	ccv_nnc_tape_tensor_data_array_t* data_array;
	if (!tensor_ref->alias_ref)
	{
		// Create data array.
		// First time this tensor is taped: allocate a data array and stash its encoded
		// position in alias_ref (tag bit keeps it distinguishable from a pointer).
		int pos;
		_ccv_nnc_tape_tensor_data_array_pos_new(tensor_data, &pos, &data_array);
		tensor_ref->alias_ref = pos;
	} else
		data_array = _ccv_nnc_tape_tensor_data_array_get(tensor_data, (int)tensor_ref->alias_ref);
	// Either the data exists, or it doesn't and we need to create one.
	int i;
	if (!data_array->dim)
	{
		// Fresh data array: size each level to while_count + 2 and zero all slots.
		int size = 1;
		for (i = 0; i < graph_size; i++)
			size *= (int)(graphs[i]->while_count + 2);
		data_array->dim_count = graph_size;
		data_array->dim = (int*)ccmalloc(sizeof(int) * ALIGN_16(graph_size) + sizeof(ccv_nnc_tape_tensor_data_t) * size);
		for (i = 0; i < graph_size; i++)
			data_array->dim[i] = (int)(graphs[i]->while_count + 2);
		data_array->data = (ccv_nnc_tape_tensor_data_t*)(data_array->dim + ALIGN_16(graph_size));
		for (i = 0; i < size; i++)
			data_array->data[i].data.u8 = 0;
	} else {
		// Existing data array: grow it if any level can no longer index
		// while_count + 1.
		int flag = (data_array->dim_count < graph_size);
		for (i = 0; !flag && i < graph_size; i++)
			flag = (data_array->dim[i] <= graphs[i]->while_count + 1);
		if (flag)
			_ccv_nnc_tape_tensor_data_array_resize(data_array, graphs, graph_size);
	}
	// Compute the index.
	// Row-major index of (while_count + 1) at every level.
	int idx, step;
	idx = (graphs[graph_size - 1]->while_count + 1);
	step = data_array->dim[graph_size - 1];
	for (i = graph_size - 2; i >= 0; i--)
	{
		idx += (graphs[i]->while_count + 1) * step;
		step *= data_array->dim[i];
	}
	ccv_numeric_data_t data = data_array->data[idx].data;
	if (!data.u8)
	{
		// If we cannot create, loop back idx until we find one that exists.
		if (!create_if_missing)
		{
			if (data_array->data[idx].data.u8)
				data.u8 = (unsigned char*)((uintptr_t)data_array->data[idx].data.u8 | (uintptr_t)1);
			else
			// Now looped back to 0, if still cannot find, using the original pointer.
				data.u8 = data_array->data[idx].data.u8 = (unsigned char*)((uintptr_t)tensor_ref->data.u8 | (uintptr_t)1);
		} else {
			// Allocate fresh storage for this slot; record the type so the free path
			// knows whether it is GPU or CPU memory.
			const size_t size = ccv_nnc_tensor_data_size(tensor->info);
			data_array->data[idx].type = tensor->info.type;
#ifdef HAVE_CUDA
			if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY)
				data_array->data[idx].data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), size);
			else
				ccmemalign((void **)&data_array->data[idx].data.u8, 16, size);
#else
			assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
			ccmemalign((void **)&data_array->data[idx].data.u8, 16, size);
#endif
			data = data_array->data[idx].data;
		}
	}
	// Strip the "no alloc" tag bit before handing the pointer to the tensor.
	tensor->data.u8 = (unsigned char*)((uintptr_t)data.u8 & ~(uintptr_t)1);
}
214
215
// Redirect any tape-allocated inputs/outputs of an exec node to their per-iteration
// storage on the tape. Inputs are resolved read-only (create_if_missing = 0), outputs
// may allocate their slot (create_if_missing = 1). No-op when no tensor is
// tape-allocated.
void ccv_nnc_tensor_tape_io(ccv_nnc_tensor_tape_t* const tape, const ccv_nnc_graph_t* const graph, const int* const input_flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, const int* const output_flags, ccv_nnc_tensor_t* const* const outputs, const int output_size)
{
	int i, tape_io = 0;
	// Scan for any tape-allocated tensor; stop at the first hit.
	for (i = 0; i < input_size && !tape_io; i++)
		if (inputs[i] && CCV_GET_TAPE_ALLOC(inputs[i]->type))
			tape_io = 1;
	for (i = 0; i < output_size && !tape_io; i++)
		if (outputs[i] && CCV_GET_TAPE_ALLOC(outputs[i]->type))
			tape_io = 1;
	// If doesn't need to update with tape io, just pointing to the inputs and outputs directly.
	if (!tape_io)
		return;
	// Go to the root graph, record which was taken along the way.
	// In this way, we can then unwrap multi-view tensors.
	assert(graph);
	const ccv_nnc_graph_t* curr_graph = graph;
	int d;
	// First pass: count the depth of the graph chain (graph up to the root via ->p).
	for (d = 0; curr_graph; d++)
		curr_graph = curr_graph->p;
	curr_graph = graph;
	const int graph_size = d;
	assert(graph_size > 0);
	// Second pass: fill graphs[] outermost-first (VLA sized by the depth).
	const ccv_nnc_graph_t* graphs[graph_size];
	for (d = graph_size - 1; curr_graph; d--, curr_graph = curr_graph->p)
		graphs[d] = curr_graph;
	// Now, go through the inputs / outputs and update.
	for (i = 0; i < input_size; i++)
		if (inputs[i] && CCV_GET_TAPE_ALLOC(inputs[i]->type))
			_ccv_nnc_tensor_from_tape(tape->tensor_data, inputs[i], input_flags ? input_flags[i] : 0, graphs, graph_size, 0);
	for (i = 0; i < output_size; i++)
		if (outputs[i] && CCV_GET_TAPE_ALLOC(outputs[i]->type))
			_ccv_nnc_tensor_from_tape(tape->tensor_data, outputs[i], output_flags ? output_flags[i] : 0, graphs, graph_size, 1); // Create if it is not found. This is OK for output tensor.
}
248
249
// An exec data array "position" mirrors the tensor encoding: (index << 1) | 1, with
// the low bit distinguishing an encoded position in exec_info->alias_ref from a pointer.
#define CCV_NNC_IS_TAPE_EXEC_DATA_ARRAY_POS(ptr) ((uintptr_t)(ptr) & 1)
250
251
// Simple allocator from ccv_array_t.
252
static void _ccv_nnc_tape_exec_data_array_pos_new(ccv_array_t* const exec_data, int* const pos_ref, ccv_nnc_tape_exec_data_array_t** const tape_exec_data_ref)
{
	// Append one zeroed element and return it along with its encoded position,
	// (index << 1) | 1 — the tag bit marks it as a position rather than a pointer
	// (see CCV_NNC_IS_TAPE_EXEC_DATA_ARRAY_POS).
	const int index = exec_data->rnum;
	ccv_array_resize(exec_data, index + 1);
	*pos_ref = (index << 1) | 1;
	ccv_nnc_tape_exec_data_array_t* const new_array = (ccv_nnc_tape_exec_data_array_t*)ccv_array_get(exec_data, index);
	memset(new_array, 0, sizeof(ccv_nnc_tape_exec_data_array_t));
	*tape_exec_data_ref = new_array;
}
261
262
static ccv_nnc_tape_exec_data_array_t* _ccv_nnc_tape_exec_data_array_get(const ccv_array_t* const exec_data, const int pos)
263
30
{
264
30
  assert((pos >> 1) <= exec_data->rnum);
265
30
  return (ccv_nnc_tape_exec_data_array_t*)ccv_array_get(exec_data, pos >> 1);
266
30
}
267
268
// Exec-data counterpart of _ccv_nnc_tape_tensor_data_move: migrate uint64_t numbering
// slots from the old layout into the grown layout sharing the same buffer. Iterates
// highest index first (old_data <= new_data), recursing one dimension per call.
// Note the leaf bound here is while_counts[offset] (index space per level is
// while_count + 1, versus + 2 on the tensor side).
static void _ccv_nnc_tape_exec_data_move(uint64_t* const old_data, uint64_t* const new_data, const int offset, const uint64_t* const while_counts, const int graph_size, const int* const dim, const int dim_count)
{
	int i;
	if (offset == ccv_max(dim_count, graph_size) - 1)
	{
		// Innermost dimension: element-wise copy / zero-fill.
		const int data_dim = offset < dim_count ? dim[offset] - 1 : 0;
		const int graph_dim = offset < graph_size ? while_counts[offset] : 0;
		assert(old_data <= new_data);
		// Do the actual copy or set.
		if (!old_data)
			for (i = ccv_max(data_dim, graph_dim); i >= 0; i--)
				new_data[i] = 0;
		else {
			// Newly exposed slots are zeroed ...
			for (i = graph_dim; i > data_dim; i--)
				new_data[i] = 0;
			// ... and pre-existing slots moved, back to front.
			for (i = data_dim; i >= 0; i--)
				new_data[i] = old_data[i];
		}
	} else {
		// Stride of one index at this level in the old layout ...
		int old_data_step = 1;
		for (i = offset + 1; i < dim_count; i++)
			old_data_step *= dim[i];
		const int new_dim_count = ccv_max(graph_size, dim_count);
		// ... and in the new layout (each dim grows to max(old dim, while_count + 1)).
		int new_data_step = 1;
		for (i = offset + 1; i < new_dim_count; i++)
		{
			int old_dim = (i < dim_count) ? dim[i] : 1;
			int graph_dim = (i < graph_size) ? (int)(while_counts[i] + 1) : 1;
			new_data_step *= ccv_max(old_dim, graph_dim);
		}
		const int data_dim = offset < dim_count ? dim[offset] - 1 : 0;
		const int graph_dim = offset < graph_size ? while_counts[offset] : 0;
		// Recurse highest index first; 0 for old_data when the slot is new.
		for (i = ccv_max(data_dim, graph_dim); i >= 0; i--)
			_ccv_nnc_tape_exec_data_move((old_data && offset < dim_count && i < dim[offset]) ? old_data + i * old_data_step : 0, new_data + i * new_data_step, offset + 1, while_counts, graph_size, dim, dim_count);
	}
}
304
305
// Grow an exec data array so every level can index the current while_counts
// (each level needs while_count + 1 slots). dim and data share one allocation:
// ALIGN_16(dim_count) ints followed by the uint64_t numbering slots.
static void _ccv_nnc_tape_exec_data_array_resize(ccv_nnc_tape_exec_data_array_t* const data_array, const uint64_t* const while_counts, const int graph_size)
{
	const int new_dim_count = ccv_max(graph_size, data_array->dim_count);
	int i;
	// Total slot count of the grown layout.
	int size = 1;
	for (i = 0; i < new_dim_count; i++)
	{
		int old_dim = (i < data_array->dim_count) ? data_array->dim[i] : 1;
		int graph_dim = (i < graph_size) ? (int)(while_counts[i] + 1) : 1;
		size *= ccv_max(old_dim, graph_dim);
	}
	// ccrealloc aborts on OOM (project convention), so overwriting the pointer is safe.
	data_array->dim = ccrealloc(data_array->dim, sizeof(int) * ALIGN_16(new_dim_count) + sizeof(uint64_t) * size);
	uint64_t* const old_data = (uint64_t*)(data_array->dim + ALIGN_16(data_array->dim_count));
	uint64_t* const new_data = (uint64_t*)(data_array->dim + ALIGN_16(new_dim_count));
	// Note that both old_data and new_data occupies the same memory region, since the resize operation
	// is mono-increasing, we just need to move the data from the end to the beginning to avoid data
	// overwrite issues.
	assert(graph_size > 0);
	assert(data_array->dim_count > 0);
	_ccv_nnc_tape_exec_data_move(old_data, new_data, 0, while_counts, graph_size, data_array->dim, data_array->dim_count);
	data_array->data = new_data;
	// We are done, update the dim.
	// (Must happen after the move, which still reads the old dims.)
	for (i = 0; i < new_dim_count; i++)
	{
		int old_dim = (i < data_array->dim_count) ? data_array->dim[i] : 1;
		int graph_dim = (i < graph_size) ? (int)(while_counts[i] + 1) : 1;
		data_array->dim[i] = ccv_max(old_dim, graph_dim);
	}
	data_array->dim_count = new_dim_count;
}
335
336
// Read back the numbering recorded for an exec node at the current while_counts of
// its enclosing graphs (previously stored by ccv_nnc_tensor_tape_set_numbering).
uint64_t ccv_nnc_tensor_tape_numbering(ccv_nnc_tensor_tape_t* const tape, const ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec)
{
	assert(exec.graph == graph);
	ccv_nnc_graph_exec_info_t* exec_info = ccv_array_get(graph->exec_info, exec.d);
	// A paired exec without its own data array defers to its pair's record.
	if (!exec_info->alias_ref && exec_info->pair_ref)
		exec_info = ccv_array_get(graph->exec_info, exec_info->pair_ref - 1);
	ccv_nnc_tape_exec_data_array_t* const data_array = _ccv_nnc_tape_exec_data_array_get(tape->exec_data, (int)exec_info->alias_ref);
	const ccv_nnc_graph_t* curr_graph = graph;
	int i;
	// Depth of the enclosing graph chain (graph up to the root via ->p).
	for (i = 0; curr_graph; i++)
		curr_graph = curr_graph->p;
	curr_graph = graph;
	const int graph_size = i;
	// Snapshot while_counts outermost-first (VLA sized by the depth).
	uint64_t while_counts[graph_size];
	for (i = graph_size - 1; curr_graph; i--, curr_graph = curr_graph->p)
		while_counts[i] = curr_graph->while_count;
	assert(graph_size <= data_array->dim_count);
	// Row-major index of while_counts[] into the data array.
	int idx = 0, step = 1;
	for (i = graph_size - 1; i >= 0; i--)
	{
		assert(while_counts[i] < data_array->dim[i]);
		idx += while_counts[i] * step;
		step *= data_array->dim[i];
	}
	return data_array->data[idx];
}
362
363
// Record `numbering` for an exec node at the current while_counts of its enclosing
// graphs, creating or growing the node's exec data array as needed.
void ccv_nnc_tensor_tape_set_numbering(ccv_nnc_tensor_tape_t* const tape, ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const uint64_t numbering)
{
	ccv_nnc_tape_exec_data_array_t* data_array;
	assert(exec.graph == graph);
	ccv_nnc_graph_exec_info_t* const exec_info = ccv_array_get(graph->exec_info, exec.d);
	if (exec_info->alias_ref)
	{
		// Already taped: alias_ref holds the encoded data array position.
		assert(CCV_NNC_IS_TAPE_EXEC_DATA_ARRAY_POS(exec_info->alias_ref));
		data_array = _ccv_nnc_tape_exec_data_array_get(tape->exec_data, (int)exec_info->alias_ref);
	} else {
		// First time: allocate a data array and stash its encoded position.
		int pos;
		_ccv_nnc_tape_exec_data_array_pos_new(tape->exec_data, &pos, &data_array);
		exec_info->alias_ref = pos;
	}
	const ccv_nnc_graph_t* curr_graph = graph;
	assert(curr_graph);
	int i;
	// Depth of the enclosing graph chain (graph up to the root via ->p).
	for (i = 0; curr_graph; i++)
		curr_graph = curr_graph->p;
	curr_graph = graph;
	const int graph_size = i;
	assert(graph_size > 0);
	// Snapshot while_counts outermost-first (VLA sized by the depth).
	uint64_t while_counts[graph_size];
	for (i = graph_size - 1; curr_graph; i--, curr_graph = curr_graph->p)
		while_counts[i] = curr_graph->while_count;
	if (!data_array->dim)
	{
		// Fresh data array: each level gets while_count + 1 slots, all zeroed.
		int size = 1;
		for (i = 0; i < graph_size; i++)
			size *= (int)(while_counts[i] + 1);
		data_array->dim_count = graph_size;
		data_array->dim = (int*)ccmalloc(sizeof(int) * ALIGN_16(graph_size) + sizeof(uint64_t) * size);
		for (i = 0; i < graph_size; i++)
			data_array->dim[i] = (int)(while_counts[i] + 1);
		data_array->data = (uint64_t*)(data_array->dim + ALIGN_16(graph_size));
		for (i = 0; i < size; i++)
			data_array->data[i] = 0;
	} else {
		// Existing data array: grow it if any level can no longer index its
		// while_count.
		int flag = (data_array->dim_count < graph_size);
		for (i = 0; !flag && i < graph_size; i++)
			flag = (data_array->dim[i] <= while_counts[i]);
		if (flag)
			_ccv_nnc_tape_exec_data_array_resize(data_array, while_counts, graph_size);
	}
	// Row-major index of while_counts[] into the data array.
	int idx = 0, step = 1;
	for (i = graph_size - 1; i >= 0; i--)
	{
		assert(while_counts[i] < data_array->dim[i]);
		idx += while_counts[i] * step;
		step *= data_array->dim[i];
	}
	data_array->data[idx] = numbering;
}
416
417
// Release a tape: frees every taped tensor buffer the tape itself allocated (slots
// tagged with bit 0 — CCV_NUMERIC_DATA_NO_ALLOC — are borrowed and skipped), the
// combined dim+data allocations, both backing arrays, then the tape struct.
void ccv_nnc_tensor_tape_free(ccv_nnc_tensor_tape_t* const tape)
{
	int i, j;
	for (i = 0; i < tape->tensor_data->rnum; i++)
	{
		ccv_nnc_tape_tensor_data_array_t* const data_array = (ccv_nnc_tape_tensor_data_array_t*)ccv_array_get(tape->tensor_data, i);
		if (data_array->dim)
		{
			// Total slot count is the product of all dims.
			int size = 1;
			for (j = 0; j < data_array->dim_count; j++)
				size *= data_array->dim[j];
			for (j = 0; j < size; j++)
				if (data_array->data[j].data.u8 && !CCV_NUMERIC_DATA_NO_ALLOC(data_array->data[j].data))
				{
#ifdef HAVE_CUDA
					// The slot's type records where the memory was allocated.
					if (CCV_TENSOR_GET_MEMORY(data_array->data[j].type) == CCV_TENSOR_GPU_MEMORY)
						cufree(CCV_TENSOR_GET_DEVICE_ID(data_array->data[j].type), data_array->data[j].data.u8);
					else
						ccfree(data_array->data[j].data.u8);
#else
					ccfree(data_array->data[j].data.u8);
#endif
				}
			// dim and data live in one allocation; freeing dim frees both.
			ccfree(data_array->dim);
		}
	}
	ccv_array_free(tape->tensor_data);
	for (i = 0; i < tape->exec_data->rnum; i++)
	{
		ccv_nnc_tape_exec_data_array_t* const data_array = (ccv_nnc_tape_exec_data_array_t*)ccv_array_get(tape->exec_data, i);
		if (data_array->dim)
			ccfree(data_array->dim);
	}
	ccv_array_free(tape->exec_data);
	ccfree(tape);
}