Coverage Report

Created: 2025-02-24 17:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_tensor_tape.c
Line
Count
Source
1
/**********************************************************
2
 * C-based/Cached/Core Computer Vision Library
3
 * Liu Liu, 2010-02-01
4
 **********************************************************/
5
6
/**********************************************************
7
 * CCV - Neural Network Collection
8
 **********************************************************/
9
10
#include "_ccv_nnc_tensor_tape.h"
11
#include "_ccv_nnc_graph.h"
12
#include "ccv_nnc_internal.h"
13
#include "ccv_nnc_easy.h"
14
#ifdef HAVE_CUDA
15
#include "gpu/ccv_nnc_compat.h"
16
#elif defined(HAVE_MPS)
17
#include "mps/ccv_nnc_mps.h"
18
#endif
19
20
ccv_nnc_tensor_tape_t* ccv_nnc_tensor_tape_new(void)
21
4
{
22
4
  ccv_nnc_tensor_tape_t* tape = (ccv_nnc_tensor_tape_t*)ccmalloc(sizeof(ccv_nnc_tensor_tape_t));
23
4
  tape->tensor_data = ccv_array_new(sizeof(ccv_nnc_tape_tensor_data_array_t), 0, 0);
24
4
  tape->exec_data = ccv_array_new(sizeof(ccv_nnc_tape_exec_data_array_t), 0, 0);
25
4
  return tape;
26
4
}
27
28
// Unwrap a multiview tensor by walking the graph list in order. For each graph,
// while the current tensor is still a multiview anchored at that graph (or at
// the graph's pair), select the entry that corresponds to the graph's current
// while_count and continue from there. Returns whatever tensor is left once no
// further unwrapping applies or the graphs are exhausted.
static ccv_nnc_tensor_t* _ccv_nnc_tensor_from_tensor_multiview(const ccv_nnc_graph_t* const* const graphs, const int graph_size, ccv_nnc_tensor_multiview_t* const mv)
{
  ccv_nnc_tensor_t* current = (ccv_nnc_tensor_t*)mv;
  int level = 0;
  while (CCV_IS_TENSOR_MULTIVIEW(current) && level < graph_size)
  {
    const ccv_nnc_graph_t* const graph = graphs[level];
    const int iteration = (int)graph->while_count;
    for (;;)
    {
      if (!CCV_IS_TENSOR_MULTIVIEW(current))
        break;
      ccv_nnc_tensor_multiview_t* const view = (ccv_nnc_tensor_multiview_t*)current;
      if (view->anchor != (intptr_t)graph && view->anchor != (intptr_t)graph->pair)
        break;
      const int fixed = view->kind;
      const int period = view->repeat;
      // Iterations below `fixed` map to their own slot; later iterations cycle
      // through the `period` slots that follow.
      int slot;
      if (iteration >= fixed)
        slot = ((iteration - fixed) % period) + fixed;
      else
        slot = iteration;
      current = CCV_NNC_MULTIVIEW_DATA(view)[slot]; // Unwrap one layer.
    }
    level++;
  }
  return current;
}
48
49
62
// alias_ref holds either a pointer or a tape position. Positions are encoded as
// (index << 1) | 1, so a set low bit identifies a position.
#define CCV_NNC_IS_TAPE_TENSOR_DATA_ARRAY_POS(ptr) ((uintptr_t)(ptr) & 1)
// A data pointer whose low bit is set is borrowed rather than allocated by the tape.
// The argument is parenthesized so that expressions such as *p or (c ? a : b)
// expand to the intended member access.
#define CCV_NUMERIC_DATA_NO_ALLOC(data) ((uintptr_t)((data).u8) & 1)
// Round an element count up to the next multiple of 4. It is applied to the number
// of ints stored ahead of the data area, i.e. 16 bytes when int is 4 bytes wide.
#define ALIGN_16(x) (((x) + 3) & -4)
53
54
// Simple allocator from ccv_array_t.
55
// Append one zero-filled record to tensor_data. On return *tape_tensor_data_ref
// points at the new record and *pos_ref holds its index encoded as
// (index << 1) | 1, so the value can be told apart from a pointer by its low bit.
static void _ccv_nnc_tape_tensor_data_array_pos_new(ccv_array_t* const tensor_data, int* const pos_ref, ccv_nnc_tape_tensor_data_array_t** const tape_tensor_data_ref)
{
  const int slot = tensor_data->rnum;
  ccv_array_resize(tensor_data, slot + 1);
  ccv_nnc_tape_tensor_data_array_t* const entry = (ccv_nnc_tape_tensor_data_array_t*)ccv_array_get(tensor_data, slot);
  memset(entry, 0, sizeof(*entry));
  *tape_tensor_data_ref = entry;
  *pos_ref = (slot << 1) | 1;
}
64
65
static ccv_nnc_tape_tensor_data_array_t* _ccv_nnc_tape_tensor_data_array_get(const ccv_array_t* const tensor_data, const int pos)
66
62
{
67
62
  assert((pos >> 1) <= tensor_data->rnum);
68
62
  return (ccv_nnc_tape_tensor_data_array_t*)ccv_array_get(tensor_data, pos >> 1);
69
62
}
70
71
// Recursively relocate tensor data records from the old multi-dimensional layout
// (described by dim / dim_count) to the enlarged layout implied by the graphs'
// current while_count values. `offset` is the axis being processed. old_data may
// be 0 for sub-blocks that did not exist before, in which case the destination
// is cleared. Every axis is walked from its highest index down to 0 because the
// old and new regions may overlap with old_data <= new_data.
static void _ccv_nnc_tape_tensor_data_move(ccv_nnc_tape_tensor_data_t* const old_data, ccv_nnc_tape_tensor_data_t* const new_data, const int offset, const ccv_nnc_graph_t* const* const graphs, const int graph_size, const int* const dim, const int dim_count)
{
  const int total_dims = ccv_max(dim_count, graph_size);
  // Highest index on this axis in the old layout, and the one the graph needs now.
  const int old_last = (offset < dim_count) ? dim[offset] - 1 : 0;
  const int graph_last = (offset < graph_size) ? graphs[offset]->while_count + 1 : 0;
  int k;
  if (offset == total_dims - 1)
  {
    // Innermost axis: copy or clear individual records.
    assert(old_data <= new_data);
    if (old_data)
    {
      for (k = graph_last; k > old_last; k--)
        new_data[k].data.u8 = 0;
      for (k = old_last; k >= 0; k--)
        new_data[k] = old_data[k];
    } else {
      for (k = ccv_max(old_last, graph_last); k >= 0; k--)
        new_data[k].data.u8 = 0;
    }
    return;
  }
  // Outer axis: compute how many records one step along this axis spans in
  // each layout, then recurse per index.
  int old_stride = 1;
  int new_stride = 1;
  for (k = offset + 1; k < total_dims; k++)
  {
    const int old_extent = (k < dim_count) ? dim[k] : 1;
    const int graph_extent = (k < graph_size) ? (int)(graphs[k]->while_count + 2) : 1;
    old_stride *= old_extent;
    new_stride *= ccv_max(old_extent, graph_extent);
  }
  for (k = ccv_max(old_last, graph_last); k >= 0; k--)
  {
    ccv_nnc_tape_tensor_data_t* source = 0;
    if (old_data && offset < dim_count && k < dim[offset])
      source = old_data + k * old_stride;
    _ccv_nnc_tape_tensor_data_move(source, new_data + k * new_stride, offset + 1, graphs, graph_size, dim, dim_count);
  }
}
107
108
// Grow a tensor data array so that every axis can hold index while_count + 1 of
// the corresponding graph. The dim header and the records share one allocation:
// ALIGN_16(dim_count) ints followed by the records. After reallocating, existing
// records are moved in place to their positions in the larger layout, then the
// dim header and dim_count are updated.
static void _ccv_nnc_tape_tensor_data_array_resize(ccv_nnc_tape_tensor_data_array_t* const data_array, const ccv_nnc_graph_t* const* const graphs, const int graph_size)
{
  const int old_dim_count = data_array->dim_count;
  const int new_dim_count = ccv_max(graph_size, old_dim_count);
  int total = 1;
  int d;
  for (d = 0; d < new_dim_count; d++)
  {
    const int have = (d < old_dim_count) ? data_array->dim[d] : 1;
    const int need = (d < graph_size) ? (int)(graphs[d]->while_count + 2) : 1;
    total *= ccv_max(have, need);
  }
  data_array->dim = ccrealloc(data_array->dim, sizeof(int) * ALIGN_16(new_dim_count) + sizeof(ccv_nnc_tape_tensor_data_t) * total);
  ccv_nnc_tape_tensor_data_t* const old_data = (ccv_nnc_tape_tensor_data_t*)(data_array->dim + ALIGN_16(old_dim_count));
  ccv_nnc_tape_tensor_data_t* const new_data = (ccv_nnc_tape_tensor_data_t*)(data_array->dim + ALIGN_16(new_dim_count));
  // old_data and new_data live in the same allocation. Because the layout only
  // ever grows, moving from the end towards the beginning avoids overwriting
  // records that have not been relocated yet.
  assert(graph_size > 0);
  assert(old_dim_count > 0);
  _ccv_nnc_tape_tensor_data_move(old_data, new_data, 0, graphs, graph_size, data_array->dim, old_dim_count);
  data_array->data = new_data;
  // Records are in place; now publish the new extents.
  for (d = 0; d < new_dim_count; d++)
  {
    const int have = (d < old_dim_count) ? data_array->dim[d] : 1;
    const int need = (d < graph_size) ? (int)(graphs[d]->while_count + 2) : 1;
    data_array->dim[d] = ccv_max(have, need);
  }
  data_array->dim_count = new_dim_count;
}
138
139
// Point tensor->data at the tape storage that belongs to the current iteration of
// every enclosing graph.
//
// tensor_data:       the tape's array of per-tensor data records.
// tensor:            the tensor whose data pointer is rewritten.
// flags:             accepted for interface compatibility; not read here.
// graphs/graph_size: the chain of graphs, outermost first; graph_size must be > 0.
// create_if_missing: when non-zero, allocate storage for an iteration that has
//                    none yet; when zero, fall back to the referenced tensor's
//                    own buffer instead.
static void _ccv_nnc_tensor_from_tape(ccv_array_t* const tensor_data, ccv_nnc_tensor_t* const tensor, const int flags, const ccv_nnc_graph_t* const* const graphs, const int graph_size, const int create_if_missing)
{
  assert(graph_size > 0);
  // Follow alias_ref pointers (unwrapping multiviews on the way) until reaching a
  // tensor that has no alias_ref or whose alias_ref is an encoded tape position.
  ccv_nnc_tensor_t* tensor_ref = tensor;
  while (tensor_ref->alias_ref && !CCV_NNC_IS_TAPE_TENSOR_DATA_ARRAY_POS(tensor_ref->alias_ref))
  {
    // Step from the tensor we are currently at. Restarting from `tensor` on every
    // pass would revisit the same target and never advance along a longer chain.
    tensor_ref = (ccv_nnc_tensor_t*)tensor_ref->alias_ref;
    if (CCV_IS_TENSOR_MULTIVIEW(tensor_ref))
      tensor_ref = _ccv_nnc_tensor_from_tensor_multiview(graphs, graph_size, (ccv_nnc_tensor_multiview_t*)tensor_ref);
  }
  ccv_nnc_tape_tensor_data_array_t* data_array;
  if (!tensor_ref->alias_ref)
  {
    // First time this tensor is seen: create its record and remember the position.
    int pos;
    _ccv_nnc_tape_tensor_data_array_pos_new(tensor_data, &pos, &data_array);
    tensor_ref->alias_ref = pos;
  } else
    data_array = _ccv_nnc_tape_tensor_data_array_get(tensor_data, (int)tensor_ref->alias_ref);
  // Either the layout exists already, or it has to be created now.
  int i;
  if (!data_array->dim)
  {
    // Each axis has while_count + 2 entries so that index while_count + 1 is valid.
    int size = 1;
    for (i = 0; i < graph_size; i++)
      size *= (int)(graphs[i]->while_count + 2);
    data_array->dim_count = graph_size;
    data_array->dim = (int*)ccmalloc(sizeof(int) * ALIGN_16(graph_size) + sizeof(ccv_nnc_tape_tensor_data_t) * size);
    for (i = 0; i < graph_size; i++)
      data_array->dim[i] = (int)(graphs[i]->while_count + 2);
    data_array->data = (ccv_nnc_tape_tensor_data_t*)(data_array->dim + ALIGN_16(graph_size));
    for (i = 0; i < size; i++)
      data_array->data[i].data.u8 = 0;
  } else {
    // Resize when there are more graphs than axes, or any axis cannot hold
    // index while_count + 1.
    int flag = (data_array->dim_count < graph_size);
    for (i = 0; !flag && i < graph_size; i++)
      flag = (data_array->dim[i] <= graphs[i]->while_count + 1);
    if (flag)
      _ccv_nnc_tape_tensor_data_array_resize(data_array, graphs, graph_size);
  }
  // Compute the flat index of (while_count + 1) on each axis, innermost axis last.
  int idx, step;
  idx = (graphs[graph_size - 1]->while_count + 1);
  step = data_array->dim[graph_size - 1];
  for (i = graph_size - 2; i >= 0; i--)
  {
    idx += (graphs[i]->while_count + 1) * step;
    step *= data_array->dim[i];
  }
  ccv_numeric_data_t data = data_array->data[idx].data;
  if (!data.u8)
  {
    if (!create_if_missing)
    {
      // Nothing is recorded for this iteration and we may not allocate: record the
      // referenced tensor's own buffer, with the low bit set to mark it as not
      // allocated by the tape (see CCV_NUMERIC_DATA_NO_ALLOC).
      data.u8 = data_array->data[idx].data.u8 = (unsigned char*)((uintptr_t)tensor_ref->data.u8 | (uintptr_t)1);
    } else {
      const size_t size = ccv_nnc_tensor_data_size(tensor->info);
      // Remember the tensor type so the buffer can be released the right way later.
      data_array->data[idx].type = tensor->info.type;
#ifdef HAVE_CUDA
      if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY)
        data_array->data[idx].data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), size);
      else
        ccmemalign((void **)&data_array->data[idx].data.u8, 64, size);
#elif defined(HAVE_MPS)
      if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY)
        data_array->data[idx].data.u8 = (uint8_t*)mpobjmalloc(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), size);
      else
        ccmemalign((void **)&data_array->data[idx].data.u8, 64, size);
#else
      assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
      ccmemalign((void **)&data_array->data[idx].data.u8, 64, size);
#endif
      data = data_array->data[idx].data;
    }
  }
  // Strip the tag bit before handing the pointer to the tensor.
  tensor->data.u8 = (unsigned char*)((uintptr_t)data.u8 & ~(uintptr_t)1);
}
221
222
// Redirect the data pointers of tape-allocated inputs and outputs to the tape
// storage for the current iteration of `graph` and its ancestors. Tensors that
// are null or not tape-allocated are left untouched. Inputs are looked up
// without creating storage; outputs get storage created when missing.
// input_flags / output_flags may be null, in which case 0 is passed per tensor.
void ccv_nnc_tensor_tape_io(ccv_nnc_tensor_tape_t* const tape, const ccv_nnc_graph_t* const graph, const int* const input_flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, const int* const output_flags, ccv_nnc_tensor_t* const* const outputs, const int output_size)
{
  int k;
  int needs_tape = 0;
  for (k = 0; k < input_size; k++)
    if (inputs[k] && CCV_GET_TAPE_ALLOC(inputs[k]->type))
    {
      needs_tape = 1;
      break;
    }
  if (!needs_tape)
    for (k = 0; k < output_size; k++)
      if (outputs[k] && CCV_GET_TAPE_ALLOC(outputs[k]->type))
      {
        needs_tape = 1;
        break;
      }
  // Nothing is tape-allocated: the tensors already point at the right data.
  if (!needs_tape)
    return;
  // Collect the chain from the root graph down to `graph`, so multiview tensors
  // can be unwrapped level by level.
  assert(graph);
  const ccv_nnc_graph_t* walker;
  int depth = 0;
  for (walker = graph; walker; walker = walker->p)
    depth++;
  const int graph_size = depth;
  assert(graph_size > 0);
  const ccv_nnc_graph_t* graphs[graph_size];
  int level = graph_size;
  for (walker = graph; walker; walker = walker->p)
    graphs[--level] = walker;
  // Update inputs (lookup only), then outputs (create on demand).
  for (k = 0; k < input_size; k++)
  {
    if (!inputs[k] || !CCV_GET_TAPE_ALLOC(inputs[k]->type))
      continue;
    _ccv_nnc_tensor_from_tape(tape->tensor_data, inputs[k], input_flags ? input_flags[k] : 0, graphs, graph_size, 0);
  }
  for (k = 0; k < output_size; k++)
  {
    if (!outputs[k] || !CCV_GET_TAPE_ALLOC(outputs[k]->type))
      continue;
    _ccv_nnc_tensor_from_tape(tape->tensor_data, outputs[k], output_flags ? output_flags[k] : 0, graphs, graph_size, 1);
  }
}
255
256
#define CCV_NNC_IS_TAPE_EXEC_DATA_ARRAY_POS(ptr) ((uintptr_t)(ptr) & 1)
257
258
// Simple allocator from ccv_array_t.
259
static void _ccv_nnc_tape_exec_data_array_pos_new(ccv_array_t* const exec_data, int* const pos_ref, ccv_nnc_tape_exec_data_array_t** const tape_exec_data_ref)
260
5
{
261
5
  int pos = exec_data->rnum;
262
5
  ccv_array_resize(exec_data, pos + 1);
263
5
  *pos_ref = (pos << 1) | 1;
264
5
  ccv_nnc_tape_exec_data_array_t* const tape_exec_data = (ccv_nnc_tape_exec_data_array_t*)ccv_array_get(exec_data, pos);
265
5
  memset(tape_exec_data, 0, sizeof(ccv_nnc_tape_exec_data_array_t));
266
5
  *tape_exec_data_ref = tape_exec_data;
267
5
}
268
269
static ccv_nnc_tape_exec_data_array_t* _ccv_nnc_tape_exec_data_array_get(const ccv_array_t* const exec_data, const int pos)
270
30
{
271
30
  assert((pos >> 1) <= exec_data->rnum);
272
30
  return (ccv_nnc_tape_exec_data_array_t*)ccv_array_get(exec_data, pos >> 1);
273
30
}
274
275
// Recursively relocate numbering values from the old multi-dimensional layout
// (described by dim / dim_count) to the enlarged layout implied by while_counts.
// `offset` is the axis being processed. old_data may be 0 for sub-blocks that did
// not exist before, in which case the destination is zeroed. Every axis is walked
// from its highest index down to 0 because the old and new regions may overlap
// with old_data <= new_data.
static void _ccv_nnc_tape_exec_data_move(uint64_t* const old_data, uint64_t* const new_data, const int offset, const uint64_t* const while_counts, const int graph_size, const int* const dim, const int dim_count)
{
  const int total_dims = (dim_count > graph_size) ? dim_count : graph_size;
  // Highest index on this axis in the old layout, and the one needed now.
  const int old_last = (offset < dim_count) ? dim[offset] - 1 : 0;
  const int graph_last = (offset < graph_size) ? while_counts[offset] : 0;
  const int last = (old_last > graph_last) ? old_last : graph_last;
  int k;
  if (offset == total_dims - 1)
  {
    // Innermost axis: copy or zero individual values.
    assert(old_data <= new_data);
    if (old_data)
    {
      for (k = graph_last; k > old_last; k--)
        new_data[k] = 0;
      for (k = old_last; k >= 0; k--)
        new_data[k] = old_data[k];
    } else {
      for (k = last; k >= 0; k--)
        new_data[k] = 0;
    }
    return;
  }
  // Outer axis: compute how many values one step along this axis spans in each
  // layout, then recurse per index.
  int old_stride = 1;
  int new_stride = 1;
  for (k = offset + 1; k < total_dims; k++)
  {
    const int old_extent = (k < dim_count) ? dim[k] : 1;
    const int graph_extent = (k < graph_size) ? (int)(while_counts[k] + 1) : 1;
    old_stride *= old_extent;
    new_stride *= (old_extent > graph_extent) ? old_extent : graph_extent;
  }
  for (k = last; k >= 0; k--)
  {
    uint64_t* source = 0;
    if (old_data && offset < dim_count && k < dim[offset])
      source = old_data + k * old_stride;
    _ccv_nnc_tape_exec_data_move(source, new_data + k * new_stride, offset + 1, while_counts, graph_size, dim, dim_count);
  }
}
311
312
// Grow an exec data array so that every axis can hold index while_counts[i].
// The dim header and the values share one allocation: ALIGN_16(dim_count) ints
// followed by the uint64_t values. After reallocating, existing values are moved
// in place to their positions in the larger layout, then the dim header and
// dim_count are updated.
static void _ccv_nnc_tape_exec_data_array_resize(ccv_nnc_tape_exec_data_array_t* const data_array, const uint64_t* const while_counts, const int graph_size)
{
  const int old_dim_count = data_array->dim_count;
  const int new_dim_count = ccv_max(graph_size, old_dim_count);
  int total = 1;
  int d;
  for (d = 0; d < new_dim_count; d++)
  {
    const int have = (d < old_dim_count) ? data_array->dim[d] : 1;
    const int need = (d < graph_size) ? (int)(while_counts[d] + 1) : 1;
    total *= ccv_max(have, need);
  }
  data_array->dim = ccrealloc(data_array->dim, sizeof(int) * ALIGN_16(new_dim_count) + sizeof(uint64_t) * total);
  uint64_t* const old_data = (uint64_t*)(data_array->dim + ALIGN_16(old_dim_count));
  uint64_t* const new_data = (uint64_t*)(data_array->dim + ALIGN_16(new_dim_count));
  // old_data and new_data live in the same allocation. Because the layout only
  // ever grows, moving from the end towards the beginning avoids overwriting
  // values that have not been relocated yet.
  assert(graph_size > 0);
  assert(old_dim_count > 0);
  _ccv_nnc_tape_exec_data_move(old_data, new_data, 0, while_counts, graph_size, data_array->dim, old_dim_count);
  data_array->data = new_data;
  // Values are in place; now publish the new extents.
  for (d = 0; d < new_dim_count; d++)
  {
    const int have = (d < old_dim_count) ? data_array->dim[d] : 1;
    const int need = (d < graph_size) ? (int)(while_counts[d] + 1) : 1;
    data_array->dim[d] = ccv_max(have, need);
  }
  data_array->dim_count = new_dim_count;
}
342
343
// Return the numbering recorded for `exec` at the current while_count of `graph`
// and each of its ancestors. When the exec has no alias_ref but has a pair_ref,
// the paired exec's record is consulted instead.
uint64_t ccv_nnc_tensor_tape_numbering(ccv_nnc_tensor_tape_t* const tape, const ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec)
{
  assert(exec.graph == graph);
  ccv_nnc_graph_exec_info_t* info = ccv_array_get(graph->exec_info, exec.d);
  if (!info->alias_ref && info->pair_ref)
    info = ccv_array_get(graph->exec_info, info->pair_ref - 1);
  // NOTE(review): if alias_ref is still 0 here (no pair either), the lookup below
  // resolves to index 0 of exec_data — confirm that this is intended.
  ccv_nnc_tape_exec_data_array_t* const data_array = _ccv_nnc_tape_exec_data_array_get(tape->exec_data, (int)info->alias_ref);
  // Gather while_count for every graph from the root (index 0) down to `graph`.
  const ccv_nnc_graph_t* walker;
  int depth = 0;
  for (walker = graph; walker; walker = walker->p)
    depth++;
  const int graph_size = depth;
  uint64_t while_counts[graph_size];
  int level = graph_size;
  for (walker = graph; walker; walker = walker->p)
    while_counts[--level] = walker->while_count;
  assert(graph_size <= data_array->dim_count);
  // Flatten the per-graph counts into a single index, innermost graph first.
  int idx = 0, step = 1;
  level = graph_size;
  while (level > 0)
  {
    level--;
    assert(while_counts[level] < data_array->dim[level]);
    idx += while_counts[level] * step;
    step *= data_array->dim[level];
  }
  return data_array->data[idx];
}
369
370
// Record `numbering` for `exec` at the current while_count of `graph` and each of
// its ancestors. The exec's record is created on first use (its encoded position
// is stored in alias_ref) and grown whenever a while_count no longer fits.
void ccv_nnc_tensor_tape_set_numbering(ccv_nnc_tensor_tape_t* const tape, ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const uint64_t numbering)
{
  assert(exec.graph == graph);
  ccv_nnc_graph_exec_info_t* const info = ccv_array_get(graph->exec_info, exec.d);
  ccv_nnc_tape_exec_data_array_t* data_array;
  if (!info->alias_ref)
  {
    int pos;
    _ccv_nnc_tape_exec_data_array_pos_new(tape->exec_data, &pos, &data_array);
    info->alias_ref = pos;
  } else {
    assert(CCV_NNC_IS_TAPE_EXEC_DATA_ARRAY_POS(info->alias_ref));
    data_array = _ccv_nnc_tape_exec_data_array_get(tape->exec_data, (int)info->alias_ref);
  }
  // Gather while_count for every graph from the root (index 0) down to `graph`.
  const ccv_nnc_graph_t* walker = graph;
  assert(walker);
  int depth = 0;
  for (; walker; walker = walker->p)
    depth++;
  const int graph_size = depth;
  assert(graph_size > 0);
  uint64_t while_counts[graph_size];
  int level = graph_size;
  for (walker = graph; walker; walker = walker->p)
    while_counts[--level] = walker->while_count;
  int k;
  if (data_array->dim)
  {
    // Grow when there are more graphs than axes, or any axis cannot hold its count.
    int needs_resize = (data_array->dim_count < graph_size);
    for (k = 0; k < graph_size && !needs_resize; k++)
      needs_resize = (data_array->dim[k] <= while_counts[k]);
    if (needs_resize)
      _ccv_nnc_tape_exec_data_array_resize(data_array, while_counts, graph_size);
  } else {
    // First use: one axis per graph, each with while_count + 1 entries, all zero.
    int total = 1;
    for (k = 0; k < graph_size; k++)
      total *= (int)(while_counts[k] + 1);
    data_array->dim_count = graph_size;
    data_array->dim = (int*)ccmalloc(sizeof(int) * ALIGN_16(graph_size) + sizeof(uint64_t) * total);
    for (k = 0; k < graph_size; k++)
      data_array->dim[k] = (int)(while_counts[k] + 1);
    data_array->data = (uint64_t*)(data_array->dim + ALIGN_16(graph_size));
    for (k = 0; k < total; k++)
      data_array->data[k] = 0;
  }
  // Flatten the per-graph counts into a single index, innermost graph first.
  int idx = 0, step = 1;
  level = graph_size;
  while (level > 0)
  {
    level--;
    assert(while_counts[level] < data_array->dim[level]);
    idx += while_counts[level] * step;
    step *= data_array->dim[level];
  }
  data_array->data[idx] = numbering;
}
423
424
// Release a tape and everything it owns: every data buffer the tape allocated
// (entries tagged as not allocated are skipped), each record's combined
// dim/data allocation, both arrays, and the tape itself.
void ccv_nnc_tensor_tape_free(ccv_nnc_tensor_tape_t* const tape)
{
  int i, j;
  for (i = 0; i < tape->tensor_data->rnum; i++)
  {
    ccv_nnc_tape_tensor_data_array_t* const data_array = (ccv_nnc_tape_tensor_data_array_t*)ccv_array_get(tape->tensor_data, i);
    if (data_array->dim)
    {
      int size = 1;
      for (j = 0; j < data_array->dim_count; j++)
        size *= data_array->dim[j];
      for (j = 0; j < size; j++)
        if (data_array->data[j].data.u8 && !CCV_NUMERIC_DATA_NO_ALLOC(data_array->data[j].data))
        {
          // Release each buffer with the counterpart of the allocator that
          // produced it, selected by the recorded tensor type.
#ifdef HAVE_CUDA
          if (CCV_TENSOR_GET_MEMORY(data_array->data[j].type) == CCV_TENSOR_GPU_MEMORY)
            cufree(CCV_TENSOR_GET_DEVICE_ID(data_array->data[j].type), data_array->data[j].data.u8);
          else
            ccfree(data_array->data[j].data.u8);
#elif defined(HAVE_MPS)
          // GPU buffers come from mpobjmalloc under MPS and must not go through ccfree.
          if (CCV_TENSOR_GET_MEMORY(data_array->data[j].type) == CCV_TENSOR_GPU_MEMORY)
            mpobjfree(CCV_TENSOR_GET_DEVICE_ID(data_array->data[j].type), data_array->data[j].data.u8);
          else
            ccfree(data_array->data[j].data.u8);
#else
          ccfree(data_array->data[j].data.u8);
#endif
        }
      // dim and data share one allocation, so freeing dim releases both.
      ccfree(data_array->dim);
    }
  }
  ccv_array_free(tape->tensor_data);
  for (i = 0; i < tape->exec_data->rnum; i++)
  {
    ccv_nnc_tape_exec_data_array_t* const data_array = (ccv_nnc_tape_exec_data_array_t*)ccv_array_get(tape->exec_data, i);
    if (data_array->dim)
      ccfree(data_array->dim);
  }
  ccv_array_free(tape->exec_data);
  ccfree(tape);
}