Coverage Report

Created: 2024-08-19 11:27

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_dynamic_graph.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv_nnc.h"
2
#include "ccv_nnc_easy.h"
3
#include "ccv_nnc_internal.h"
4
#include "ccv_nnc_easy.h"
5
#include "ccv_internal.h"
6
#include "_ccv_nnc_dynamic_graph.h"
7
8
// MARK - Level-4 API
9
10
ccv_nnc_dynamic_graph_t* ccv_nnc_dynamic_graph_new(void)
11
53
{
12
53
  ccv_nnc_dynamic_graph_t* graph = ccmalloc(sizeof(ccv_nnc_dynamic_graph_t));
13
53
  graph->no_grad = 0;
14
53
  graph->reuse_var = -1;
15
53
  graph->vars = ccv_array_new(sizeof(ccv_nnc_tensor_variable_t), 1, 0);
16
53
  graph->binds = ccv_array_new(sizeof(ccv_nnc_tensor_variable_graph_bind_t), 1, 0);
17
53
  graph->tape = ccv_nnc_symbolic_graph_new();
18
53
  graph->xpu_alloc.mp_hdr = -1;
19
53
  graph->xpu_alloc.freed = kh_init(dy_str);
20
53
  graph->xpu_alloc.allocd = kh_init(dy_alloc);
21
  // These may not be used as frequent, init as needed.
22
53
  graph->stateful_execs = 0;
23
53
  graph->reuse_stateful_exec = -1;
24
53
  graph->stream_map = 0;
25
53
  graph->ws = 0;
26
53
  return graph;
27
53
}
28
29
static void _ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int zeroing)
30
32.3k
{
31
32.3k
  const int index = tensor_variable->index;
32
32.3k
  if (tensor_variable->tensor_view)
33
14.6k
  {
34
14.6k
    if (tensor_variable->destructor_hook.func)
35
4
      tensor_variable->destructor_hook.func(graph, (ccv_nnc_tensor_t*)tensor_variable->tensor_view, tensor_variable->destructor_hook.context);
36
14.6k
    if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tensor_variable->tensor_view))
37
14.2k
    {
38
14.2k
      if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view))
39
7
        ccv_nnc_tensor_view_free(tensor_variable->tensor_view);
40
14.2k
      else {
41
14.2k
        if (!tensor_variable->alias_index_ref && // Return this memory to the graph.
42
14.2k
          
CCV_TENSOR_GET_MEMORY14.1k
(tensor_variable->tensor_view->info.type) == CCV_TENSOR_GPU_MEMORY14.1k
&&
tensor_variable->tensor_view->data.u8419
)
43
419
          ccv_nnc_xpu_free(&graph->xpu_alloc, tensor_variable->tensor_view->data.u8);
44
14.2k
        ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view);
45
14.2k
      }
46
14.2k
    }
47
14.6k
  }
48
32.3k
  ccfree(tensor_variable);
49
32.3k
  if (zeroing)
50
32.1k
    *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, index) = 0;
51
32.3k
  int i;
52
64.4k
  for (i = graph->vars->rnum - 1; i >= 0; 
i--32.1k
)
53
64.2k
    if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) != 0)
54
32.1k
    {
55
32.1k
      graph->vars->rnum = i + 1;
56
32.1k
      break;
57
32.1k
    }
58
32.3k
  if (index < graph->vars->rnum &&
59
32.3k
    
(23.3k
index < graph->reuse_var23.3k
||
graph->reuse_var < 017.2k
))
60
11.9k
    graph->reuse_var = index;
61
20.4k
  else if (graph->reuse_var >= graph->vars->rnum)
62
5.62k
    graph->reuse_var = -1;
63
32.3k
}
64
65
static void _ccv_nnc_tensor_variable_graph_bind_free(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int zeroing)
66
27.7k
{
67
27.7k
  bind->index = CCV_NNC_TENSOR_NO_VARIABLE;
68
27.7k
  if (bind->sources)
69
15.3k
    ccv_array_free(bind->sources);
70
27.7k
  if (bind->destinations)
71
23.0k
    ccv_array_free(bind->destinations);
72
27.7k
  if (bind->tensor_view)
73
17.6k
  {
74
17.6k
    if (bind->destructor_hook.func)
75
3
      bind->destructor_hook.func(graph, (ccv_nnc_tensor_t*)bind->tensor_view, bind->destructor_hook.context);
76
17.6k
    if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(bind->tensor_view))
77
17.4k
    {
78
17.4k
      if (CCV_IS_TENSOR_VIEW(bind->tensor_view))
79
1
        ccv_nnc_tensor_view_free(bind->tensor_view);
80
17.4k
      else {
81
17.4k
        if (!bind->alias_ref && // Return this memory to the graph.
82
17.4k
          
CCV_TENSOR_GET_MEMORY16.4k
(bind->tensor_view->info.type) == CCV_TENSOR_GPU_MEMORY16.4k
&&
bind->tensor_view->data.u838
)
83
38
          ccv_nnc_xpu_free(&graph->xpu_alloc, bind->tensor_view->data.u8);
84
17.4k
        ccv_nnc_tensor_free((ccv_nnc_tensor_t*)bind->tensor_view);
85
17.4k
      }
86
17.4k
    }
87
17.6k
  }
88
27.7k
  if (zeroing)
89
27.4k
  {
90
27.4k
    bind->sources = 0;
91
27.4k
    bind->destinations = 0;
92
27.4k
    bind->tensor_view = 0;
93
27.4k
    bind->destructor_hook.func = 0;
94
27.4k
    bind->destructor_hook.context = 0;
95
27.4k
  }
96
27.7k
}
97
98
void ccv_nnc_dynamic_graph_free(ccv_nnc_dynamic_graph_t* const graph)
99
53
{
100
53
  int i;
101
333
  for (i = 0; i < graph->vars->rnum; 
i++280
)
102
280
  {
103
280
    ccv_nnc_tensor_variable_t tensor_variable = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i);
104
280
    if (tensor_variable)
105
227
      _ccv_nnc_tensor_variable_free(graph, tensor_variable, 0);
106
280
  }
107
53
  ccv_array_free(graph->vars);
108
398
  for (i = 0; i < graph->binds->rnum; 
i++345
)
109
345
    _ccv_nnc_tensor_variable_graph_bind_free(graph, (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i), 0);
110
53
  ccv_array_free(graph->binds);
111
53
  ccv_nnc_symbolic_graph_free(graph->tape);
112
53
  if (graph->ws)
113
36
    ccv_array_free(graph->ws);
114
53
  if (graph->stateful_execs)
115
12
  {
116
33
    for (i = 0; i < graph->stateful_execs->rnum; 
i++21
)
117
21
    {
118
21
      ccv_nnc_stateful_exec_t* const stateful_exec = *(ccv_nnc_stateful_exec_t**)ccv_array_get(graph->stateful_execs, i);
119
21
      if (stateful_exec)
120
8
        ccfree(stateful_exec);
121
21
    }
122
12
    ccv_array_free(graph->stateful_execs);
123
12
  }
124
53
  if (graph->stream_map)
125
10
  {
126
10
    khiter_t k;
127
58
    for (k = 
kh_begin10
(graph->stream_map); k != kh_end(graph->stream_map);
++k48
)
128
48
    {
129
48
      if (!kh_exist(graph->stream_map, k))
130
25
        continue;
131
23
      ccv_nnc_stream_context_t* const stream = kh_val(graph->stream_map, k);
132
23
      ccv_nnc_stream_context_free(stream);
133
23
    }
134
10
    kh_destroy(stream_map, graph->stream_map);
135
10
  }
136
53
  ccv_nnc_xpu_alloc_destroy(&graph->xpu_alloc);
137
53
  ccfree(graph);
138
53
}
139
140
void ccv_nnc_tensor_variable_set(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_t* const tensor)
141
655
{
142
655
  assert(!tensor_variable->alias_index_ref);
143
655
  if (tensor_variable->tensor_view && 
!2
CCV_NNC_IS_EXTERN_TENSOR_VIEW2
(tensor_variable->tensor_view))
144
0
  {
145
0
    assert(!CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view));
146
0
    ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view);
147
0
  }
148
655
  tensor_variable->info = tensor->info;
149
655
  tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)((uintptr_t)tensor | 1);
150
655
}
151
152
void ccv_nnc_tensor_variable_destructor_hook(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_variable_destructor_f func, void* const context)
153
7
{
154
7
  tensor_variable->destructor_hook.func = func;
155
7
  tensor_variable->destructor_hook.context = context;
156
7
}
157
158
inline static void _ccv_nnc_tensor_variable_init(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_param_t info)
159
31.3k
{
160
31.3k
  tensor_variable->alias_index_ref = 0;
161
31.3k
  tensor_variable->alias_off = 0;
162
31.3k
  tensor_variable->destructor_hook.func = 0;
163
31.3k
  tensor_variable->destructor_hook.context = 0;
164
31.3k
  tensor_variable->info = info;
165
31.3k
  tensor_variable->symbol = NO_TENSOR_SYMBOL;
166
31.3k
  tensor_variable->tensor_view = 0;
167
31.3k
  if (graph->reuse_var >= 0)
168
802
  {
169
802
    const int reuse_var = graph->reuse_var;
170
802
    assert(reuse_var < graph->vars->rnum);
171
802
    tensor_variable->index = reuse_var;
172
802
    *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var) = tensor_variable;
173
802
    int i;
174
802
    graph->reuse_var = -1;
175
1.43k
    for (i = reuse_var + 1; i < graph->vars->rnum && 
graph->reuse_var < 01.02k
;
i++631
)
176
631
      if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) == 0)
177
591
        graph->reuse_var = i;
178
30.5k
  } else {
179
30.5k
    tensor_variable->index = graph->vars->rnum;
180
30.5k
    ccv_array_push(graph->vars, &tensor_variable);
181
30.5k
  }
182
31.3k
}
183
184
ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info)
185
31.2k
{
186
31.2k
  ccv_nnc_tensor_variable_t tensor_variable = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s));
187
31.2k
  tensor_variable->type = CCV_NNC_TENSOR_VARIABLE;
188
31.2k
  _ccv_nnc_tensor_variable_init(graph, tensor_variable, info);
189
31.2k
  return tensor_variable;
190
31.2k
}
191
192
ccv_nnc_tensor_variable_t ccv_nnc_tensor_constant_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info)
193
37
{
194
37
  ccv_nnc_tensor_variable_t tensor_variable = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s));
195
37
  tensor_variable->type = CCV_NNC_TENSOR_CONSTANT;
196
37
  _ccv_nnc_tensor_variable_init(graph, tensor_variable, info);
197
37
  return tensor_variable;
198
37
}
199
200
int ccv_nnc_tensor_variable_is_constant(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
201
0
{
202
0
  return tensor_variable->type == CCV_NNC_TENSOR_CONSTANT;
203
0
}
204
205
ccv_nnc_tensor_param_t ccv_nnc_tensor_variable_params(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
206
0
{
207
0
  return tensor_variable->info;
208
0
}
209
210
ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_alias_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info)
211
1.04k
{
212
1.04k
  ccv_nnc_tensor_variable_t variable_alias = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s));
213
1.04k
  variable_alias->type = tensor_variable->type;
214
  // If the tensor variable is an alias itself, we point directly to its original.
215
1.04k
  if (tensor_variable->alias_index_ref)
216
1
  {
217
1
    variable_alias->alias_index_ref = tensor_variable->alias_index_ref;
218
    // The tensor variable need to be fully specified if I am doing alias an alias.
219
1
    assert(!ccv_nnc_is_tensor_auto(tensor_variable->info));
220
1
    int i;
221
1
    int no_stride = 1;
222
2
    for (i = 0; no_stride && 
i < 1
CCV_NNC_MAX_DIM_ALLOC1
;
i++1
)
223
1
      no_stride = (tensor_variable->stride[i] == 0);
224
1
    int stride_from_dim[CCV_NNC_MAX_DIM_ALLOC];
225
1
    int* to_stride;
226
1
    if (no_stride)
227
0
    {
228
0
      ccv_nnc_tensor_get_stride(tensor_variable->info.dim, stride_from_dim);
229
0
      to_stride = stride_from_dim;
230
0
    } else
231
1
      to_stride = tensor_variable->stride;
232
    // If we provide stride, or reshape to a different size, assert the tensor variable itself is contiguous (otherwise we cannot satisfy the reshape requirements).
233
1
    const int different_dim = ccv_nnc_tensor_nd(info.dim) != ccv_nnc_tensor_nd(tensor_variable->info.dim);
234
1
    if (different_dim || 
(0
stride[0] != 00
&&
memcmp(stride, to_stride, sizeof(int) * 0
CCV_NNC_MAX_DIM_ALLOC0
) != 0))
235
1
      { assert(ccv_nnc_tensor_view_is_contiguous(tensor_variable->info.dim, to_stride)); }
236
    // Need to compute alias off, that is the alias off of the tensor variable plus its ofs.
237
1
    const off_t off = ccv_nnc_tensor_view_offset(tensor_variable->info.datatype, to_stride, tensor_variable->ofs);
238
1
    variable_alias->alias_off = tensor_variable->alias_off + off;
239
    // If we don't provide stride, copy the stride from previous variable.
240
1
    if (stride[0] == 0)
241
0
    {
242
0
      if (different_dim)
243
0
        ccv_nnc_tensor_get_stride(info.dim, variable_alias->stride);
244
0
      else
245
0
        memcpy(variable_alias->stride, to_stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
246
0
    } else
247
1
      memcpy(variable_alias->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
248
1.04k
  } else {
249
1.04k
    variable_alias->alias_index_ref = tensor_variable->index + 1;
250
1.04k
    variable_alias->alias_off = 0;
251
1.04k
    memcpy(variable_alias->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
252
1.04k
  }
253
1.04k
  variable_alias->info = info;
254
1.04k
  variable_alias->symbol = NO_TENSOR_SYMBOL;
255
1.04k
  variable_alias->destructor_hook.func = 0;
256
1.04k
  variable_alias->destructor_hook.context = 0;
257
1.04k
  variable_alias->tensor_view = 0;
258
1.04k
  memcpy(variable_alias->ofs, ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
259
1.04k
  if (graph->reuse_var >= 0)
260
0
  {
261
0
    const int reuse_var = graph->reuse_var;
262
0
    assert(reuse_var < graph->vars->rnum);
263
0
    variable_alias->index = reuse_var;
264
0
    *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var) = variable_alias;
265
0
    int i;
266
0
    graph->reuse_var = -1;
267
0
    for (i = reuse_var + 1; i < graph->vars->rnum && graph->reuse_var < 0; i++)
268
0
      if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) == 0)
269
0
        graph->reuse_var = i;
270
1.04k
  } else {
271
1.04k
    variable_alias->index = graph->vars->rnum;
272
1.04k
    ccv_array_push(graph->vars, &variable_alias);
273
1.04k
  }
274
1.04k
  return variable_alias;
275
1.04k
}
276
277
int ccv_nnc_tensor_variable_alias_params(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, int ofs[CCV_NNC_MAX_DIM_ALLOC], int stride[CCV_NNC_MAX_DIM_ALLOC])
278
0
{
279
0
  if (!tensor_variable->alias_index_ref)
280
0
    return -1;
281
0
  if (ofs)
282
0
    memcpy(ofs, tensor_variable->ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
283
0
  if (stride)
284
0
    memcpy(stride, tensor_variable->stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
285
0
  return 0;
286
0
}
287
288
ccv_nnc_tensor_t* ccv_nnc_tensor_from_variable_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_stream_context_t* const stream_context)
289
69.1k
{
290
69.1k
  if (tensor_variable->tensor_view)
291
37.4k
  {
292
37.4k
    if (tensor_variable->alias_index_ref)
293
1.03k
    {
294
1.03k
      const int alias_index = tensor_variable->alias_index_ref - 1;
295
1.03k
      assert(alias_index >= 0);
296
1.03k
      ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
297
1.03k
      if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view))
298
12
      {
299
12
        ccv_nnc_tensor_view_t* const tv = tensor_variable->tensor_view;
300
        // We cannot have an alias with custom set tensor, otherwise the pointer update is invalid.
301
12
        assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv));
302
        // Update the tensor_view pointer every time access it, because the underlying variable it alias to have changed.
303
12
        ccv_nnc_tensor_data(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->info, CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8, tv->off + tensor_variable->alias_off, &tv->data, &tv->dataof);
304
1.02k
      } else {
305
1.02k
        ccv_nnc_tensor_t* const tv = (ccv_nnc_tensor_t*)tensor_variable->tensor_view;
306
        // We cannot have an alias with custom set tensor, otherwise the pointer update is invalid.
307
1.02k
        assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv));
308
        // Update the tensor_view pointer every time access it, because the underlying variable it alias to have changed.
309
1.02k
        ccv_nnc_tensor_data(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->info, CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8, tensor_variable->alias_off, &tv->data, &tv->dataof);
310
1.02k
      }
311
1.03k
    }
312
37.4k
    return (ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(tensor_variable->tensor_view);
313
37.4k
  }
314
31.7k
  if (!tensor_variable->alias_index_ref)
315
30.6k
  {
316
    // If we haven't allocated tensor_variable, we cannot allocate them now (because no shape specified), return 0.
317
30.6k
    if (ccv_nnc_is_tensor_auto(tensor_variable->info))
318
0
      return 0;
319
30.6k
    void* ptr = 0;
320
30.6k
    const size_t data_size = ccv_nnc_tensor_data_size(tensor_variable->info);
321
30.6k
    if (CCV_TENSOR_GET_MEMORY(tensor_variable->info.type) == CCV_TENSOR_GPU_MEMORY && 
data_size > 0457
)
322
457
      ptr = ccv_nnc_xpu_alloc(&graph->xpu_alloc, CCV_TENSOR_GET_DEVICE_ID(tensor_variable->info.type), stream_context, data_size);
323
30.6k
    tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, tensor_variable->info, 0);
324
30.6k
    if (tensor_variable->info.dim[0] > 0)
325
30.6k
      { assert(tensor_variable->tensor_view->data.u8); }
326
30.6k
    return (ccv_nnc_tensor_t*)tensor_variable->tensor_view;
327
30.6k
  }
328
1.04k
  const int alias_index = tensor_variable->alias_index_ref - 1;
329
1.04k
  assert(alias_index >= 0);
330
1.04k
  ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
331
1.04k
  assert(!variable_to->alias_index_ref);
332
1.04k
  if (!variable_to->tensor_view)
333
3
  {
334
    // If we haven't allocated variable_to, we cannot allocate them now (because no shape specified), return 0.
335
3
    if (ccv_nnc_is_tensor_auto(variable_to->info))
336
0
      return 0;
337
3
    void* ptr = 0;
338
3
    assert(variable_to->info.type == tensor_variable->info.type);
339
3
    const size_t data_size = ccv_nnc_tensor_data_size(variable_to->info);
340
3
    if (CCV_TENSOR_GET_MEMORY(variable_to->info.type) == CCV_TENSOR_GPU_MEMORY && 
data_size > 00
)
341
0
      ptr = ccv_nnc_xpu_alloc(&graph->xpu_alloc, CCV_TENSOR_GET_DEVICE_ID(variable_to->info.type), stream_context, data_size);
342
3
    variable_to->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, variable_to->info, 0);
343
3
    assert(variable_to->tensor_view->data.u8);
344
3
  }
345
1.04k
  int i;
346
1.04k
  int no_ofs = 1;
347
13.5k
  for (i = 0; no_ofs && 
i < 13.5k
CCV_NNC_MAX_DIM_ALLOC13.5k
;
i++12.4k
)
348
12.4k
    no_ofs = (tensor_variable->ofs[i] == 0);
349
1.04k
  int no_stride = 1;
350
2.46k
  for (i = 0; no_stride && 
i < 1.45k
CCV_NNC_MAX_DIM_ALLOC1.45k
;
i++1.42k
)
351
1.42k
    no_stride = (tensor_variable->stride[i] == 0);
352
1.04k
  int stride_is_packed = no_stride;
353
1.04k
  if (!no_stride) // We have stride, now if it is packed.
354
1.01k
    stride_is_packed = ccv_nnc_is_tensor_stride_packed(tensor_variable->stride, tensor_variable->info.dim);
355
1.04k
  assert(CCV_GET_DATA_TYPE_SIZE(tensor_variable->info.datatype) * ccv_nnc_tensor_count(tensor_variable->info) + tensor_variable->alias_off <= CCV_GET_DATA_TYPE_SIZE(variable_to->info.datatype) * ccv_nnc_tensor_count(variable_to->info));
356
  // Allowing vector type to be normal tensor, rather than a tensor view. We cannot have any offset though.
357
1.04k
  if (no_ofs && 
!stride_is_packed1.03k
)
358
3
    stride_is_packed = ccv_nnc_tensor_view_is_contiguous(tensor_variable->info.dim, tensor_variable->stride);
359
1.04k
  if (no_ofs && 
stride_is_packed1.03k
)
360
1.03k
    tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8, tensor_variable->info, 0);
361
8
  else {
362
8
    if (no_stride)
363
1
      ccv_nnc_tensor_get_stride(tensor_variable->info.dim, tensor_variable->stride);
364
8
    tensor_variable->tensor_view = ccv_nnc_tensor_view_new((ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(variable_to->tensor_view), tensor_variable->info, tensor_variable->ofs, tensor_variable->stride);
365
8
  }
366
1.04k
  if  (tensor_variable->alias_off)
367
1
    ccv_nnc_tensor_data_add(tensor_variable->tensor_view->info, tensor_variable->alias_off, &tensor_variable->tensor_view->data, &tensor_variable->tensor_view->dataof);
368
1.04k
  return (ccv_nnc_tensor_t*)tensor_variable->tensor_view;
369
1.04k
}
370
371
static void _ccv_nnc_tensor_symbol_extra_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_symbol_t symbol)
372
27.6k
{
373
27.6k
  if (symbol.d >= graph->binds->rnum)
374
345
  {
375
345
    const int rnum = graph->binds->rnum;
376
345
    ccv_array_resize(graph->binds, symbol.d + 1);
377
345
    int i;
378
690
    for (i = rnum; i < graph->binds->rnum; 
i++345
)
379
345
      ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i))->index = CCV_NNC_TENSOR_NO_VARIABLE;
380
345
  }
381
27.6k
  ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, symbol.d);
382
27.6k
  bind->type = tensor_variable->type;
383
27.6k
  bind->index = tensor_variable->index;
384
27.6k
  if (tensor_variable->alias_index_ref)
385
1.04k
  {
386
1.04k
    const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){
387
1.04k
      .d = symbol.d,
388
1.04k
      .graph = graph->tape
389
1.04k
    });
390
1.04k
    assert(alias_to.d >= 0 && alias_to.d < graph->binds->rnum);
391
1.04k
    bind->alias_ref = alias_to.d + 1;
392
1.04k
  } else
393
26.5k
    bind->alias_ref = 0;
394
27.6k
  if (bind->sources)
395
0
    ccv_array_free(bind->sources);
396
27.6k
  bind->sources = 0;
397
27.6k
  if (bind->destinations)
398
0
    ccv_array_free(bind->destinations);
399
27.6k
  bind->destinations = 0;
400
27.6k
  bind->destructor_hook.func = 0;
401
27.6k
  bind->destructor_hook.context = 0;
402
27.6k
  bind->tensor_view = 0;
403
27.6k
}
404
405
static ccv_nnc_tensor_symbol_t _ccv_nnc_tensor_symbol_from_variable(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
406
44.5k
{
407
44.5k
  if (tensor_variable->symbol.d >= 0)
408
16.9k
    return tensor_variable->symbol;
409
27.6k
  if (!tensor_variable->alias_index_ref)
410
26.5k
  {
411
26.5k
    const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_new(graph->tape, tensor_variable->info, 0);
412
26.5k
    _ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol);
413
26.5k
    return symbol;
414
26.5k
  }
415
1.04k
  const int alias_index = tensor_variable->alias_index_ref - 1;
416
1.04k
  assert(alias_index >= 0);
417
1.04k
  ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
418
1.04k
  assert(!variable_to->alias_index_ref);
419
1.04k
  int no_stride = 1;
420
1.04k
  int i;
421
2.43k
  for (i = 0; no_stride && 
i < 1.42k
CCV_NNC_MAX_DIM_ALLOC1.42k
;
i++1.39k
)
422
1.39k
    no_stride = (tensor_variable->stride[i] == 0);
423
1.04k
  if (no_stride)
424
32
    ccv_nnc_tensor_get_stride(tensor_variable->info.dim, tensor_variable->stride);
425
1.04k
  const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_alias_new(graph->tape, _ccv_nnc_tensor_symbol_from_variable(graph, variable_to), tensor_variable->ofs, tensor_variable->stride, tensor_variable->info, 0);
426
1.04k
  _ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol);
427
1.04k
  return symbol;
428
1.04k
}
429
430
// Return the tensor variable that is old (the provided tensor variable will have a new setting).
431
ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_exchange_new(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable)
432
6.28k
{
433
6.28k
  struct ccv_nnc_tensor_variable_s x = *tensor_variable;
434
6.28k
  ccv_nnc_tensor_variable_t new_variable;
435
  // Need to handle alias.
436
6.28k
  if (x.alias_index_ref)
437
0
    new_variable = ccv_nnc_tensor_variable_alias_new(graph, *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, x.alias_index_ref - 1), x.ofs, x.stride, x.info);
438
6.28k
  else
439
6.28k
    new_variable = ccv_nnc_tensor_variable_new(graph, x.info);
440
6.28k
  *tensor_variable = *new_variable;
441
6.28k
  *new_variable = x;
442
  // The index should be the same though.
443
6.28k
  const int index = new_variable->index;
444
6.28k
  new_variable->index = tensor_variable->index;
445
6.28k
  if (new_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
446
2.84k
  {
447
2.84k
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, new_variable->symbol.d);
448
2.84k
    bind->index = new_variable->index;
449
2.84k
  }
450
6.28k
  tensor_variable->index = index;
451
6.28k
  return new_variable;
452
6.28k
}
453
454
void ccv_nnc_dynamic_graph_set_max_concurrency(ccv_nnc_dynamic_graph_t* const dynamic_graph, const int max_stream_count)
455
0
{
456
0
  dynamic_graph->max_stream_count = max_stream_count;
457
0
}
458
459
int ccv_nnc_dynamic_graph_set_no_grad(ccv_nnc_dynamic_graph_t* const dynamic_graph, const int no_grad)
460
9
{
461
9
  if (dynamic_graph->no_grad == no_grad)
462
0
    return -1;
463
9
  dynamic_graph->no_grad = no_grad;
464
9
  return 0;
465
9
}
466
467
static ccv_nnc_stream_context_t* _ccv_nnc_dynamic_graph_get_stream(ccv_nnc_dynamic_graph_t* const graph, const int type)
468
72
{
469
72
  if (!graph->stream_map)
470
10
    graph->stream_map = kh_init(stream_map);
471
72
  int ret = 0;
472
72
  khiter_t k = kh_put(stream_map, graph->stream_map, type, &ret);
473
72
  assert(ret >= 0);
474
72
  ccv_nnc_stream_context_t* stream = kh_val(graph->stream_map, k);
475
  // If ret == 0, the key already exist, we can return directly, otherwise, create and return.
476
72
  if (ret != 0)
477
23
  {
478
23
    stream = ccv_nnc_stream_context_new(type);
479
23
    kh_val(graph->stream_map, k) = stream;
480
23
  }
481
72
  return stream;
482
72
}
483
484
typedef struct {
485
  ccv_nnc_dynamic_graph_t* graph;
486
  int stream_type;
487
} ccv_nnc_dynamic_graph_neighbor_context_discovery_t;
488
489
static ccv_nnc_stream_context_t* _ccv_nnc_dynamic_graph_neighbor_context_discovery(const int device_id, void* const context)
490
0
{
491
0
  ccv_nnc_dynamic_graph_neighbor_context_discovery_t* const discovery = (ccv_nnc_dynamic_graph_neighbor_context_discovery_t*)context;
492
0
  int type = discovery->stream_type;
493
0
  CCV_STREAM_SET_DEVICE_ID(type, device_id);
494
0
  return _ccv_nnc_dynamic_graph_get_stream(discovery->graph, type);
495
0
}
496
497
void ccv_nnc_dynamic_graph_exec_ret(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context, ccv_nnc_graph_exec_symbol_t* const graph_execs)
498
15.3k
{
499
15.3k
  int i, j;
500
43.5k
  for (i = 0; i < input_size; 
i++28.1k
)
501
28.1k
    if (inputs[i] && 
!inputs[i]->alias_index_ref28.1k
)
502
27.1k
      { assert(inputs[i]->tensor_view); }
503
15.3k
  ccv_nnc_tensor_t* input_tensors[ccv_max(1, input_size)];
504
43.5k
  for (i = 0; i < input_size; 
i++28.1k
)
505
28.1k
    input_tensors[i] = inputs[i] ? ccv_nnc_tensor_from_variable(graph, inputs[i], stream_context) : 
02
;
506
15.3k
  ccv_nnc_tensor_symbol_t input_symbols[ccv_max(1, input_size)];
507
43.5k
  for (i = 0; i < input_size; 
i++28.1k
)
508
28.1k
    input_symbols[i] = inputs[i] ? 
_ccv_nnc_tensor_symbol_from_variable(graph, inputs[i])28.1k
:
NO_TENSOR_SYMBOL2
;
509
15.3k
  ccv_array_t* input_sources[ccv_max(1, input_size)];
510
15.3k
  ccv_array_t* input_alias_sources[ccv_max(1, input_size)];
511
43.5k
  for (i = 0; i < input_size; 
i++28.1k
)
512
28.1k
  {
513
28.1k
    input_sources[i] = input_symbols[i].d != CCV_NNC_NO_TENSOR_SYMBOL ? 
((ccv_nnc_tensor_variable_graph_bind_t*)28.1k
ccv_array_get28.1k
(graph->binds, input_symbols[i].d))->sources :
02
;
514
28.1k
    if (inputs[i] && 
inputs[i]->alias_index_ref28.1k
)
515
1.03k
    {
516
1.03k
      const int alias_index_ref = inputs[i]->alias_index_ref - 1;
517
1.03k
      assert(alias_index_ref >= 0);
518
1.03k
      ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index_ref);
519
1.03k
      input_alias_sources[i] = ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d))->sources;
520
1.03k
    } else
521
27.1k
      input_alias_sources[i] = 0;
522
28.1k
  }
523
15.3k
  const int parallel_count = ccv_max(1, parallel);
524
15.3k
  assert(input_size % parallel_count == 0);
525
15.3k
  const int per_input_size = input_size / parallel_count;
526
15.3k
  assert(output_size % parallel_count == 0);
527
15.3k
  const int per_output_size = output_size / parallel_count;
528
15.3k
  int output_auto = 0;
529
30.9k
  for (i = 0; !output_auto && 
i < output_size16.3k
;
i++15.6k
)
530
15.6k
    output_auto = outputs[i] ? 
ccv_nnc_is_tensor_auto(outputs[i]->info)15.4k
:
0200
;
531
  // One extra step, infer the parameters for outputs.
532
15.3k
  if (output_auto)
533
14.6k
  {
534
14.6k
    ccv_nnc_tensor_param_t input_params[ccv_max(1, per_input_size)];
535
14.6k
    ccv_nnc_tensor_param_t output_params[ccv_max(1, per_output_size)];
536
29.4k
    for (i = 0; i < parallel_count; 
i++14.7k
)
537
14.7k
    {
538
41.9k
      for (j = 0; j < per_input_size; 
j++27.2k
)
539
27.2k
        input_params[j] = inputs[j + i * per_input_size] ? 
inputs[j + i * per_input_size]->info27.2k
:
ccv_nnc_tensor_auto2
;
540
29.6k
      for (j = 0; j < per_output_size; 
j++14.9k
)
541
14.9k
        output_params[j] = outputs[j + i * per_output_size] ? 
outputs[j + i * per_output_size]->info14.7k
:
ccv_nnc_tensor_auto207
;
542
14.7k
      ccv_nnc_hint_tensor_auto(cmd, input_params, per_input_size, hint, output_params, per_output_size);
543
29.6k
      for (j = 0; j < per_output_size; 
j++14.9k
)
544
14.9k
        if (outputs[j + i * per_output_size])
545
14.7k
          outputs[j + i * per_output_size]->info = output_params[j];
546
14.7k
    }
547
14.6k
  }
548
15.3k
  int freeable_size = 0;
549
15.3k
  ccv_nnc_tensor_variable_t freeables[ccv_max(1, output_size)];
550
  // Refresh the symbol if it is binded to an existing exec. Otherwise we cannot keep the SSA guarantee.
551
31.0k
  for (i = 0; i < output_size; 
i++15.6k
)
552
15.6k
  {
553
    // First, go over to see whether there is enforce inplace.
554
15.6k
    int enforce_idx = -1;
555
44.7k
    for (j = 0; enforce_idx < 0 && 
j < input_size44.7k
;
j++29.1k
)
556
29.1k
      if (inputs[j] && 
ccv_nnc_cmd_enforce_inplace(cmd, j, input_size, i, output_size)29.0k
)
557
2
        enforce_idx = j;
558
15.6k
    if (enforce_idx >= 0)
559
2
      { assert(outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL); }
560
    // We don't allow or check "allow inplace" yet. That logic will be at odds with backward logic.
561
15.6k
    if (outputs[i] && 
outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL15.4k
)
562
417
    {
563
417
      const ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[i]->symbol.d);
564
417
      if (enforce_idx >= 0)
565
2
        { assert(!bind->destinations || bind->destinations->rnum == 0); }
566
417
      if (bind->sources && 
bind->sources->rnum > 0415
)
567
415
      {
568
415
        const ccv_nnc_tensor_variable_t old_var = freeables[freeable_size++] = ccv_nnc_tensor_variable_exchange_new(graph, outputs[i]);
569
        // If this is enforce output, make sure the tensor view is taken by the output.
570
415
        if (enforce_idx >= 0)
571
0
        {
572
0
          outputs[i]->destructor_hook = old_var->destructor_hook;
573
0
          outputs[i]->tensor_view = old_var->tensor_view; // Make sure the tensor view is taken over by the output.
574
0
          old_var->tensor_view = 0;
575
0
        }
576
415
      }
577
417
    }
578
15.6k
  }
579
15.3k
  ccv_nnc_tensor_t* output_tensors[ccv_max(1, per_output_size)];
580
15.3k
  if (parallel_count > 1)
581
23
  {
582
23
    const int max_device_id_size = per_input_size + per_output_size;
583
23
    assert(max_device_id_size > 0);
584
23
    int device_ids[max_device_id_size];
585
23
    ccv_nnc_stream_context_t* streams[parallel_count];
586
23
    ccv_nnc_stream_signal_t* signal;
587
23
    if (stream_context)
588
14
      signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
589
97
    for (i = 0; i < parallel_count; 
i++74
)
590
74
    {
591
74
      int flag = 0;
592
148
      for (j = 0; !flag && 
j < per_input_size78
;
j++74
)
593
74
        if (input_tensors[i * per_input_size + j])
594
74
          flag = (CCV_TENSOR_GET_MEMORY(input_tensors[i * per_input_size + j]->info.type) == CCV_TENSOR_GPU_MEMORY);
595
156
      for (j = 0; j < per_output_size; 
j++82
)
596
82
      {
597
82
        output_tensors[j] = outputs[j + i * per_output_size] ? ccv_nnc_tensor_from_variable(graph, outputs[j + i * per_output_size], stream_context) : 
08
;
598
82
        if (output_tensors[j] && 
!flag74
)
599
4
          flag = (CCV_TENSOR_GET_MEMORY(output_tensors[j]->info.type) == CCV_TENSOR_GPU_MEMORY);
600
82
      }
601
74
      const int stream_type = flag ? 
CCV_STREAM_CONTEXT_GPU70
:
CCV_STREAM_CONTEXT_CPU4
;
602
74
      const int tensor_type = flag ? 
CCV_TENSOR_GPU_MEMORY70
:
CCV_TENSOR_CPU_MEMORY4
;
603
74
      const int device_id_size = ccv_nnc_device_ids_for_io(input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, tensor_type, device_ids, max_device_id_size);
604
74
      ccv_nnc_stream_context_t* stream_0 = 0;
605
146
      for (j = 0; j < device_id_size; 
j++72
)
606
72
      {
607
72
        int type = stream_type;
608
72
        CCV_STREAM_SET_DEVICE_ID(type, device_ids[j]);
609
72
        ccv_nnc_stream_context_t* const stream = _ccv_nnc_dynamic_graph_get_stream(graph, type);
610
72
        if (!stream_0)
611
72
          stream_0 = stream;
612
72
      }
613
      // Wait signal to finish.
614
74
      if (stream_context)
615
44
      {
616
44
        if (stream_0)
617
42
          ccv_nnc_stream_context_wait_signal(stream_0, signal);
618
2
        else
619
2
          ccv_nnc_stream_context_wait(stream_context);
620
44
      }
621
74
      if (stream_0)
622
72
      {
623
72
        ccv_nnc_dynamic_graph_neighbor_context_discovery_t discovery = {
624
72
          .graph = graph,
625
72
          .stream_type = stream_type
626
72
        };
627
72
        ccv_nnc_stream_context_set_neighbor_discovery(stream_0, _ccv_nnc_dynamic_graph_neighbor_context_discovery, &discovery);
628
72
      }
629
74
      PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size);
630
74
      int k;
631
204
      for (k = 0; k < per_input_size; 
k++130
)
632
130
      {
633
130
        PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", 
k + 1, input_tensors[k + i * per_input_size], (input_tensors[k + i * per_input_size] ? input_tensors[k + i * per_input_size]->data.u8 : 0), (input_tensors[k + i * per_input_size] ? 0
CCV_TENSOR_GET_DEVICE_ID0
(input_tensors[k + i * per_input_size]->info.type) : -1));
634
130
        if (input_tensors[k + i * per_input_size] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
635
0
          ccv_nnc_print_tensor_info(input_tensors[k + i * per_input_size]);
636
130
        PRINT(CCV_CLI_INFO, "\n");
637
130
      }
638
156
      for (k = 0; k < per_output_size; 
k++82
)
639
82
      {
640
82
        PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", 
k + 1, output_tensors[k], (output_tensors[k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? 0
CCV_TENSOR_GET_DEVICE_ID0
(output_tensors[k]->info.type) : -1));
641
82
        if (output_tensors[k] && 
CCV_CLI_OUTPUT_LEVEL_IS74
(CCV_CLI_INFO))
642
0
          ccv_nnc_print_tensor_shape(output_tensors[k]);
643
82
        PRINT(CCV_CLI_INFO, "\n");
644
82
      }
645
74
      const int status = ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, stream_0);
646
74
      if (status != 0)
647
0
        PRINT(CCV_CLI_INFO, "Invalid Status: %d\n", status);
648
74
      if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE))
649
0
      {
650
0
        for (k = 0; k < per_output_size; k++)
651
0
        {
652
0
          PRINT(CCV_CLI_VERBOSE, "POST: |<- %d. %p (%p:%d)", k + 1, output_tensors[k], (output_tensors[k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? CCV_TENSOR_GET_DEVICE_ID(output_tensors[k]->info.type) : -1));
653
0
          if (output_tensors[k])
654
0
            ccv_nnc_print_tensor_info(output_tensors[k]);
655
0
          PRINT(CCV_CLI_VERBOSE, "\n");
656
0
        }
657
0
      }
658
74
      if (stream_context && 
stream_044
)
659
42
      {
660
42
        ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
661
42
        ccv_nnc_stream_context_wait_signal(stream_context, signal);
662
42
      }
663
74
      streams[i] = stream_0;
664
74
    }
665
23
    if (!stream_context)
666
39
      
for (i = 0; 9
i < parallel_count;
i++30
)
667
30
        if (streams[i])
668
30
          ccv_nnc_stream_context_wait(streams[i]);
669
15.3k
  } else {
670
30.9k
    for (i = 0; i < per_output_size; 
i++15.5k
)
671
15.5k
      output_tensors[i] = outputs[i] ? ccv_nnc_tensor_from_variable(graph, outputs[i], stream_context) : 
0199
;
672
15.3k
    PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size);
673
43.3k
    for (i = 0; i < per_input_size; 
i++28.0k
)
674
28.0k
    {
675
28.0k
      PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", 
i + 1, input_tensors[i], (input_tensors[i] ? input_tensors[i]->data.u8 : 0), (input_tensors[i] ? 0
CCV_TENSOR_GET_DEVICE_ID0
(input_tensors[i]->info.type) : -1));
676
28.0k
      if (input_tensors[i] && 
CCV_CLI_OUTPUT_LEVEL_IS28.0k
(CCV_CLI_INFO))
677
0
        ccv_nnc_print_tensor_info(input_tensors[i]);
678
28.0k
      PRINT(CCV_CLI_INFO, "\n");
679
28.0k
    }
680
15.3k
    ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors, per_input_size, output_tensors, per_output_size, stream_context);
681
30.9k
    for (i = 0; i < per_output_size; 
i++15.5k
)
682
15.5k
    {
683
15.5k
      PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", 
i + 1, output_tensors[i], (output_tensors[i] ? output_tensors[i]->data.u8 : 0), (output_tensors[i] ? 0
CCV_TENSOR_GET_DEVICE_ID0
(output_tensors[i]->info.type) : -1));
684
15.5k
      if (output_tensors[i] && 
CCV_CLI_OUTPUT_LEVEL_IS15.3k
(CCV_CLI_INFO))
685
0
        ccv_nnc_print_tensor_info(output_tensors[i]);
686
15.5k
      PRINT(CCV_CLI_INFO, "\n");
687
15.5k
    }
688
15.3k
  }
689
15.3k
  int inputs_are_constants = 1;
690
30.7k
  for (i = 0; inputs_are_constants && 
i < input_size15.3k
;
i++15.3k
)
691
15.3k
    if (inputs[i] && 
inputs[i]->type != CCV_NNC_TENSOR_CONSTANT15.3k
)
692
15.3k
      inputs_are_constants = 0;
693
15.3k
  if (input_size > 0 && 
!inputs_are_constants15.3k
&&
!graph->no_grad15.3k
) // No need to record the execution if there is no input or we disabled gradient computation.
694
15.3k
  {
695
15.3k
    ccv_nnc_tensor_symbol_t output_symbols[ccv_max(1, output_size)];
696
30.8k
    for (i = 0; i < output_size; 
i++15.5k
)
697
15.5k
      if (outputs[i])
698
15.3k
      {
699
15.3k
        assert(outputs[i]->type != CCV_NNC_TENSOR_CONSTANT);
700
15.3k
        output_symbols[i] = _ccv_nnc_tensor_symbol_from_variable(graph, outputs[i]);
701
15.3k
      } else
702
207
        output_symbols[i] = NO_TENSOR_SYMBOL;
703
15.3k
    int t;
704
30.6k
    for (t = 0; t < parallel_count; 
t++15.3k
)
705
15.3k
    {
706
15.3k
      ccv_nnc_graph_exec_symbol_t graph_exec = ccv_nnc_graph_exec_symbol_new(graph->tape, cmd, input_symbols + t * per_input_size, per_input_size, output_symbols + t * per_output_size, per_output_size, 0);
707
15.3k
      if (graph_execs)
708
2.40k
        graph_execs[t] = graph_exec;
709
      // This needs to be done before we set the new sources on the outputs.
710
43.4k
      for (i = 0; i < per_input_size; 
i++28.0k
)
711
28.0k
      {
712
28.0k
        ccv_array_t* const input_source = input_sources[i + t * per_input_size];
713
28.0k
        if (input_source)
714
28.1k
          
for (j = 0; 14.0k
j < input_source->rnum;
j++14.0k
)
715
14.0k
            ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){
716
14.0k
              .d = *(int*)ccv_array_get(input_source, j),
717
14.0k
              .graph = graph->tape
718
14.0k
            }, graph_exec);
719
28.0k
        ccv_array_t* const input_alias_source = input_alias_sources[i + t * per_input_size];
720
28.0k
        if (input_alias_source)
721
2.02k
          
for (j = 0; 1.01k
j < input_alias_source->rnum;
j++1.01k
)
722
1.01k
            ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){
723
1.01k
              .d = *(int*)ccv_array_get(input_alias_source, j),
724
1.01k
              .graph = graph->tape
725
1.01k
            }, graph_exec);
726
28.0k
      }
727
43.4k
      for (i = 0; i < per_input_size; 
i++28.0k
)
728
28.0k
      {
729
28.0k
        ccv_nnc_tensor_variable_t const input = inputs[i + t * per_input_size];
730
28.0k
        if (!input || 
input->type == CCV_NNC_TENSOR_CONSTANT28.0k
)
731
236
          continue;
732
27.8k
        ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, input_symbols[i + t * per_input_size].d);
733
27.8k
        if (!bind->destinations)
734
22.0k
          bind->destinations = ccv_array_new(sizeof(int), 1, 0);
735
27.8k
        ccv_array_add_unique_int(bind->destinations, graph_exec.d);
736
27.8k
        if (input->alias_index_ref)
737
1.01k
        {
738
1.01k
            const int alias_index = input->alias_index_ref - 1;
739
1.01k
            assert(alias_index >= 0);
740
1.01k
            ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
741
1.01k
            ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d);
742
1.01k
            if (!root_bind->destinations)
743
1.01k
              root_bind->destinations = ccv_array_new(sizeof(int), 1, 0);
744
1.01k
            ccv_array_add_unique_int(root_bind->destinations, graph_exec.d);
745
1.01k
        }
746
27.8k
      }
747
30.9k
      
for (i = 0; 15.3k
i < per_output_size;
i++15.5k
)
748
15.5k
      {
749
15.5k
        ccv_nnc_tensor_variable_t const output = outputs[i + t * per_output_size];
750
15.5k
        if (!output)
751
207
          continue;
752
15.3k
        ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, output_symbols[i + t * per_output_size].d);
753
15.3k
        assert(!bind->sources); // This is a new symbol, therefore, no binded sources associated yet.
754
15.3k
        bind->sources = ccv_array_new(sizeof(int), 1, 0);
755
15.3k
        ccv_array_add_unique_int(bind->sources, graph_exec.d);
756
15.3k
        if (output->alias_index_ref)
757
8
        {
758
8
          const int alias_index = output->alias_index_ref - 1;
759
8
          assert(alias_index >= 0);
760
8
          ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
761
8
          ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d);
762
8
          if (!root_bind->sources)
763
4
            root_bind->sources = ccv_array_new(sizeof(int), 1, 0);
764
8
          ccv_array_add_unique_int(root_bind->sources, graph_exec.d);
765
8
        }
766
15.3k
      }
767
15.3k
    }
768
15.3k
  }
769
  // Now, able to free some of the reused outputs.
770
15.7k
  
for (i = 0; 15.3k
i < freeable_size;
i++415
)
771
415
    ccv_nnc_tensor_variable_free(graph, freeables[i]);
772
15.3k
}
773
774
int ccv_nnc_dynamic_graph_exec(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context)
775
12.9k
{
776
12.9k
  ccv_nnc_dynamic_graph_exec_ret(graph, cmd, hint, flags, inputs, input_size, outputs, output_size, parallel, stream_context, 0);
777
12.9k
  return CCV_NNC_EXEC_SUCCESS;
778
12.9k
}
779
780
static int _ccv_nnc_tensor_variable_is_only_output(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_graph_bind_t* bind, const int symbol_d)
781
17.5k
{
782
17.5k
  if (bind->alias_ref)
783
1.01k
    bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, bind->alias_ref - 1);
784
17.5k
  if (!bind->sources || bind->sources->rnum == 0)
785
0
    return 1;
786
17.5k
  int i;
787
33.9k
  for (i = 0; i < bind->sources->rnum; 
i++16.4k
)
788
17.5k
  {
789
17.5k
    const int exec_symbol_d = *(int*)ccv_array_get(bind->sources, i);
790
17.5k
    const ccv_nnc_graph_exec_symbol_t exec_symbol = {
791
17.5k
      .d = exec_symbol_d,
792
17.5k
      .graph = graph->tape
793
17.5k
    };
794
17.5k
    const int* outputs; int output_size;
795
17.5k
    ccv_nnc_graph_exec_symbol_io(graph->tape, exec_symbol, 0, 0, &outputs, &output_size);
796
17.5k
    int j;
797
34.0k
    for (j = 0; j < output_size; 
j++16.5k
)
798
17.5k
      if (outputs[j] >= 0 && 
outputs[j] != symbol_d17.5k
) // If output is me, it is the only output.
799
1.04k
      {
800
1.04k
        assert(outputs[j] < graph->binds->rnum);
801
1.04k
        const ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j]);
802
        // This is in use and is it not a constant symbol.
803
1.04k
        if (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT25
)
804
25
          return 0;
805
1.01k
        if (other_bind->alias_ref) // If this is alias, use its original's destinations.
806
1
          other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
807
        // The original is in use and is it not a constant symbol.
808
1.01k
        if (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT1
)
809
1
          return 0;
810
1.01k
        if (other_bind->destinations && other_bind->destinations->rnum > 0)
811
1.00k
          return 0;
812
1.01k
      }
813
17.5k
  }
814
16.4k
  return 1;
815
17.5k
}
816
817
static void _ccv_nnc_update_bind_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws)
818
24.9k
{
819
24.9k
  int i;
820
24.9k
  if (bind->destinations)
821
24.7k
  {
822
24.7k
    int flag = 0;
823
49.3k
    for (i = 0; !flag && 
i < bind->destinations->rnum24.7k
;
i++24.5k
)
824
24.5k
    {
825
24.5k
      const int exec_symbol_d = *(int*)ccv_array_get(bind->destinations, i);
826
24.5k
      if (exec_symbol_d == freed_exec_symbol_d)
827
24.5k
      {
828
24.5k
        if (i < bind->destinations->rnum - 1)
829
17
          *(int*)ccv_array_get(bind->destinations, i) = *(int*)ccv_array_get(bind->destinations, bind->destinations->rnum - 1);
830
24.5k
        --bind->destinations->rnum;
831
24.5k
        flag = 1;
832
24.5k
      }
833
24.5k
    }
834
    // This symbol can be freed.
835
24.7k
    if (flag && 
bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED24.5k
)
836
17.6k
    {
837
17.6k
      ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind;
838
17.6k
      if (bind->alias_ref)
839
1.01k
      {
840
1.01k
        root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1);
841
1.01k
        if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
842
1.01k
          root_bind = bind;
843
1.01k
      }
844
      // If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more.
845
      // It is possible because exec will be freed already, thus, it is safe to remove this alias out.
846
17.6k
      if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED &&
847
17.6k
        
(17.6k
(17.6k
!root_bind->sources17.6k
||
root_bind->sources->rnum == 08.64k
) ||
_ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)6.01k
) &&
848
17.6k
        
root_bind->destinations->rnum == 017.6k
)
849
17.6k
      {
850
17.6k
        if (root_bind->sources)
851
14.6k
          
for (i = 0; 8.63k
i < root_bind->sources->rnum;
i++6.00k
)
852
6.00k
            ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i));
853
17.6k
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
854
17.6k
        ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
855
17.6k
          .d = tensor_index,
856
17.6k
          .graph = graph->tape
857
17.6k
        });
858
17.6k
      } else 
if (8
bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED8
&& // Handle the case the bind is already freed, and it doesn't have any sources or destinations.
859
8
        bind->alias_ref && 
(2
!bind->sources2
||
bind->sources->rnum == 00
) &&
(2
!bind->destinations2
||
bind->destinations->rnum == 02
)) {
860
2
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
861
2
        ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
862
2
          .d = tensor_index,
863
2
          .graph = graph->tape
864
2
        });
865
2
      }
866
17.6k
    }
867
24.7k
  }
868
24.9k
}
869
870
static void _ccv_nnc_update_bind_sources_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws)
871
7.29k
{
872
7.29k
  int i;
873
7.29k
  if (bind->sources)
874
7.29k
  {
875
7.29k
    int flag = 0;
876
14.5k
    for (i = 0; !flag && 
i < bind->sources->rnum7.29k
;
i++7.29k
)
877
7.29k
    {
878
7.29k
      const int exec_symbol_d = *(int*)ccv_array_get(bind->sources, i);
879
7.29k
      if (exec_symbol_d == freed_exec_symbol_d)
880
7.29k
      {
881
7.29k
        if (i < bind->sources->rnum - 1)
882
2
          *(int*)ccv_array_get(bind->sources, i) = *(int*)ccv_array_get(bind->sources, bind->sources->rnum - 1);
883
7.29k
        --bind->sources->rnum;
884
7.29k
        flag = 1;
885
7.29k
      }
886
7.29k
    }
887
7.29k
    if (flag && !bind->alias_ref && 
bind->index >= 07.28k
&&
bind->type == CCV_NNC_TENSOR_CONSTANT4.84k
&& // If it is detached (constant but previously has sources). Now can check again.
888
7.29k
      
(3
bind->sources->rnum == 03
||
_ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)0
) &&
889
7.29k
      
(3
!bind->destinations3
||
bind->destinations->rnum == 03
))
890
3
    {
891
      // If this is constant, set it to be no symbol again.
892
3
      ccv_nnc_tensor_variable_t tv = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, bind->index);
893
3
      tv->symbol = NO_TENSOR_SYMBOL;
894
3
      _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
895
3
      ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
896
3
        .d = tensor_index,
897
3
        .graph = graph->tape
898
3
      });
899
7.28k
    } else if (flag && bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED) {
900
      // This symbol can be freed.
901
2.44k
      ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind;
902
2.44k
      if (bind->alias_ref)
903
3
      {
904
3
        root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1);
905
3
        if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
906
0
          root_bind = bind;
907
3
      }
908
      // If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more.
909
      // It is possible because exec will be freed already, thus, it is safe to remove this alias out.
910
2.44k
      if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED &&
911
2.44k
        
(2.43k
root_bind->sources->rnum == 02.43k
||
_ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)0
) &&
912
2.44k
        
(2.43k
!root_bind->destinations2.43k
||
root_bind->destinations->rnum == 02.43k
))
913
6
      {
914
6
        for (i = 0; i < root_bind->sources->rnum; 
i++0
)
915
0
          ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i));
916
6
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
917
6
        ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
918
6
          .d = tensor_index,
919
6
          .graph = graph->tape
920
6
        });
921
2.43k
      } else if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && // Handle the case the bind is already freed, and it doesn't have any sources or destinations.
922
2.43k
        bind->alias_ref && 
(3
!bind->sources3
||
bind->sources->rnum == 03
) &&
(3
!bind->destinations3
||
bind->destinations->rnum == 00
)) {
923
3
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
924
3
        ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
925
3
          .d = tensor_index,
926
3
          .graph = graph->tape
927
3
        });
928
3
      }
929
2.44k
    }
930
7.29k
  }
931
7.29k
}
932
933
static void _ccv_nnc_update_bind_sources_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, const int* const inputs, const int input_size, const int* const outputs, const int output_size, ccv_array_t* const ws)
934
15.3k
{
935
15.3k
  int i;
936
43.2k
  for (i = 0; i < input_size; 
i++27.9k
)
937
27.9k
    if (inputs[i] >= 0 && inputs[i] < binds->rnum)
938
27.9k
    {
939
27.9k
      ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, inputs[i]);
940
27.9k
      if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
941
4.01k
        continue;
942
23.9k
      if (bind->alias_ref)
943
1.01k
      {
944
1.01k
        const int alias_to = bind->alias_ref - 1;
945
1.01k
        ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to);
946
1.01k
        if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE)
947
1.01k
          _ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws);
948
1.01k
      }
949
23.9k
      _ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, bind, inputs[i], ws);
950
23.9k
    }
951
  // Note that this works because there is no overlap of inputs / outputs. (What about alias?).
952
30.8k
  for (i = 0; i < output_size; 
i++15.5k
)
953
15.5k
    if (outputs[i] >= 0 && 
outputs[i] < binds->rnum15.3k
)
954
15.3k
    {
955
15.3k
      ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, outputs[i]);
956
15.3k
      if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
957
8.02k
        continue;
958
7.28k
      if (bind->alias_ref)
959
5
      {
960
5
        const int alias_to = bind->alias_ref - 1;
961
5
        ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to);
962
5
        if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE)
963
5
          _ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws);
964
5
      }
965
7.28k
      _ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, bind, outputs[i], ws);
966
7.28k
    }
967
15.3k
}
968
969
static void _ccv_nnc_stateful_exec_free_if_possible(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol)
970
15.3k
{
971
15.3k
  if (!graph->stateful_execs)
972
6.06k
    return;
973
9.23k
  assert(symbol.d >= 0);
974
9.23k
  ccv_array_t* const stateful_execs = graph->stateful_execs;
975
9.23k
  ccv_nnc_cmd_t cmd = ccv_nnc_graph_exec_symbol_cmd(graph->tape, symbol);
976
9.23k
  ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data;
977
9.23k
  if (!stateful_exec)
978
6.83k
    return;
979
  // If there is no backward, no need to apply gradients.
980
  // Otherwise, if we applied gradients, we can free it as well.
981
  // We don't free this stateful exec because apply gradients doesn't require any variables alive.
982
2.40k
  if (!stateful_exec->did_backward_but_not_apply_gradients)
983
300
  {
984
300
    const int index = stateful_exec->index;
985
300
    ccfree(stateful_exec);
986
300
    if (index < graph->reuse_stateful_exec || graph->reuse_stateful_exec < 0)
987
300
      graph->reuse_stateful_exec = index;
988
300
    *(ccv_nnc_stateful_exec_t**)ccv_array_get(stateful_execs, index) = 0;
989
300
  } else
990
2.10k
    stateful_exec->should_free = 1;
991
2.40k
}
992
993
static int _ccv_nnc_tensor_bind_trace_forward_to_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_variable_graph_bind_t* const bind, ccv_nnc_tensor_variable_graph_bind_t* const root_bind, int* const ws_start, const int assuming_no_source) // assuming_no_source means we are going to remove sources if possible, thus, it is irrelevant.
994
27.4k
{
995
27.4k
  int can_free_symbol = 0;
996
27.4k
  const int sources_and_is_only_output = (root_bind->sources && 
root_bind->sources->rnum > 016.3k
) &&
_ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_variable->symbol.d)11.4k
;
997
27.4k
  if (!root_bind->sources || 
root_bind->sources->rnum == 016.3k
||
sources_and_is_only_output11.4k
||
assuming_no_source1.02k
)
998
26.4k
  {
999
26.4k
    int i, j;
1000
26.4k
    can_free_symbol = 1; // Assume we can free this symbol.
1001
26.4k
    if (!graph->ws)
1002
18
      graph->ws = ccv_array_new(sizeof(int), root_bind->destinations ? root_bind->destinations->rnum : 
00
, 0);
1003
26.4k
    ccv_array_t* const ws = graph->ws;
1004
26.4k
    ccv_array_clear(ws);
1005
26.4k
    if (root_bind->destinations)
1006
43.4k
      
for (i = 0; 21.9k
i < root_bind->destinations->rnum;
i++21.5k
)
1007
21.5k
        ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->destinations, i));
1008
26.4k
    const int ws_init_size = ws->rnum;
1009
26.4k
    *ws_start = ws_init_size;
1010
    // Add all sources from root_bind, in case it has been freed (during update bind sources / destinations when free.
1011
26.4k
    if (root_bind->sources)
1012
25.7k
      
for (i = 0; 15.2k
i < root_bind->sources->rnum;
i++10.4k
)
1013
10.4k
        ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i));
1014
    // If we cannot loop over any exec symbols (this is not in use). It is simple to determine whether we want
1015
    // to free it or not: if this is an alias and the origin is not freed, we cannot free this symbol.
1016
26.4k
    if (ws_init_size == 0)
1017
4.91k
      can_free_symbol = (!bind->alias_ref || 
root_bind->index < 018
);
1018
    // Go through all the exec symbols use this tensor, to see whether they have inputs that has other sources.
1019
47.9k
    for (i = 0; i < ws_init_size; 
i++21.5k
)
1020
21.5k
    {
1021
21.5k
      const int exec_symbol_d = *(int*)ccv_array_get(ws, i);
1022
21.5k
      const ccv_nnc_graph_exec_symbol_t symbol = {
1023
21.5k
        .d = exec_symbol_d,
1024
21.5k
        .graph = graph->tape
1025
21.5k
      };
1026
21.5k
      const int* inputs; int input_size;
1027
21.5k
      const int* outputs; int output_size;
1028
21.5k
      ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size);
1029
21.5k
      int flag = 0; // flag denotes whether there are cases to keep this exec symbol.
1030
21.5k
      if (!root_bind->sources || 
root_bind->sources->rnum == 010.8k
||
assuming_no_source8.44k
)
1031
13.1k
      {
1032
        // If there is no sources, check if other sources can depend on this exec, if they do, we cannot free this.
1033
36.8k
        for (j = 0; !flag && 
j < input_size28.6k
;
j++23.7k
)
1034
23.7k
          if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum && inputs[j] != tensor_variable->symbol.d)
1035
14.4k
          {
1036
14.4k
            ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j]);
1037
14.4k
            if (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT6.24k
)
1038
6.22k
              flag = 1;
1039
8.26k
            else {
1040
8.26k
              if (other_bind->alias_ref) // If this is alias, use its original's destinations.
1041
4
                other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
1042
8.26k
              flag = (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT21
) || (other_bind->type != CCV_NNC_TENSOR_CONSTANT &&
other_bind->sources8.23k
&&
other_bind->sources->rnum > 02.22k
); // Constant should have no source, or it is detached.
1043
8.26k
            }
1044
14.4k
          }
1045
13.1k
      } else {
1046
        // If there are sources, check whether we have outputs or not. If we do, we cannot free this.
1047
16.9k
        for (j = 0; !flag && 
j < output_size8.45k
;
j++8.45k
)
1048
8.45k
          if (outputs[j] >= 0 && 
outputs[j] < graph->binds->rnum8.45k
)
1049
8.45k
          {
1050
8.45k
            ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j]);
1051
8.45k
            if (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT4.43k
)
1052
4.43k
              flag = 1;
1053
4.02k
            else {
1054
4.02k
              if (other_bind->alias_ref) // If this is alias, use its original's destinations.
1055
0
                other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
1056
4.02k
              flag = (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT0
) || (other_bind->destinations && other_bind->destinations->rnum > 0);
1057
4.02k
            }
1058
8.45k
          }
1059
8.44k
      }
1060
      // This exec can be freed if there is no input required or there is no output required.
1061
21.5k
      can_free_symbol = (can_free_symbol && !flag);
1062
21.5k
      if (!flag)
1063
4.87k
      {
1064
        // Go over inputs and remove all references from binded destinations.
1065
        // and go over outputs remove all references from binded sources.
1066
4.87k
        _ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws);
1067
4.87k
        const int* outgoings; int outgoing_size;
1068
4.87k
        ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size);
1069
7.30k
        for (j = 0; j < outgoing_size; 
j++2.43k
)
1070
2.43k
          ccv_array_add_unique_int(ws, outgoings[j]);
1071
4.87k
        _ccv_nnc_stateful_exec_free_if_possible(graph, symbol);
1072
4.87k
        ccv_nnc_graph_exec_symbol_free(graph->tape, symbol);
1073
4.87k
      }
1074
21.5k
    }
1075
26.4k
  }
1076
27.4k
  return can_free_symbol;
1077
27.4k
}
1078
1079
static void _ccv_nnc_tensor_bind_trace_backward_to_free(ccv_nnc_dynamic_graph_t* const graph, ccv_array_t* const ws, const int ws_start)
1080
9.75k
{
1081
9.75k
  int i, j;
1082
  // Now, go over the outgoings, if it is removed, add more to it. Note that the ws array can grow while iterating over.
1083
22.6k
  for (i = ws_start; i < ws->rnum; 
i++12.8k
)
1084
12.8k
  {
1085
12.8k
    const int exec_symbol_d = *(int*)ccv_array_get(ws, i);
1086
12.8k
    const ccv_nnc_graph_exec_symbol_t symbol = {
1087
12.8k
      .d = exec_symbol_d,
1088
12.8k
      .graph = graph->tape
1089
12.8k
    };
1090
12.8k
    const int* inputs; int input_size;
1091
12.8k
    const int* outputs; int output_size;
1092
12.8k
    ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size);
1093
12.8k
    int flag = 0;
1094
29.9k
    for (j = 0; !flag && 
j < input_size19.5k
;
j++17.1k
)
1095
17.1k
      if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum)
1096
17.1k
      {
1097
17.1k
        ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j]);
1098
17.1k
        if (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT4.64k
)
1099
4.44k
          flag = 1;
1100
12.6k
        else {
1101
12.6k
          if (other_bind->alias_ref) // If this is alias, use its original's destinations.
1102
1.02k
            other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
1103
12.6k
          flag = (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT203
) || (other_bind->type != CCV_NNC_TENSOR_CONSTANT &&
other_bind->sources12.4k
&&
other_bind->sources->rnum > 010.4k
);
1104
12.6k
        }
1105
17.1k
      }
1106
12.8k
    if (flag) // If any inputs make free this destination impossible. Check whether all its outputs are done.
1107
10.4k
    {
1108
10.4k
      int output_flag = 0;
1109
21.1k
      for (j = 0; !output_flag && 
j < output_size18.6k
;
j++10.6k
)
1110
10.6k
        if (outputs[j] >= 0 && 
outputs[j] < graph->binds->rnum10.4k
)
1111
10.4k
        {
1112
10.4k
          ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j]);
1113
10.4k
          if (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT2.41k
)
1114
2.41k
            output_flag = 1;
1115
8.04k
          else {
1116
8.04k
            if (other_bind->alias_ref) // If this is alias, use its original's destinations.
1117
0
              other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
1118
8.04k
            output_flag = (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT0
) || (other_bind->destinations &&
other_bind->destinations->rnum > 020
);
1119
8.04k
          }
1120
10.4k
        }
1121
10.4k
      if (!output_flag) // If no output is used (used means it has a tensor variable, or it has a destination).
1122
8.02k
        flag = 0;
1123
10.4k
    }
1124
    // Went over all the inputs, it turns out no more inputs has other references, safe to remove.
1125
12.8k
    if (!flag)
1126
10.4k
    {
1127
10.4k
      _ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws);
1128
10.4k
      const int* outgoings; int outgoing_size;
1129
10.4k
      ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size);
1130
      // It it has outgoings, add that for further inspection.
1131
12.8k
      for (j = 0; j < outgoing_size; 
j++2.40k
)
1132
2.40k
        ccv_array_add_unique_int(ws, outgoings[j]);
1133
10.4k
      _ccv_nnc_stateful_exec_free_if_possible(graph, symbol);
1134
10.4k
      ccv_nnc_graph_exec_symbol_free(graph->tape, symbol);
1135
10.4k
    }
1136
12.8k
  }
1137
9.75k
}
1138
1139
void ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
1140
32.1k
{
1141
  // If it contains a symbol, this tensor variable is not a free variable. It is either used as input or output.
1142
32.1k
  if (tensor_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1143
27.4k
  {
1144
    // If it is not a free variable, when can we free the symbol and the underlying variable?
1145
    // 1. There should be no sources (the command generate this tensor should be freed) or the output of these sources is only the current one;
1146
    // 2. The destinations (the commands that uses this tensor) should have no other inputs, or the other inputs has no binded sources as well.
1147
27.4k
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, tensor_variable->symbol.d);
1148
    // There should be no source associated with it no more.
1149
    // I am free if no exec symbol is producing me or the symbol producing me can only producing me (thus, it is not required to
1150
    // compute gradient because I am the only variable it can compute gradient for).
1151
27.4k
    ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind;
1152
27.4k
    if (bind->alias_ref)
1153
1.03k
    {
1154
1.03k
      const int alias_to = bind->alias_ref - 1;
1155
1.03k
      root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, alias_to);
1156
1.03k
    }
1157
27.4k
    int ws_start;
1158
27.4k
    const int can_free_symbol = _ccv_nnc_tensor_bind_trace_forward_to_free(graph, tensor_variable, bind, root_bind, &ws_start, 0);
1159
27.4k
    if (can_free_symbol)
1160
9.74k
    {
1161
9.74k
      _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
1162
9.74k
      ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol);
1163
9.74k
      _ccv_nnc_tensor_bind_trace_backward_to_free(graph, graph->ws, ws_start);
1164
17.7k
    } else { // If this symbol is not freed, move the tensor view to the bind.
1165
      // If current bind is an alias, and it doesn't have any sources or destinations. We cannot find this alias
1166
      // through any exec. This is not only safe to delete, but has to be deleted. We don't need to handle this
1167
      // if free_symbol is true, because when that happens, root_bind will be deleted, and we will clean up the
1168
      // alias in that process.
1169
17.7k
      if (bind->alias_ref && 
(1.03k
!bind->sources1.03k
||
bind->sources->rnum == 05
) &&
(1.03k
!bind->destinations1.03k
||
bind->destinations->rnum == 01.01k
))
1170
20
      {
1171
20
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
1172
20
        ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol);
1173
17.6k
      } else {
1174
17.6k
        bind->index = CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED; // This tensor variable will be freed, but this symbol extra will continue exists.
1175
17.6k
        bind->destructor_hook.func = tensor_variable->destructor_hook.func; // Transfer the destructor callback.
1176
17.6k
        bind->destructor_hook.context = tensor_variable->destructor_hook.context; // Transfer the destructor callback context.
1177
17.6k
        bind->tensor_view = tensor_variable->tensor_view; // Transfer the ownership to the bind.
1178
17.6k
        tensor_variable->tensor_view = 0;
1179
17.6k
      }
1180
17.7k
    }
1181
27.4k
  }
1182
32.1k
  _ccv_nnc_tensor_variable_free(graph, tensor_variable, 1);
1183
32.1k
}
1184
1185
void ccv_nnc_tensor_variable_detach(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
1186
6
{
1187
  // This cannot be an alias.
1188
6
  assert(!tensor_variable->alias_index_ref);
1189
  // If no computation done yet, mark this as constant.
1190
6
  if (tensor_variable->symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1191
0
  {
1192
0
    tensor_variable->type = CCV_NNC_TENSOR_CONSTANT;
1193
0
    return;
1194
0
  }
1195
  // Otherwise, we need to do some book keeping updates to make sure it doesn't participate gradient computation any more.
1196
6
  ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, tensor_variable->symbol.d);
1197
  // Because tensor variable cannot be alias, its bind cannot have alias pointer.
1198
6
  assert(!bind->alias_ref);
1199
  // Go through to break ties between sources and destinations.
1200
6
  int i, j;
1201
6
  if (bind->sources && bind->destinations)
1202
6
  {
1203
11
    for (i = 0; i < bind->sources->rnum; 
i++5
)
1204
5
    {
1205
5
      const int s = *(int*)ccv_array_get(bind->sources, i);
1206
5
      const int* outputs; int output_size;
1207
5
      const ccv_nnc_graph_exec_symbol_t s_symbol = {
1208
5
        .d = s,
1209
5
        .graph = graph->tape
1210
5
      };
1211
5
      ccv_nnc_graph_exec_symbol_io(graph->tape, s_symbol, 0, 0, &outputs, &output_size);
1212
10
      for (j = 0; j < bind->destinations->rnum; 
j++5
)
1213
5
      {
1214
5
        const int d = *(int*)ccv_array_get(bind->destinations, j);
1215
5
        const ccv_nnc_graph_exec_symbol_t d_symbol = {
1216
5
          .d = d,
1217
5
          .graph = graph->tape
1218
5
        };
1219
5
        const int* inputs; int input_size;
1220
5
        ccv_nnc_graph_exec_symbol_io(graph->tape, d_symbol, &inputs, &input_size, 0, 0);
1221
5
        int x, y;
1222
5
        int flag = 0; // Whether we find a symbol that connects source and destination but not the current one we detach. If found, we cannot break the tie between s_symbol and d_symbol.
1223
10
        for (x = 0; !flag && x < output_size; 
x++5
)
1224
5
        {
1225
5
          ccv_nnc_tensor_symbol_t x_symbol = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){
1226
5
            .d = outputs[x],
1227
5
            .graph = graph->tape
1228
5
          });
1229
5
          if (x_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1230
5
          {
1231
5
            x_symbol.d = outputs[x];
1232
5
            x_symbol.graph = graph->tape;
1233
5
          }
1234
5
          if (x_symbol.d == tensor_variable->symbol.d || 
x_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL0
)
1235
5
            continue;
1236
0
          for (y = 0; !flag && y < input_size; y++)
1237
0
          {
1238
0
            ccv_nnc_tensor_symbol_t y_symbol = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){
1239
0
              .d = inputs[y],
1240
0
              .graph = graph->tape
1241
0
            });
1242
0
            if (y_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1243
0
            {
1244
0
              y_symbol.d = inputs[y];
1245
0
              y_symbol.graph = graph->tape;
1246
0
            }
1247
0
            if (y_symbol.d == tensor_variable->symbol.d || y_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1248
0
              continue;
1249
0
            flag = (x_symbol.d == y_symbol.d);
1250
0
          }
1251
0
        }
1252
5
        if (!flag)
1253
5
          ccv_nnc_graph_exec_symbol_disjoin(graph->tape, s_symbol, d_symbol);
1254
5
      }
1255
5
    }
1256
6
  }
1257
6
  const int sources_and_is_only_output = (bind->sources && bind->sources->rnum > 0) && 
_ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_variable->symbol.d)5
;
1258
6
  if (!bind->sources || bind->sources->rnum == 0 || 
sources_and_is_only_output5
)
1259
6
  {
1260
6
    int ws_start = -1;
1261
6
    _ccv_nnc_tensor_bind_trace_forward_to_free(graph, tensor_variable, bind, bind, &ws_start, 1);
1262
    // Because we are detaching from the graph, there is no need to forward trace to see if it is not used and
1263
    // then to remove the source execs. We can remove them right now, breaking the graph in two. That is why
1264
    // we called trace backward to free regardless the outcome of the forward to free.
1265
6
    if (ws_start == -1)
1266
0
    {
1267
0
      if (!graph->ws)
1268
0
        graph->ws = ccv_array_new(sizeof(int), bind->destinations ? bind->destinations->rnum : 0, 0);
1269
0
      ccv_array_t* const ws = graph->ws;
1270
0
      ccv_array_clear(ws);
1271
0
      if (bind->sources)
1272
0
        for (i = 0; i < bind->sources->rnum; i++)
1273
0
          ccv_array_add_unique_int(ws, *(int*)ccv_array_get(bind->sources, i));
1274
0
      ws_start = 0;
1275
0
    }
1276
6
    _ccv_nnc_tensor_bind_trace_backward_to_free(graph, graph->ws, ws_start);
1277
6
  }
1278
  // If now bind has no relevant sources or destinations, we can safely free the underlying tensor symbol.
1279
6
  if ((!bind->sources || bind->sources->rnum == 0) && 
(1
!bind->destinations1
||
bind->destinations->rnum == 01
))
1280
1
  {
1281
1
    _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
1282
1
    ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol);
1283
1
    tensor_variable->type = CCV_NNC_TENSOR_CONSTANT;
1284
1
    tensor_variable->symbol = NO_TENSOR_SYMBOL;
1285
1
    return;
1286
1
  }
1287
  // Mark both as constant, such that even if it cannot be freed now, it can be freed as soon as possible later.
1288
5
  bind->type = CCV_NNC_TENSOR_CONSTANT;
1289
5
  tensor_variable->type = CCV_NNC_TENSOR_CONSTANT;
1290
5
}
1291
1292
void ccv_nnc_dynamic_graph_has_effect_to_tensor_variables(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t* const source_variables, const int source_variable_size, const ccv_nnc_tensor_variable_t* const destination_variables, const int destination_variable_size, uint64_t* const bitmask)
1293
12
{
1294
12
  int i, j;
1295
12
  ccv_array_t* const sources_destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), source_variable_size + destination_variable_size, 0);
1296
31
  for (i = 0; i < source_variable_size; 
i++19
)
1297
19
  {
1298
19
    if (source_variables[i]->symbol.d < 0)
1299
0
      continue;
1300
19
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d);
1301
19
    if (bind->destinations && 
bind->destinations->rnum > 017
)
1302
42
      
for (j = 0; 17
j < bind->destinations->rnum;
j++25
)
1303
25
      {
1304
        // It is ok to have duplicate symbols.
1305
25
        const int d = *(int*)ccv_array_get(bind->destinations, j);
1306
25
        ccv_nnc_graph_exec_symbol_t symbol = {
1307
25
          .d = d,
1308
25
          .graph = graph->tape
1309
25
        };
1310
25
        ccv_array_push(sources_destinations, &symbol);
1311
25
      }
1312
19
  }
1313
12
  const int source_size = sources_destinations->rnum;
1314
24
  for (i = 0; i < destination_variable_size; 
i++12
)
1315
12
  {
1316
12
    if (destination_variables[i]->symbol.d < 0)
1317
0
      continue;
1318
12
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, destination_variables[i]->symbol.d);
1319
12
    if (bind->sources && bind->sources->rnum > 0)
1320
20
      
for (j = 0; 10
j < bind->sources->rnum;
j++10
)
1321
10
      {
1322
        // It is ok to have duplicate symbols.
1323
10
        const int d = *(int*)ccv_array_get(bind->sources, j);
1324
10
        ccv_nnc_graph_exec_symbol_t symbol = {
1325
10
          .d = d,
1326
10
          .graph = graph->tape
1327
10
        };
1328
10
        ccv_array_push(sources_destinations, &symbol);
1329
10
      }
1330
12
  }
1331
12
  const int destination_size = sources_destinations->rnum - source_size;
1332
12
  if (source_size == 0 || destination_size == 0)
1333
2
  {
1334
2
    ccv_array_free(sources_destinations);
1335
2
    return;
1336
2
  }
1337
10
  const int bitmask_size = ((source_size + 63) >> 6);
1338
10
  assert(bitmask_size < 256);
1339
10
  uint64_t exec_bitmask[bitmask_size];
1340
10
  ccv_nnc_symbolic_graph_sources_to_destinations(graph->tape, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, 0), source_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, source_size), destination_size, exec_bitmask);
1341
10
  int k = 0;
1342
27
  for (i = 0; i < source_variable_size; 
i++17
)
1343
17
  {
1344
17
    if (source_variables[i]->symbol.d < 0)
1345
0
    {
1346
0
      bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63));
1347
0
      continue;
1348
0
    }
1349
17
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d);
1350
17
    int flag = 0;
1351
17
    if (bind->destinations && 
bind->destinations->rnum > 015
)
1352
15
    {
1353
15
      assert(k <= source_size - bind->destinations->rnum);
1354
32
      
for (j = 0; 15
!flag &&
j < bind->destinations->rnum20
;
j++17
)
1355
17
        flag = (((uint64_t)1 << ((k + j) & 63)) & exec_bitmask[(k + j) >> 6]);
1356
15
      k += bind->destinations->rnum;
1357
15
    }
1358
17
    if (flag)
1359
12
      bitmask[i >> 6] |= ((uint64_t)1 << (i & 63));
1360
5
    else
1361
5
      bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63));
1362
17
  }
1363
10
  ccv_array_free(sources_destinations);
1364
10
}
1365
1366
int ccv_nnc_dynamic_graph_bookkeeping_count(const ccv_nnc_dynamic_graph_t* const graph, const int type)
1367
451
{
1368
451
  return ccv_nnc_symbolic_graph_active_symbol_count(graph->tape, type);
1369
451
}
1370
1371
void ccv_nnc_dynamic_graph_dot(const ccv_nnc_dynamic_graph_t* const graph, const int flags, FILE* out)
1372
416
{
1373
416
  ccv_nnc_symbolic_graph_dot(graph->tape, flags, out);
1374
416
}
1375
1376
void ccv_nnc_dynamic_graph_format(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
1377
0
{
1378
0
  ccv_nnc_symbolic_graph_format(graph->tape, 0, 0, 0, 0, format_fn, context);
1379
0
}