Coverage Report

Created: 2026-04-14 19:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_dynamic_graph.c
Line
Count
Source
1
#include "ccv_nnc.h"
2
#include "ccv_nnc_easy.h"
3
#include "ccv_nnc_internal.h"
4
#include "ccv_nnc_easy.h"
5
#include "ccv_internal.h"
6
#include "_ccv_nnc_dynamic_graph.h"
7
8
// MARK - Level-4 API
9
10
ccv_nnc_dynamic_graph_t* ccv_nnc_dynamic_graph_new(void)
11
61
{
12
61
  ccv_nnc_dynamic_graph_t* graph = ccmalloc(sizeof(ccv_nnc_dynamic_graph_t));
13
61
  graph->no_grad = 0;
14
61
  graph->reuse_var = -1;
15
61
  graph->vars = ccv_array_new(sizeof(ccv_nnc_tensor_variable_t), 1, 0);
16
61
  graph->binds = ccv_array_new(sizeof(ccv_nnc_tensor_variable_graph_bind_t), 1, 0);
17
61
  graph->tape = ccv_nnc_symbolic_graph_new();
18
61
  graph->xpu_alloc.mp_hdr = -1;
19
61
  graph->xpu_alloc.freed = kh_init(dy_str);
20
61
  graph->xpu_alloc.allocd = kh_init(dy_alloc);
21
  // These may not be used as frequent, init as needed.
22
61
  graph->stateful_execs = 0;
23
61
  graph->reuse_stateful_exec = -1;
24
61
  graph->stream_map = 0;
25
61
  graph->ws = 0;
26
61
  return graph;
27
61
}
28
29
static void _ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int zeroing)
30
32.3k
{
31
32.3k
  const int index = tensor_variable->index;
32
32.3k
  if (tensor_variable->tensor_view)
33
14.6k
  {
34
14.6k
    if (tensor_variable->destructor_hook.func)
35
4
      tensor_variable->destructor_hook.func(graph, (ccv_nnc_tensor_t*)tensor_variable->tensor_view, tensor_variable->destructor_hook.context);
36
14.6k
    if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tensor_variable->tensor_view))
37
14.2k
    {
38
14.2k
      if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view))
39
7
        ccv_nnc_tensor_view_free(tensor_variable->tensor_view);
40
14.2k
      else {
41
14.2k
        if (!tensor_variable->alias_index_ref && // Return this memory to the graph.
42
14.2k
          
CCV_TENSOR_GET_MEMORY14.2k
(tensor_variable->tensor_view->info.type) == CCV_TENSOR_GPU_MEMORY14.2k
&&
tensor_variable->tensor_view->data.u8419
)
43
419
          ccv_nnc_xpu_free(&graph->xpu_alloc, tensor_variable->tensor_view->data.u8);
44
14.2k
        ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view);
45
14.2k
      }
46
14.2k
    }
47
14.6k
  }
48
32.3k
  ccfree(tensor_variable);
49
32.3k
  if (zeroing)
50
32.1k
    *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, index) = 0;
51
32.3k
  int i;
52
64.4k
  for (i = graph->vars->rnum - 1; i >= 0; 
i--32.1k
)
53
64.2k
    if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) != 0)
54
32.1k
    {
55
32.1k
      graph->vars->rnum = i + 1;
56
32.1k
      break;
57
32.1k
    }
58
32.3k
  if (index < graph->vars->rnum &&
59
32.3k
    
(23.3k
index < graph->reuse_var23.3k
||
graph->reuse_var < 017.3k
))
60
11.9k
    graph->reuse_var = index;
61
20.4k
  else if (graph->reuse_var >= graph->vars->rnum)
62
5.62k
    graph->reuse_var = -1;
63
32.3k
}
64
65
static void _ccv_nnc_tensor_variable_graph_bind_free(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int zeroing)
66
27.8k
{
67
27.8k
  bind->index = CCV_NNC_TENSOR_NO_VARIABLE;
68
27.8k
  if (bind->sources)
69
15.3k
    ccv_array_free(bind->sources);
70
27.8k
  if (bind->destinations)
71
23.0k
    ccv_array_free(bind->destinations);
72
27.8k
  if (bind->tensor_view)
73
17.7k
  {
74
17.7k
    if (bind->destructor_hook.func)
75
3
      bind->destructor_hook.func(graph, (ccv_nnc_tensor_t*)bind->tensor_view, bind->destructor_hook.context);
76
17.7k
    if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(bind->tensor_view))
77
17.4k
    {
78
17.4k
      if (CCV_IS_TENSOR_VIEW(bind->tensor_view))
79
1
        ccv_nnc_tensor_view_free(bind->tensor_view);
80
17.4k
      else {
81
17.4k
        if (!bind->alias_ref && // Return this memory to the graph.
82
17.4k
          
CCV_TENSOR_GET_MEMORY16.4k
(bind->tensor_view->info.type) == CCV_TENSOR_GPU_MEMORY16.4k
&&
bind->tensor_view->data.u838
)
83
38
          ccv_nnc_xpu_free(&graph->xpu_alloc, bind->tensor_view->data.u8);
84
17.4k
        ccv_nnc_tensor_free((ccv_nnc_tensor_t*)bind->tensor_view);
85
17.4k
      }
86
17.4k
    }
87
17.7k
  }
88
27.8k
  if (zeroing)
89
27.4k
  {
90
27.4k
    bind->sources = 0;
91
27.4k
    bind->destinations = 0;
92
27.4k
    bind->tensor_view = 0;
93
27.4k
    bind->destructor_hook.func = 0;
94
27.4k
    bind->destructor_hook.context = 0;
95
27.4k
  }
96
27.8k
}
97
98
void ccv_nnc_dynamic_graph_free(ccv_nnc_dynamic_graph_t* const graph)
99
61
{
100
61
  int i;
101
363
  for (i = 0; i < graph->vars->rnum; 
i++302
)
102
302
  {
103
302
    ccv_nnc_tensor_variable_t tensor_variable = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i);
104
302
    if (tensor_variable)
105
249
      _ccv_nnc_tensor_variable_free(graph, tensor_variable, 0);
106
302
  }
107
61
  ccv_array_free(graph->vars);
108
422
  for (i = 0; i < graph->binds->rnum; 
i++361
)
109
361
    _ccv_nnc_tensor_variable_graph_bind_free(graph, (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i), 0);
110
61
  ccv_array_free(graph->binds);
111
61
  ccv_nnc_symbolic_graph_free(graph->tape);
112
61
  if (graph->ws)
113
42
    ccv_array_free(graph->ws);
114
61
  if (graph->stateful_execs)
115
17
  {
116
43
    for (i = 0; i < graph->stateful_execs->rnum; 
i++26
)
117
26
    {
118
26
      ccv_nnc_stateful_exec_t* const stateful_exec = *(ccv_nnc_stateful_exec_t**)ccv_array_get(graph->stateful_execs, i);
119
26
      if (stateful_exec)
120
13
        ccfree(stateful_exec);
121
26
    }
122
17
    ccv_array_free(graph->stateful_execs);
123
17
  }
124
61
  if (graph->stream_map)
125
10
  {
126
10
    khiter_t k;
127
58
    for (k = 
kh_begin10
(graph->stream_map); k != kh_end(graph->stream_map);
++k48
)
128
48
    {
129
48
      if (!kh_exist(graph->stream_map, k))
130
25
        continue;
131
23
      ccv_nnc_stream_context_t* const stream = kh_val(graph->stream_map, k);
132
23
      ccv_nnc_stream_context_free(stream);
133
23
    }
134
10
    kh_destroy(stream_map, graph->stream_map);
135
10
  }
136
61
  ccv_nnc_xpu_alloc_destroy(&graph->xpu_alloc);
137
61
  ccfree(graph);
138
61
}
139
140
void ccv_nnc_tensor_variable_set(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_t* const tensor)
141
655
{
142
655
  assert(!tensor_variable->alias_index_ref);
143
655
  if (tensor_variable->tensor_view && 
!2
CCV_NNC_IS_EXTERN_TENSOR_VIEW2
(tensor_variable->tensor_view))
144
0
  {
145
0
    assert(!CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view));
146
0
    ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view);
147
0
  }
148
655
  tensor_variable->info = tensor->info;
149
655
  tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)((uintptr_t)tensor | 1);
150
655
}
151
152
void ccv_nnc_tensor_variable_destructor_hook(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_variable_destructor_f func, void* const context)
153
7
{
154
7
  tensor_variable->destructor_hook.func = func;
155
7
  tensor_variable->destructor_hook.context = context;
156
7
}
157
158
inline static void _ccv_nnc_tensor_variable_init(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_param_t info)
159
31.3k
{
160
31.3k
  tensor_variable->alias_index_ref = 0;
161
31.3k
  tensor_variable->alias_off = 0;
162
31.3k
  tensor_variable->destructor_hook.func = 0;
163
31.3k
  tensor_variable->destructor_hook.context = 0;
164
31.3k
  tensor_variable->info = info;
165
31.3k
  tensor_variable->symbol = NO_TENSOR_SYMBOL;
166
31.3k
  tensor_variable->tensor_view = 0;
167
31.3k
  if (graph->reuse_var >= 0)
168
802
  {
169
802
    const int reuse_var = graph->reuse_var;
170
802
    assert(reuse_var < graph->vars->rnum);
171
802
    tensor_variable->index = reuse_var;
172
802
    *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var) = tensor_variable;
173
802
    int i;
174
802
    graph->reuse_var = -1;
175
1.43k
    for (i = reuse_var + 1; i < graph->vars->rnum && 
graph->reuse_var < 01.02k
;
i++631
)
176
631
      if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) == 0)
177
591
        graph->reuse_var = i;
178
30.5k
  } else {
179
30.5k
    tensor_variable->index = graph->vars->rnum;
180
30.5k
    ccv_array_push(graph->vars, &tensor_variable);
181
30.5k
  }
182
31.3k
}
183
184
ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info)
185
31.3k
{
186
31.3k
  ccv_nnc_tensor_variable_t tensor_variable = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s));
187
31.3k
  tensor_variable->type = CCV_NNC_TENSOR_VARIABLE;
188
31.3k
  _ccv_nnc_tensor_variable_init(graph, tensor_variable, info);
189
31.3k
  return tensor_variable;
190
31.3k
}
191
192
ccv_nnc_tensor_variable_t ccv_nnc_tensor_constant_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info)
193
37
{
194
37
  ccv_nnc_tensor_variable_t tensor_variable = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s));
195
37
  tensor_variable->type = CCV_NNC_TENSOR_CONSTANT;
196
37
  _ccv_nnc_tensor_variable_init(graph, tensor_variable, info);
197
37
  return tensor_variable;
198
37
}
199
200
int ccv_nnc_tensor_variable_is_constant(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
201
0
{
202
0
  return tensor_variable->type == CCV_NNC_TENSOR_CONSTANT;
203
0
}
204
205
ccv_nnc_tensor_param_t ccv_nnc_tensor_variable_params(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
206
0
{
207
0
  return tensor_variable->info;
208
0
}
209
210
ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_alias_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info)
211
1.04k
{
212
1.04k
  ccv_nnc_tensor_variable_t variable_alias = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s));
213
1.04k
  variable_alias->type = tensor_variable->type;
214
  // If the tensor variable is an alias itself, we point directly to its original.
215
1.04k
  if (tensor_variable->alias_index_ref)
216
1
  {
217
1
    variable_alias->alias_index_ref = tensor_variable->alias_index_ref;
218
    // The tensor variable need to be fully specified if I am doing alias an alias.
219
1
    assert(!ccv_nnc_is_tensor_auto(tensor_variable->info));
220
1
    int i;
221
1
    int no_stride = 1;
222
2
    for (i = 0; no_stride && 
i < 1
CCV_NNC_MAX_DIM_ALLOC1
;
i++1
)
223
1
      no_stride = (tensor_variable->stride[i] == 0);
224
1
    int stride_from_dim[CCV_NNC_MAX_DIM_ALLOC];
225
1
    int* to_stride;
226
1
    if (no_stride)
227
0
    {
228
0
      ccv_nnc_tensor_get_stride(tensor_variable->info.dim, stride_from_dim);
229
0
      to_stride = stride_from_dim;
230
0
    } else
231
1
      to_stride = tensor_variable->stride;
232
    // If we provide stride, or reshape to a different size, assert the tensor variable itself is contiguous (otherwise we cannot satisfy the reshape requirements).
233
1
    const int different_dim = ccv_nnc_tensor_nd(info.dim) != ccv_nnc_tensor_nd(tensor_variable->info.dim);
234
1
    if (different_dim || 
(0
stride[0] != 00
&&
memcmp(stride, to_stride, sizeof(int) * 0
CCV_NNC_MAX_DIM_ALLOC0
) != 0))
235
1
      { assert(ccv_nnc_tensor_view_is_contiguous(tensor_variable->info.dim, to_stride)); }
236
    // Need to compute alias off, that is the alias off of the tensor variable plus its ofs.
237
1
    const off_t off = ccv_nnc_tensor_view_offset(tensor_variable->info.datatype, to_stride, tensor_variable->ofs);
238
1
    variable_alias->alias_off = tensor_variable->alias_off + off;
239
    // If we don't provide stride, copy the stride from previous variable.
240
1
    if (stride[0] == 0)
241
0
    {
242
0
      if (different_dim)
243
0
        ccv_nnc_tensor_get_stride(info.dim, variable_alias->stride);
244
0
      else
245
0
        memcpy(variable_alias->stride, to_stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
246
0
    } else
247
1
      memcpy(variable_alias->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
248
1.04k
  } else {
249
1.04k
    variable_alias->alias_index_ref = tensor_variable->index + 1;
250
1.04k
    variable_alias->alias_off = 0;
251
1.04k
    memcpy(variable_alias->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
252
1.04k
  }
253
1.04k
  variable_alias->info = info;
254
1.04k
  variable_alias->symbol = NO_TENSOR_SYMBOL;
255
1.04k
  variable_alias->destructor_hook.func = 0;
256
1.04k
  variable_alias->destructor_hook.context = 0;
257
1.04k
  variable_alias->tensor_view = 0;
258
1.04k
  memcpy(variable_alias->ofs, ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
259
1.04k
  if (graph->reuse_var >= 0)
260
0
  {
261
0
    const int reuse_var = graph->reuse_var;
262
0
    assert(reuse_var < graph->vars->rnum);
263
0
    variable_alias->index = reuse_var;
264
0
    *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var) = variable_alias;
265
0
    int i;
266
0
    graph->reuse_var = -1;
267
0
    for (i = reuse_var + 1; i < graph->vars->rnum && graph->reuse_var < 0; i++)
268
0
      if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) == 0)
269
0
        graph->reuse_var = i;
270
1.04k
  } else {
271
1.04k
    variable_alias->index = graph->vars->rnum;
272
1.04k
    ccv_array_push(graph->vars, &variable_alias);
273
1.04k
  }
274
1.04k
  return variable_alias;
275
1.04k
}
276
277
int ccv_nnc_tensor_variable_alias_params(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, int ofs[CCV_NNC_MAX_DIM_ALLOC], int stride[CCV_NNC_MAX_DIM_ALLOC])
278
0
{
279
0
  if (!tensor_variable->alias_index_ref)
280
0
    return -1;
281
0
  if (ofs)
282
0
    memcpy(ofs, tensor_variable->ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
283
0
  if (stride)
284
0
    memcpy(stride, tensor_variable->stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
285
0
  return 0;
286
0
}
287
288
ccv_nnc_tensor_t* ccv_nnc_tensor_from_variable_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_stream_context_t* const stream_context)
289
69.2k
{
290
69.2k
  if (tensor_variable->tensor_view)
291
37.5k
  {
292
37.5k
    if (tensor_variable->alias_index_ref)
293
1.03k
    {
294
1.03k
      const int alias_index = tensor_variable->alias_index_ref - 1;
295
1.03k
      assert(alias_index >= 0);
296
1.03k
      ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
297
1.03k
      if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view))
298
12
      {
299
12
        ccv_nnc_tensor_view_t* const tv = tensor_variable->tensor_view;
300
        // We cannot have an alias with custom set tensor, otherwise the pointer update is invalid.
301
12
        assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv));
302
        // Update the tensor_view pointer every time access it, because the underlying variable it alias to have changed.
303
12
        ccv_nnc_tensor_data(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->info, CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8, tv->off + tensor_variable->alias_off, &tv->data, &tv->dataof);
304
1.02k
      } else {
305
1.02k
        ccv_nnc_tensor_t* const tv = (ccv_nnc_tensor_t*)tensor_variable->tensor_view;
306
        // We cannot have an alias with custom set tensor, otherwise the pointer update is invalid.
307
1.02k
        assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv));
308
        // Update the tensor_view pointer every time access it, because the underlying variable it alias to have changed.
309
1.02k
        ccv_nnc_tensor_data(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->info, CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8, tensor_variable->alias_off, &tv->data, &tv->dataof);
310
1.02k
      }
311
1.03k
    }
312
37.5k
    return (ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(tensor_variable->tensor_view);
313
37.5k
  }
314
31.7k
  if (!tensor_variable->alias_index_ref)
315
30.6k
  {
316
    // If we haven't allocated tensor_variable, we cannot allocate them now (because no shape specified), return 0.
317
30.6k
    if (ccv_nnc_is_tensor_auto(tensor_variable->info))
318
0
      return 0;
319
30.6k
    void* ptr = 0;
320
30.6k
    const size_t data_size = ccv_nnc_tensor_data_size(tensor_variable->info);
321
30.6k
    if (CCV_TENSOR_GET_MEMORY(tensor_variable->info.type) == CCV_TENSOR_GPU_MEMORY && 
data_size > 0457
)
322
457
      ptr = ccv_nnc_xpu_alloc(&graph->xpu_alloc, CCV_TENSOR_GET_DEVICE_ID(tensor_variable->info.type), stream_context, data_size);
323
30.6k
    tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, tensor_variable->info, 0);
324
30.6k
    if (tensor_variable->info.dim[0] > 0)
325
30.6k
      { assert(tensor_variable->tensor_view->data.u8); }
326
30.6k
    return (ccv_nnc_tensor_t*)tensor_variable->tensor_view;
327
30.6k
  }
328
1.04k
  const int alias_index = tensor_variable->alias_index_ref - 1;
329
1.04k
  assert(alias_index >= 0);
330
1.04k
  ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
331
1.04k
  assert(!variable_to->alias_index_ref);
332
1.04k
  if (!variable_to->tensor_view)
333
3
  {
334
    // If we haven't allocated variable_to, we cannot allocate them now (because no shape specified), return 0.
335
3
    if (ccv_nnc_is_tensor_auto(variable_to->info))
336
0
      return 0;
337
3
    void* ptr = 0;
338
3
    assert(variable_to->info.type == tensor_variable->info.type);
339
3
    const size_t data_size = ccv_nnc_tensor_data_size(variable_to->info);
340
3
    if (CCV_TENSOR_GET_MEMORY(variable_to->info.type) == CCV_TENSOR_GPU_MEMORY && 
data_size > 00
)
341
0
      ptr = ccv_nnc_xpu_alloc(&graph->xpu_alloc, CCV_TENSOR_GET_DEVICE_ID(variable_to->info.type), stream_context, data_size);
342
3
    variable_to->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, variable_to->info, 0);
343
3
    assert(variable_to->tensor_view->data.u8);
344
3
  }
345
1.04k
  int i;
346
1.04k
  int no_ofs = 1;
347
13.5k
  for (i = 0; no_ofs && 
i < 13.5k
CCV_NNC_MAX_DIM_ALLOC13.5k
;
i++12.4k
)
348
12.4k
    no_ofs = (tensor_variable->ofs[i] == 0);
349
1.04k
  int no_stride = 1;
350
2.46k
  for (i = 0; no_stride && 
i < 1.45k
CCV_NNC_MAX_DIM_ALLOC1.45k
;
i++1.42k
)
351
1.42k
    no_stride = (tensor_variable->stride[i] == 0);
352
1.04k
  int stride_is_packed = no_stride;
353
1.04k
  if (!no_stride) // We have stride, now if it is packed.
354
1.01k
    stride_is_packed = ccv_nnc_is_tensor_stride_packed(tensor_variable->stride, tensor_variable->info.dim);
355
1.04k
  assert(CCV_GET_DATA_TYPE_SIZE(tensor_variable->info.datatype) * ccv_nnc_tensor_count(tensor_variable->info) + tensor_variable->alias_off <= CCV_GET_DATA_TYPE_SIZE(variable_to->info.datatype) * ccv_nnc_tensor_count(variable_to->info));
356
  // Allowing vector type to be normal tensor, rather than a tensor view. We cannot have any offset though.
357
1.04k
  if (no_ofs && 
!stride_is_packed1.03k
)
358
3
    stride_is_packed = ccv_nnc_tensor_view_is_contiguous(tensor_variable->info.dim, tensor_variable->stride);
359
1.04k
  if (no_ofs && 
stride_is_packed1.03k
)
360
1.03k
    tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8, tensor_variable->info, 0);
361
8
  else {
362
8
    if (no_stride)
363
1
      ccv_nnc_tensor_get_stride(tensor_variable->info.dim, tensor_variable->stride);
364
8
    tensor_variable->tensor_view = ccv_nnc_tensor_view_new((ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(variable_to->tensor_view), tensor_variable->info, tensor_variable->ofs, tensor_variable->stride);
365
8
  }
366
1.04k
  if  (tensor_variable->alias_off)
367
1
    ccv_nnc_tensor_data_add(tensor_variable->tensor_view->info, tensor_variable->alias_off, &tensor_variable->tensor_view->data, &tensor_variable->tensor_view->dataof);
368
1.04k
  return (ccv_nnc_tensor_t*)tensor_variable->tensor_view;
369
1.04k
}
370
371
static void _ccv_nnc_tensor_symbol_extra_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_symbol_t symbol)
372
27.6k
{
373
27.6k
  if (symbol.d >= graph->binds->rnum)
374
361
  {
375
361
    const int rnum = graph->binds->rnum;
376
361
    ccv_array_resize(graph->binds, symbol.d + 1);
377
361
    int i;
378
722
    for (i = rnum; i < graph->binds->rnum; 
i++361
)
379
361
      ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i))->index = CCV_NNC_TENSOR_NO_VARIABLE;
380
361
  }
381
27.6k
  ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, symbol.d);
382
27.6k
  bind->type = tensor_variable->type;
383
27.6k
  bind->index = tensor_variable->index;
384
27.6k
  if (tensor_variable->alias_index_ref)
385
1.04k
  {
386
1.04k
    const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){
387
1.04k
      .d = symbol.d,
388
1.04k
      .graph = graph->tape
389
1.04k
    });
390
1.04k
    assert(alias_to.d >= 0 && alias_to.d < graph->binds->rnum);
391
1.04k
    bind->alias_ref = alias_to.d + 1;
392
1.04k
  } else
393
26.5k
    bind->alias_ref = 0;
394
27.6k
  if (bind->sources)
395
0
    ccv_array_free(bind->sources);
396
27.6k
  bind->sources = 0;
397
27.6k
  if (bind->destinations)
398
0
    ccv_array_free(bind->destinations);
399
27.6k
  bind->destinations = 0;
400
27.6k
  bind->destructor_hook.func = 0;
401
27.6k
  bind->destructor_hook.context = 0;
402
27.6k
  bind->tensor_view = 0;
403
27.6k
}
404
405
static ccv_nnc_tensor_symbol_t _ccv_nnc_tensor_symbol_from_variable(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
406
44.5k
{
407
44.5k
  if (tensor_variable->symbol.d >= 0)
408
16.9k
    return tensor_variable->symbol;
409
27.6k
  if (!tensor_variable->alias_index_ref)
410
26.5k
  {
411
26.5k
    const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_new(graph->tape, tensor_variable->info, 0);
412
26.5k
    _ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol);
413
26.5k
    return symbol;
414
26.5k
  }
415
1.04k
  const int alias_index = tensor_variable->alias_index_ref - 1;
416
1.04k
  assert(alias_index >= 0);
417
1.04k
  ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
418
1.04k
  assert(!variable_to->alias_index_ref);
419
1.04k
  int no_stride = 1;
420
1.04k
  int i;
421
2.43k
  for (i = 0; no_stride && 
i < 1.42k
CCV_NNC_MAX_DIM_ALLOC1.42k
;
i++1.39k
)
422
1.39k
    no_stride = (tensor_variable->stride[i] == 0);
423
1.04k
  if (no_stride)
424
32
    ccv_nnc_tensor_get_stride(tensor_variable->info.dim, tensor_variable->stride);
425
1.04k
  const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_alias_new(graph->tape, _ccv_nnc_tensor_symbol_from_variable(graph, variable_to), tensor_variable->ofs, tensor_variable->stride, tensor_variable->info, 0);
426
1.04k
  _ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol);
427
1.04k
  return symbol;
428
1.04k
}
429
430
// Return the tensor variable that is old (the provided tensor variable will have a new setting).
431
ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_exchange_new(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable)
432
6.28k
{
433
6.28k
  struct ccv_nnc_tensor_variable_s x = *tensor_variable;
434
6.28k
  ccv_nnc_tensor_variable_t new_variable;
435
  // Need to handle alias.
436
6.28k
  if (x.alias_index_ref)
437
0
    new_variable = ccv_nnc_tensor_variable_alias_new(graph, *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, x.alias_index_ref - 1), x.ofs, x.stride, x.info);
438
6.28k
  else
439
6.28k
    new_variable = ccv_nnc_tensor_variable_new(graph, x.info);
440
6.28k
  *tensor_variable = *new_variable;
441
6.28k
  *new_variable = x;
442
  // The index should be the same though.
443
6.28k
  const int index = new_variable->index;
444
6.28k
  new_variable->index = tensor_variable->index;
445
6.28k
  if (new_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
446
2.84k
  {
447
2.84k
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, new_variable->symbol.d);
448
2.84k
    bind->index = new_variable->index;
449
2.84k
  }
450
6.28k
  tensor_variable->index = index;
451
6.28k
  return new_variable;
452
6.28k
}
453
454
void ccv_nnc_dynamic_graph_set_max_concurrency(ccv_nnc_dynamic_graph_t* const dynamic_graph, const int max_stream_count)
455
0
{
456
0
  dynamic_graph->max_stream_count = max_stream_count;
457
0
}
458
459
int ccv_nnc_dynamic_graph_set_no_grad(ccv_nnc_dynamic_graph_t* const dynamic_graph, const int no_grad)
460
9
{
461
9
  if (dynamic_graph->no_grad == no_grad)
462
0
    return -1;
463
9
  dynamic_graph->no_grad = no_grad;
464
9
  return 0;
465
9
}
466
467
static ccv_nnc_stream_context_t* _ccv_nnc_dynamic_graph_get_stream(ccv_nnc_dynamic_graph_t* const graph, const int type)
468
72
{
469
72
  if (!graph->stream_map)
470
10
    graph->stream_map = kh_init(stream_map);
471
72
  int ret = 0;
472
72
  khiter_t k = kh_put(stream_map, graph->stream_map, type, &ret);
473
72
  assert(ret >= 0);
474
72
  ccv_nnc_stream_context_t* stream = kh_val(graph->stream_map, k);
475
  // If ret == 0, the key already exist, we can return directly, otherwise, create and return.
476
72
  if (ret != 0)
477
23
  {
478
23
    stream = ccv_nnc_stream_context_new(type);
479
23
    kh_val(graph->stream_map, k) = stream;
480
23
  }
481
72
  return stream;
482
72
}
483
484
typedef struct {
485
  ccv_nnc_dynamic_graph_t* graph;
486
  int stream_type;
487
} ccv_nnc_dynamic_graph_neighbor_context_discovery_t;
488
489
static ccv_nnc_stream_context_t* _ccv_nnc_dynamic_graph_neighbor_context_discovery(const int device_id, void* const context)
490
0
{
491
0
  ccv_nnc_dynamic_graph_neighbor_context_discovery_t* const discovery = (ccv_nnc_dynamic_graph_neighbor_context_discovery_t*)context;
492
0
  int type = discovery->stream_type;
493
0
  CCV_STREAM_SET_DEVICE_ID(type, device_id);
494
0
  return _ccv_nnc_dynamic_graph_get_stream(discovery->graph, type);
495
0
}
496
497
void ccv_nnc_dynamic_graph_exec_ret(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context, ccv_nnc_graph_exec_symbol_t* const graph_execs)
498
15.3k
{
499
15.3k
  int i, j;
500
43.5k
  for (i = 0; i < input_size; 
i++28.1k
)
501
28.1k
    if (inputs[i] && 
!inputs[i]->alias_index_ref28.1k
)
502
27.1k
      { assert(inputs[i]->tensor_view); }
503
15.3k
  ccv_nnc_tensor_t* input_tensors[ccv_max(1, input_size)];
504
43.5k
  for (i = 0; i < input_size; 
i++28.1k
)
505
28.1k
    input_tensors[i] = inputs[i] ? ccv_nnc_tensor_from_variable(graph, inputs[i], stream_context) : 
02
;
506
15.3k
  ccv_nnc_tensor_symbol_t input_symbols[ccv_max(1, input_size)];
507
43.5k
  for (i = 0; i < input_size; 
i++28.1k
)
508
28.1k
    input_symbols[i] = inputs[i] ? 
_ccv_nnc_tensor_symbol_from_variable(graph, inputs[i])28.1k
:
NO_TENSOR_SYMBOL2
;
509
15.3k
  ccv_array_t* input_sources[ccv_max(1, input_size)];
510
15.3k
  ccv_array_t* input_alias_sources[ccv_max(1, input_size)];
511
43.5k
  for (i = 0; i < input_size; 
i++28.1k
)
512
28.1k
  {
513
28.1k
    input_sources[i] = input_symbols[i].d != CCV_NNC_NO_TENSOR_SYMBOL ? 
((ccv_nnc_tensor_variable_graph_bind_t*)28.1k
ccv_array_get28.1k
(graph->binds, input_symbols[i].d))->sources :
02
;
514
28.1k
    if (inputs[i] && 
inputs[i]->alias_index_ref28.1k
)
515
1.03k
    {
516
1.03k
      const int alias_index_ref = inputs[i]->alias_index_ref - 1;
517
1.03k
      assert(alias_index_ref >= 0);
518
1.03k
      ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index_ref);
519
1.03k
      input_alias_sources[i] = ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d))->sources;
520
1.03k
    } else
521
27.1k
      input_alias_sources[i] = 0;
522
28.1k
  }
523
15.3k
  const int parallel_count = ccv_max(1, parallel);
524
15.3k
  assert(input_size % parallel_count == 0);
525
15.3k
  const int per_input_size = input_size / parallel_count;
526
15.3k
  assert(output_size % parallel_count == 0);
527
15.3k
  const int per_output_size = output_size / parallel_count;
528
15.3k
  int output_auto = 0;
529
31.0k
  for (i = 0; !output_auto && 
i < output_size16.3k
;
i++15.6k
)
530
15.6k
    output_auto = outputs[i] ? 
ccv_nnc_is_tensor_auto(outputs[i]->info)15.4k
:
0200
;
531
  // One extra step, infer the parameters for outputs.
532
15.3k
  if (output_auto)
533
14.6k
  {
534
14.6k
    ccv_nnc_tensor_param_t input_params[ccv_max(1, per_input_size)];
535
14.6k
    ccv_nnc_tensor_param_t output_params[ccv_max(1, per_output_size)];
536
29.4k
    for (i = 0; i < parallel_count; 
i++14.7k
)
537
14.7k
    {
538
41.9k
      for (j = 0; j < per_input_size; 
j++27.2k
)
539
27.2k
        input_params[j] = inputs[j + i * per_input_size] ? 
inputs[j + i * per_input_size]->info27.2k
:
ccv_nnc_tensor_auto2
;
540
29.6k
      for (j = 0; j < per_output_size; 
j++14.9k
)
541
14.9k
        output_params[j] = outputs[j + i * per_output_size] ? 
outputs[j + i * per_output_size]->info14.7k
:
ccv_nnc_tensor_auto207
;
542
14.7k
      ccv_nnc_hint_tensor_auto(cmd, input_params, per_input_size, hint, output_params, per_output_size);
543
29.6k
      for (j = 0; j < per_output_size; 
j++14.9k
)
544
14.9k
        if (outputs[j + i * per_output_size])
545
14.7k
          outputs[j + i * per_output_size]->info = output_params[j];
546
14.7k
    }
547
14.6k
  }
548
15.3k
  int freeable_size = 0;
549
15.3k
  ccv_nnc_tensor_variable_t freeables[ccv_max(1, output_size)];
550
  // Refresh the symbol if it is binded to an existing exec. Otherwise we cannot keep the SSA guarantee.
551
31.0k
  for (i = 0; i < output_size; 
i++15.6k
)
552
15.6k
  {
553
    // First, go over to see whether there is enforce inplace.
554
15.6k
    int enforce_idx = -1;
555
44.7k
    for (j = 0; enforce_idx < 0 && 
j < input_size44.7k
;
j++29.1k
)
556
29.1k
      if (inputs[j] && 
ccv_nnc_cmd_enforce_inplace(cmd, j, input_size, i, output_size)29.1k
)
557
2
        enforce_idx = j;
558
15.6k
    if (enforce_idx >= 0)
559
2
      { assert(outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL); }
560
    // We don't allow or check "allow inplace" yet. That logic will be at odds with backward logic.
561
15.6k
    if (outputs[i] && 
outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL15.4k
)
562
417
    {
563
417
      const ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[i]->symbol.d);
564
417
      if (enforce_idx >= 0)
565
2
        { assert(!bind->destinations || bind->destinations->rnum == 0); }
566
417
      if (bind->sources && 
bind->sources->rnum > 0415
)
567
415
      {
568
415
        const ccv_nnc_tensor_variable_t old_var = freeables[freeable_size++] = ccv_nnc_tensor_variable_exchange_new(graph, outputs[i]);
569
        // If this is enforce output, make sure the tensor view is taken by the output.
570
415
        if (enforce_idx >= 0)
571
0
        {
572
0
          outputs[i]->destructor_hook = old_var->destructor_hook;
573
0
          outputs[i]->tensor_view = old_var->tensor_view; // Make sure the tensor view is taken over by the output.
574
0
          old_var->tensor_view = 0;
575
0
        }
576
415
      }
577
417
    }
578
15.6k
  }
579
15.3k
  ccv_nnc_tensor_t* output_tensors[ccv_max(1, per_output_size)];
580
15.3k
  if (parallel_count > 1)
581
23
  {
582
23
    const int max_device_id_size = per_input_size + per_output_size;
583
23
    assert(max_device_id_size > 0);
584
23
    int device_ids[max_device_id_size];
585
23
    ccv_nnc_stream_context_t* streams[parallel_count];
586
23
    ccv_nnc_stream_signal_t* signal;
587
23
    if (stream_context)
588
14
      signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
589
97
    for (i = 0; i < parallel_count; 
i++74
)
590
74
    {
591
74
      int flag = 0;
592
148
      for (j = 0; !flag && 
j < per_input_size78
;
j++74
)
593
74
        if (input_tensors[i * per_input_size + j])
594
74
          flag = (CCV_TENSOR_GET_MEMORY(input_tensors[i * per_input_size + j]->info.type) == CCV_TENSOR_GPU_MEMORY);
595
156
      for (j = 0; j < per_output_size; 
j++82
)
596
82
      {
597
82
        output_tensors[j] = outputs[j + i * per_output_size] ? ccv_nnc_tensor_from_variable(graph, outputs[j + i * per_output_size], stream_context) : 
08
;
598
82
        if (output_tensors[j] && 
!flag74
)
599
4
          flag = (CCV_TENSOR_GET_MEMORY(output_tensors[j]->info.type) == CCV_TENSOR_GPU_MEMORY);
600
82
      }
601
74
      const int stream_type = flag ? 
CCV_STREAM_CONTEXT_GPU70
:
CCV_STREAM_CONTEXT_CPU4
;
602
74
      const int tensor_type = flag ? 
CCV_TENSOR_GPU_MEMORY70
:
CCV_TENSOR_CPU_MEMORY4
;
603
74
      const int device_id_size = ccv_nnc_device_ids_for_io(input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, tensor_type, device_ids, max_device_id_size);
604
74
      ccv_nnc_stream_context_t* stream_0 = 0;
605
146
      for (j = 0; j < device_id_size; 
j++72
)
606
72
      {
607
72
        int type = stream_type;
608
72
        CCV_STREAM_SET_DEVICE_ID(type, device_ids[j]);
609
72
        ccv_nnc_stream_context_t* const stream = _ccv_nnc_dynamic_graph_get_stream(graph, type);
610
72
        if (!stream_0)
611
72
          stream_0 = stream;
612
72
      }
613
      // Wait signal to finish.
614
74
      if (stream_context)
615
44
      {
616
44
        if (stream_0)
617
42
          ccv_nnc_stream_context_wait_signal(stream_0, signal);
618
2
        else
619
2
          ccv_nnc_stream_context_wait(stream_context);
620
44
      }
621
74
      if (stream_0)
622
72
      {
623
72
        ccv_nnc_dynamic_graph_neighbor_context_discovery_t discovery = {
624
72
          .graph = graph,
625
72
          .stream_type = stream_type
626
72
        };
627
72
        ccv_nnc_stream_context_set_neighbor_discovery(stream_0, _ccv_nnc_dynamic_graph_neighbor_context_discovery, &discovery);
628
72
      }
629
74
      PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size);
630
74
      int k;
631
204
      for (k = 0; k < per_input_size; 
k++130
)
632
130
      {
633
130
        PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", 
k + 1, input_tensors[k + i * per_input_size], (input_tensors[k + i * per_input_size] ? input_tensors[k + i * per_input_size]->data.u8 : 0), (input_tensors[k + i * per_input_size] ? 0
CCV_TENSOR_GET_DEVICE_ID0
(input_tensors[k + i * per_input_size]->info.type) : -1));
634
130
        if (input_tensors[k + i * per_input_size] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
635
0
          ccv_nnc_print_tensor_info(input_tensors[k + i * per_input_size]);
636
130
        PRINT(CCV_CLI_INFO, "\n");
637
130
      }
638
156
      for (k = 0; k < per_output_size; 
k++82
)
639
82
      {
640
82
        PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", 
k + 1, output_tensors[k], (output_tensors[k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? 0
CCV_TENSOR_GET_DEVICE_ID0
(output_tensors[k]->info.type) : -1));
641
82
        if (output_tensors[k] && 
CCV_CLI_OUTPUT_LEVEL_IS74
(CCV_CLI_INFO))
642
0
          ccv_nnc_print_tensor_shape(output_tensors[k]);
643
82
        PRINT(CCV_CLI_INFO, "\n");
644
82
      }
645
74
      const int status = ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, stream_0);
646
74
      if (status != 0)
647
0
        PRINT(CCV_CLI_INFO, "Invalid Status: %d\n", status);
648
74
      if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE))
649
0
      {
650
0
        for (k = 0; k < per_output_size; k++)
651
0
        {
652
0
          PRINT(CCV_CLI_VERBOSE, "POST: |<- %d. %p (%p:%d)", k + 1, output_tensors[k], (output_tensors[k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? CCV_TENSOR_GET_DEVICE_ID(output_tensors[k]->info.type) : -1));
653
0
          if (output_tensors[k])
654
0
            ccv_nnc_print_tensor_info(output_tensors[k]);
655
0
          PRINT(CCV_CLI_VERBOSE, "\n");
656
0
        }
657
0
      }
658
74
      if (stream_context && 
stream_044
)
659
42
      {
660
42
        ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
661
42
        ccv_nnc_stream_context_wait_signal(stream_context, signal);
662
42
      }
663
74
      streams[i] = stream_0;
664
74
    }
665
23
    if (!stream_context)
666
39
      
for (i = 0; 9
i < parallel_count;
i++30
)
667
30
        if (streams[i])
668
30
          ccv_nnc_stream_context_wait(streams[i]);
669
15.3k
  } else {
670
30.9k
    for (i = 0; i < per_output_size; 
i++15.5k
)
671
15.5k
      output_tensors[i] = outputs[i] ? ccv_nnc_tensor_from_variable(graph, outputs[i], stream_context) : 
0199
;
672
15.3k
    PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size);
673
43.4k
    for (i = 0; i < per_input_size; 
i++28.0k
)
674
28.0k
    {
675
28.0k
      PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", 
i + 1, input_tensors[i], (input_tensors[i] ? input_tensors[i]->data.u8 : 0), (input_tensors[i] ? 0
CCV_TENSOR_GET_DEVICE_ID0
(input_tensors[i]->info.type) : -1));
676
28.0k
      if (input_tensors[i] && 
CCV_CLI_OUTPUT_LEVEL_IS28.0k
(CCV_CLI_INFO))
677
0
        ccv_nnc_print_tensor_info(input_tensors[i]);
678
28.0k
      PRINT(CCV_CLI_INFO, "\n");
679
28.0k
    }
680
15.3k
    ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors, per_input_size, output_tensors, per_output_size, stream_context);
681
30.9k
    for (i = 0; i < per_output_size; 
i++15.5k
)
682
15.5k
    {
683
15.5k
      PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", 
i + 1, output_tensors[i], (output_tensors[i] ? output_tensors[i]->data.u8 : 0), (output_tensors[i] ? 0
CCV_TENSOR_GET_DEVICE_ID0
(output_tensors[i]->info.type) : -1));
684
15.5k
      if (output_tensors[i] && 
CCV_CLI_OUTPUT_LEVEL_IS15.3k
(CCV_CLI_INFO))
685
0
        ccv_nnc_print_tensor_info(output_tensors[i]);
686
15.5k
      PRINT(CCV_CLI_INFO, "\n");
687
15.5k
    }
688
15.3k
  }
689
15.3k
  int inputs_are_constants = 1;
690
30.7k
  for (i = 0; inputs_are_constants && 
i < input_size15.3k
;
i++15.3k
)
691
15.3k
    if (inputs[i] && 
inputs[i]->type != CCV_NNC_TENSOR_CONSTANT15.3k
)
692
15.3k
      inputs_are_constants = 0;
693
15.3k
  if (input_size > 0 && 
!inputs_are_constants15.3k
&&
!graph->no_grad15.3k
) // No need to record the execution if there is no input or we disabled gradient computation.
694
15.3k
  {
695
15.3k
    ccv_nnc_tensor_symbol_t output_symbols[ccv_max(1, output_size)];
696
30.9k
    for (i = 0; i < output_size; 
i++15.5k
)
697
15.5k
      if (outputs[i])
698
15.3k
      {
699
15.3k
        assert(outputs[i]->type != CCV_NNC_TENSOR_CONSTANT);
700
15.3k
        output_symbols[i] = _ccv_nnc_tensor_symbol_from_variable(graph, outputs[i]);
701
15.3k
      } else
702
207
        output_symbols[i] = NO_TENSOR_SYMBOL;
703
15.3k
    int t;
704
30.6k
    for (t = 0; t < parallel_count; 
t++15.3k
)
705
15.3k
    {
706
15.3k
      ccv_nnc_graph_exec_symbol_t graph_exec = ccv_nnc_graph_exec_symbol_new(graph->tape, cmd, input_symbols + t * per_input_size, per_input_size, output_symbols + t * per_output_size, per_output_size, 0);
707
15.3k
      if (graph_execs)
708
2.41k
        graph_execs[t] = graph_exec;
709
      // This needs to be done before we set the new sources on the outputs.
710
43.4k
      for (i = 0; i < per_input_size; 
i++28.0k
)
711
28.0k
      {
712
28.0k
        ccv_array_t* const input_source = input_sources[i + t * per_input_size];
713
28.0k
        if (input_source)
714
28.1k
          
for (j = 0; 14.0k
j < input_source->rnum;
j++14.0k
)
715
14.0k
            ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){
716
14.0k
              .d = *(int*)ccv_array_get(input_source, j),
717
14.0k
              .graph = graph->tape
718
14.0k
            }, graph_exec);
719
28.0k
        ccv_array_t* const input_alias_source = input_alias_sources[i + t * per_input_size];
720
28.0k
        if (input_alias_source)
721
2.02k
          
for (j = 0; 1.01k
j < input_alias_source->rnum;
j++1.01k
)
722
1.01k
            ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){
723
1.01k
              .d = *(int*)ccv_array_get(input_alias_source, j),
724
1.01k
              .graph = graph->tape
725
1.01k
            }, graph_exec);
726
28.0k
      }
727
43.4k
      for (i = 0; i < per_input_size; 
i++28.0k
)
728
28.0k
      {
729
28.0k
        ccv_nnc_tensor_variable_t const input = inputs[i + t * per_input_size];
730
28.0k
        if (!input || 
input_symbols[i + t * per_input_size].d == CCV_NNC_NO_TENSOR_SYMBOL28.0k
)
731
2
          continue;
732
        // Constant inputs still need lifetime tracking while this exec is alive because
733
        // backward may read their concrete tensor buffers even though they do not require
734
        // gradients themselves.
735
28.0k
        ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, input_symbols[i + t * per_input_size].d);
736
28.0k
        if (!bind->destinations)
737
22.0k
          bind->destinations = ccv_array_new(sizeof(int), 1, 0);
738
28.0k
        ccv_array_add_unique_int(bind->destinations, graph_exec.d);
739
28.0k
        if (input->alias_index_ref)
740
1.02k
        {
741
1.02k
            const int alias_index = input->alias_index_ref - 1;
742
1.02k
            assert(alias_index >= 0);
743
1.02k
            ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
744
1.02k
            ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d);
745
1.02k
            if (!root_bind->destinations)
746
1.01k
              root_bind->destinations = ccv_array_new(sizeof(int), 1, 0);
747
1.02k
            ccv_array_add_unique_int(root_bind->destinations, graph_exec.d);
748
1.02k
        }
749
28.0k
      }
750
30.9k
      
for (i = 0; 15.3k
i < per_output_size;
i++15.5k
)
751
15.5k
      {
752
15.5k
        ccv_nnc_tensor_variable_t const output = outputs[i + t * per_output_size];
753
15.5k
        if (!output)
754
207
          continue;
755
15.3k
        ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, output_symbols[i + t * per_output_size].d);
756
15.3k
        assert(!bind->sources); // This is a new symbol, therefore, no binded sources associated yet.
757
15.3k
        bind->sources = ccv_array_new(sizeof(int), 1, 0);
758
15.3k
        ccv_array_add_unique_int(bind->sources, graph_exec.d);
759
15.3k
        if (output->alias_index_ref)
760
8
        {
761
8
          const int alias_index = output->alias_index_ref - 1;
762
8
          assert(alias_index >= 0);
763
8
          ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
764
8
          ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d);
765
8
          if (!root_bind->sources)
766
4
            root_bind->sources = ccv_array_new(sizeof(int), 1, 0);
767
8
          ccv_array_add_unique_int(root_bind->sources, graph_exec.d);
768
8
        }
769
15.3k
      }
770
15.3k
    }
771
15.3k
  }
772
  // Now, able to free some of the reused outputs.
773
15.8k
  
for (i = 0; 15.3k
i < freeable_size;
i++415
)
774
415
    ccv_nnc_tensor_variable_free(graph, freeables[i]);
775
15.3k
}
776
777
int ccv_nnc_dynamic_graph_exec(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context)
778
12.9k
{
779
12.9k
  ccv_nnc_dynamic_graph_exec_ret(graph, cmd, hint, flags, inputs, input_size, outputs, output_size, parallel, stream_context, 0);
780
12.9k
  return CCV_NNC_EXEC_SUCCESS;
781
12.9k
}
782
783
static int _ccv_nnc_tensor_variable_is_only_output(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_graph_bind_t* bind, const int symbol_d)
784
17.5k
{
785
17.5k
  if (bind->alias_ref)
786
1.01k
    bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, bind->alias_ref - 1);
787
17.5k
  if (!bind->sources || bind->sources->rnum == 0)
788
0
    return 1;
789
17.5k
  int i;
790
33.9k
  for (i = 0; i < bind->sources->rnum; 
i++16.4k
)
791
17.5k
  {
792
17.5k
    const int exec_symbol_d = *(int*)ccv_array_get(bind->sources, i);
793
17.5k
    const ccv_nnc_graph_exec_symbol_t exec_symbol = {
794
17.5k
      .d = exec_symbol_d,
795
17.5k
      .graph = graph->tape
796
17.5k
    };
797
17.5k
    const int* outputs; int output_size;
798
17.5k
    ccv_nnc_graph_exec_symbol_io(graph->tape, exec_symbol, 0, 0, &outputs, &output_size);
799
17.5k
    int j;
800
34.0k
    for (j = 0; j < output_size; 
j++16.5k
)
801
17.5k
      if (outputs[j] >= 0 && 
outputs[j] != symbol_d17.5k
) // If output is me, it is the only output.
802
1.04k
      {
803
1.04k
        assert(outputs[j] < graph->binds->rnum);
804
1.04k
        const ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j]);
805
        // This is in use and is it not a constant symbol.
806
1.04k
        if (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT25
)
807
25
          return 0;
808
1.01k
        if (other_bind->alias_ref) // If this is alias, use its original's destinations.
809
1
          other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
810
        // The original is in use and is it not a constant symbol.
811
1.01k
        if (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT1
)
812
1
          return 0;
813
1.01k
        if (other_bind->destinations && other_bind->destinations->rnum > 0)
814
1.00k
          return 0;
815
1.01k
      }
816
17.5k
  }
817
16.4k
  return 1;
818
17.5k
}
819
820
static void _ccv_nnc_update_bind_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws)
821
25.0k
{
822
25.0k
  int i;
823
25.0k
  if (bind->destinations)
824
25.0k
  {
825
25.0k
    int flag = 0;
826
49.8k
    for (i = 0; !flag && 
i < bind->destinations->rnum25.0k
;
i++24.8k
)
827
24.8k
    {
828
24.8k
      const int exec_symbol_d = *(int*)ccv_array_get(bind->destinations, i);
829
24.8k
      if (exec_symbol_d == freed_exec_symbol_d)
830
24.8k
      {
831
24.8k
        if (i < bind->destinations->rnum - 1)
832
19
          *(int*)ccv_array_get(bind->destinations, i) = *(int*)ccv_array_get(bind->destinations, bind->destinations->rnum - 1);
833
24.8k
        --bind->destinations->rnum;
834
24.8k
        flag = 1;
835
24.8k
      }
836
24.8k
    }
837
    // This symbol can be freed.
838
25.0k
    if (flag && 
bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED24.8k
)
839
17.6k
    {
840
17.6k
      ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind;
841
17.6k
      if (bind->alias_ref)
842
1.01k
      {
843
1.01k
        root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1);
844
1.01k
        if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
845
1.01k
          root_bind = bind;
846
1.01k
      }
847
      // If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more.
848
      // It is possible because exec will be freed already, thus, it is safe to remove this alias out.
849
17.6k
      if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED &&
850
17.6k
        
(17.6k
(17.6k
!root_bind->sources17.6k
||
root_bind->sources->rnum == 08.64k
) ||
_ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)6.01k
) &&
851
17.6k
        
root_bind->destinations->rnum == 017.6k
)
852
17.6k
      {
853
17.6k
        if (root_bind->sources)
854
14.6k
          
for (i = 0; 8.63k
i < root_bind->sources->rnum;
i++6.00k
)
855
6.00k
            ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i));
856
17.6k
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
857
17.6k
        ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
858
17.6k
          .d = tensor_index,
859
17.6k
          .graph = graph->tape
860
17.6k
        });
861
17.6k
      } else 
if (8
bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED8
&& // Handle the case the bind is already freed, and it doesn't have any sources or destinations.
862
8
        bind->alias_ref && 
(2
!bind->sources2
||
bind->sources->rnum == 00
) &&
(2
!bind->destinations2
||
bind->destinations->rnum == 02
)) {
863
2
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
864
2
        ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
865
2
          .d = tensor_index,
866
2
          .graph = graph->tape
867
2
        });
868
2
      }
869
17.6k
    }
870
25.0k
  }
871
25.0k
}
872
873
static void _ccv_nnc_update_bind_sources_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws)
874
7.29k
{
875
7.29k
  int i;
876
7.29k
  if (bind->sources)
877
7.29k
  {
878
7.29k
    int flag = 0;
879
14.5k
    for (i = 0; !flag && 
i < bind->sources->rnum7.29k
;
i++7.29k
)
880
7.29k
    {
881
7.29k
      const int exec_symbol_d = *(int*)ccv_array_get(bind->sources, i);
882
7.29k
      if (exec_symbol_d == freed_exec_symbol_d)
883
7.29k
      {
884
7.29k
        if (i < bind->sources->rnum - 1)
885
2
          *(int*)ccv_array_get(bind->sources, i) = *(int*)ccv_array_get(bind->sources, bind->sources->rnum - 1);
886
7.29k
        --bind->sources->rnum;
887
7.29k
        flag = 1;
888
7.29k
      }
889
7.29k
    }
890
7.29k
    if (flag && !bind->alias_ref && 
bind->index >= 07.28k
&&
bind->type == CCV_NNC_TENSOR_CONSTANT4.84k
&& // If it is detached (constant but previously has sources). Now can check again.
891
7.29k
      
(3
bind->sources->rnum == 03
||
_ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)0
) &&
892
7.29k
      
(3
!bind->destinations3
||
bind->destinations->rnum == 03
))
893
3
    {
894
      // If this is constant, set it to be no symbol again.
895
3
      ccv_nnc_tensor_variable_t tv = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, bind->index);
896
3
      tv->symbol = NO_TENSOR_SYMBOL;
897
3
      _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
898
3
      ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
899
3
        .d = tensor_index,
900
3
        .graph = graph->tape
901
3
      });
902
7.28k
    } else if (flag && bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED) {
903
      // This symbol can be freed.
904
2.44k
      ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind;
905
2.44k
      if (bind->alias_ref)
906
3
      {
907
3
        root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1);
908
3
        if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
909
0
          root_bind = bind;
910
3
      }
911
      // If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more.
912
      // It is possible because exec will be freed already, thus, it is safe to remove this alias out.
913
2.44k
      if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED &&
914
2.44k
        
(2.43k
root_bind->sources->rnum == 02.43k
||
_ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)0
) &&
915
2.44k
        
(2.43k
!root_bind->destinations2.43k
||
root_bind->destinations->rnum == 02.43k
))
916
6
      {
917
6
        for (i = 0; i < root_bind->sources->rnum; 
i++0
)
918
0
          ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i));
919
6
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
920
6
        ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
921
6
          .d = tensor_index,
922
6
          .graph = graph->tape
923
6
        });
924
2.43k
      } else if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && // Handle the case the bind is already freed, and it doesn't have any sources or destinations.
925
2.43k
        bind->alias_ref && 
(3
!bind->sources3
||
bind->sources->rnum == 03
) &&
(3
!bind->destinations3
||
bind->destinations->rnum == 00
)) {
926
3
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
927
3
        ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
928
3
          .d = tensor_index,
929
3
          .graph = graph->tape
930
3
        });
931
3
      }
932
2.44k
    }
933
7.29k
  }
934
7.29k
}
935
936
static void _ccv_nnc_update_bind_sources_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, const int* const inputs, const int input_size, const int* const outputs, const int output_size, ccv_array_t* const ws)
937
15.3k
{
938
15.3k
  int i;
939
43.2k
  for (i = 0; i < input_size; 
i++27.9k
)
940
27.9k
    if (inputs[i] >= 0 && inputs[i] < binds->rnum)
941
27.9k
    {
942
27.9k
      ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, inputs[i]);
943
27.9k
      if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
944
4.00k
        continue;
945
23.9k
      if (bind->alias_ref)
946
1.01k
      {
947
1.01k
        const int alias_to = bind->alias_ref - 1;
948
1.01k
        ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to);
949
1.01k
        if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE)
950
1.01k
          _ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws);
951
1.01k
      }
952
23.9k
      _ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, bind, inputs[i], ws);
953
23.9k
    }
954
  // Note that this works because there is no overlap of inputs / outputs. (What about alias?).
955
30.8k
  for (i = 0; i < output_size; 
i++15.5k
)
956
15.5k
    if (outputs[i] >= 0 && 
outputs[i] < binds->rnum15.3k
)
957
15.3k
    {
958
15.3k
      ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, outputs[i]);
959
15.3k
      if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
960
8.02k
        continue;
961
7.28k
      if (bind->alias_ref)
962
5
      {
963
5
        const int alias_to = bind->alias_ref - 1;
964
5
        ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to);
965
5
        if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE)
966
5
          _ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws);
967
5
      }
968
7.28k
      _ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, bind, outputs[i], ws);
969
7.28k
    }
970
15.3k
}
971
972
static void _ccv_nnc_stateful_exec_free_if_possible(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol)
973
15.3k
{
974
15.3k
  if (!graph->stateful_execs)
975
6.06k
    return;
976
15.3k
  assert
(symbol.d >= 0)9.23k
;
977
9.23k
  ccv_array_t* const stateful_execs = graph->stateful_execs;
978
9.23k
  ccv_nnc_cmd_t cmd = ccv_nnc_graph_exec_symbol_cmd(graph->tape, symbol);
979
9.23k
  ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data;
980
9.23k
  if (!stateful_exec)
981
6.83k
    return;
982
  // If there is no backward, no need to apply gradients.
983
  // Otherwise, if we applied gradients, we can free it as well.
984
  // We don't free this stateful exec because apply gradients doesn't require any variables alive.
985
2.40k
  if (!stateful_exec->did_backward_but_not_apply_gradients)
986
300
  {
987
300
    const int index = stateful_exec->index;
988
300
    ccfree(stateful_exec);
989
300
    if (index < graph->reuse_stateful_exec || graph->reuse_stateful_exec < 0)
990
300
      graph->reuse_stateful_exec = index;
991
300
    *(ccv_nnc_stateful_exec_t**)ccv_array_get(stateful_execs, index) = 0;
992
300
  } else
993
2.10k
    stateful_exec->should_free = 1;
994
2.40k
}
995
996
static int _ccv_nnc_tensor_bind_trace_forward_to_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_variable_graph_bind_t* const bind, ccv_nnc_tensor_variable_graph_bind_t* const root_bind, int* const ws_start, const int assuming_no_source) // assuming_no_source means we are going to remove sources if possible, thus, it is irrelevant.
997
27.4k
{
998
27.4k
  int can_free_symbol = 0;
999
27.4k
  const int sources_and_is_only_output = (root_bind->sources && 
root_bind->sources->rnum > 016.3k
) &&
_ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_variable->symbol.d)11.4k
;
1000
27.4k
  if (!root_bind->sources || 
root_bind->sources->rnum == 016.3k
||
sources_and_is_only_output11.4k
||
assuming_no_source1.02k
)
1001
26.4k
  {
1002
26.4k
    int i, j;
1003
26.4k
    can_free_symbol = 1; // Assume we can free this symbol.
1004
26.4k
    if (!graph->ws)
1005
18
      graph->ws = ccv_array_new(sizeof(int), root_bind->destinations ? root_bind->destinations->rnum : 
00
, 0);
1006
26.4k
    ccv_array_t* const ws = graph->ws;
1007
26.4k
    ccv_array_clear(ws);
1008
26.4k
    if (root_bind->destinations)
1009
43.5k
      
for (i = 0; 21.9k
i < root_bind->destinations->rnum;
i++21.5k
)
1010
21.5k
        ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->destinations, i));
1011
26.4k
    const int ws_init_size = ws->rnum;
1012
26.4k
    *ws_start = ws_init_size;
1013
    // Add all sources from root_bind, in case it has been freed (during update bind sources / destinations when free.
1014
26.4k
    if (root_bind->sources)
1015
25.7k
      
for (i = 0; 15.2k
i < root_bind->sources->rnum;
i++10.4k
)
1016
10.4k
        ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i));
1017
    // If we cannot loop over any exec symbols (this is not in use). It is simple to determine whether we want
1018
    // to free it or not: if this is an alias and the origin is not freed, we cannot free this symbol.
1019
26.4k
    if (ws_init_size == 0)
1020
4.90k
      can_free_symbol = (!bind->alias_ref || 
root_bind->index < 018
);
1021
    // Go through all the exec symbols use this tensor, to see whether they have inputs that has other sources.
1022
48.0k
    for (i = 0; i < ws_init_size; 
i++21.5k
)
1023
21.5k
    {
1024
21.5k
      const int exec_symbol_d = *(int*)ccv_array_get(ws, i);
1025
21.5k
      const ccv_nnc_graph_exec_symbol_t symbol = {
1026
21.5k
        .d = exec_symbol_d,
1027
21.5k
        .graph = graph->tape
1028
21.5k
      };
1029
21.5k
      const int* inputs; int input_size;
1030
21.5k
      const int* outputs; int output_size;
1031
21.5k
      ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size);
1032
21.5k
      int flag = 0; // flag denotes whether there are cases to keep this exec symbol.
1033
21.5k
      if (!root_bind->sources || 
root_bind->sources->rnum == 010.8k
||
assuming_no_source8.44k
)
1034
13.1k
      {
1035
        // If there is no sources, check if other sources can depend on this exec, if they do, we cannot free this.
1036
36.9k
        for (j = 0; !flag && 
j < input_size28.6k
;
j++23.8k
)
1037
23.8k
          if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum && inputs[j] != tensor_variable->symbol.d)
1038
14.5k
          {
1039
14.5k
            ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j]);
1040
14.5k
            if (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT6.24k
)
1041
6.22k
              flag = 1;
1042
8.29k
            else {
1043
8.29k
              if (other_bind->alias_ref) // If this is alias, use its original's destinations.
1044
28
                other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
1045
8.29k
              flag = (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT33
) ||
(8.28k
other_bind->type != CCV_NNC_TENSOR_CONSTANT8.28k
&&
other_bind->sources8.25k
&&
other_bind->sources->rnum > 02.23k
); // Constant should have no source, or it is detached.
1046
8.29k
            }
1047
14.5k
          }
1048
13.1k
      } else {
1049
        // If there are sources, check whether we have outputs or not. If we do, we cannot free this.
1050
16.9k
        for (j = 0; !flag && 
j < output_size8.45k
;
j++8.45k
)
1051
8.45k
          if (outputs[j] >= 0 && 
outputs[j] < graph->binds->rnum8.45k
)
1052
8.45k
          {
1053
8.45k
            ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j]);
1054
8.45k
            if (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT4.43k
)
1055
4.43k
              flag = 1;
1056
4.02k
            else {
1057
4.02k
              if (other_bind->alias_ref) // If this is alias, use its original's destinations.
1058
0
                other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
1059
4.02k
              flag = (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT0
) || (other_bind->destinations && other_bind->destinations->rnum > 0);
1060
4.02k
            }
1061
8.45k
          }
1062
8.44k
      }
1063
      // This exec can be freed if there is no input required or there is no output required.
1064
21.5k
      can_free_symbol = (can_free_symbol && !flag);
1065
21.5k
      if (!flag)
1066
4.87k
      {
1067
        // Go over inputs and remove all references from binded destinations.
1068
        // and go over outputs remove all references from binded sources.
1069
4.87k
        _ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws);
1070
4.87k
        const int* outgoings; int outgoing_size;
1071
4.87k
        ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size);
1072
7.30k
        for (j = 0; j < outgoing_size; 
j++2.43k
)
1073
2.43k
          ccv_array_add_unique_int(ws, outgoings[j]);
1074
4.87k
        _ccv_nnc_stateful_exec_free_if_possible(graph, symbol);
1075
4.87k
        ccv_nnc_graph_exec_symbol_free(graph->tape, symbol);
1076
4.87k
      }
1077
21.5k
    }
1078
26.4k
  }
1079
27.4k
  return can_free_symbol;
1080
27.4k
}
1081
1082
static void _ccv_nnc_tensor_bind_trace_backward_to_free(ccv_nnc_dynamic_graph_t* const graph, ccv_array_t* const ws, const int ws_start)
1083
9.74k
{
1084
9.74k
  int i, j;
1085
  // Now, go over the outgoings, if it is removed, add more to it. Note that the ws array can grow while iterating over.
1086
22.6k
  for (i = ws_start; i < ws->rnum; 
i++12.8k
)
1087
12.8k
  {
1088
12.8k
    const int exec_symbol_d = *(int*)ccv_array_get(ws, i);
1089
12.8k
    const ccv_nnc_graph_exec_symbol_t symbol = {
1090
12.8k
      .d = exec_symbol_d,
1091
12.8k
      .graph = graph->tape
1092
12.8k
    };
1093
12.8k
    const int* inputs; int input_size;
1094
12.8k
    const int* outputs; int output_size;
1095
12.8k
    ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size);
1096
12.8k
    int flag = 0;
1097
29.9k
    for (j = 0; !flag && 
j < input_size19.5k
;
j++17.1k
)
1098
17.1k
      if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum)
1099
17.1k
      {
1100
17.1k
        ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j]);
1101
17.1k
        if (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT4.64k
)
1102
4.44k
          flag = 1;
1103
12.6k
        else {
1104
12.6k
          if (other_bind->alias_ref) // If this is alias, use its original's destinations.
1105
1.02k
            other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
1106
12.6k
          flag = (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT203
) || (other_bind->type != CCV_NNC_TENSOR_CONSTANT &&
other_bind->sources12.4k
&&
other_bind->sources->rnum > 010.4k
);
1107
12.6k
        }
1108
17.1k
      }
1109
12.8k
    if (flag) // If any inputs make free this destination impossible. Check whether all its outputs are done.
1110
10.4k
    {
1111
10.4k
      int output_flag = 0;
1112
21.1k
      for (j = 0; !output_flag && 
j < output_size18.6k
;
j++10.6k
)
1113
10.6k
        if (outputs[j] >= 0 && 
outputs[j] < graph->binds->rnum10.4k
)
1114
10.4k
        {
1115
10.4k
          ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j]);
1116
10.4k
          if (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT2.41k
)
1117
2.41k
            output_flag = 1;
1118
8.04k
          else {
1119
8.04k
            if (other_bind->alias_ref) // If this is alias, use its original's destinations.
1120
0
              other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
1121
8.04k
            output_flag = (other_bind->index >= 0 && 
other_bind->type != CCV_NNC_TENSOR_CONSTANT0
) || (other_bind->destinations &&
other_bind->destinations->rnum > 020
);
1122
8.04k
          }
1123
10.4k
        }
1124
10.4k
      if (!output_flag) // If no output is used (used means it has a tensor variable, or it has a destination).
1125
8.02k
        flag = 0;
1126
10.4k
    }
1127
    // Went over all the inputs, it turns out no more inputs has other references, safe to remove.
1128
12.8k
    if (!flag)
1129
10.4k
    {
1130
10.4k
      _ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws);
1131
10.4k
      const int* outgoings; int outgoing_size;
1132
10.4k
      ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size);
1133
      // It it has outgoings, add that for further inspection.
1134
12.8k
      for (j = 0; j < outgoing_size; 
j++2.40k
)
1135
2.40k
        ccv_array_add_unique_int(ws, outgoings[j]);
1136
10.4k
      _ccv_nnc_stateful_exec_free_if_possible(graph, symbol);
1137
10.4k
      ccv_nnc_graph_exec_symbol_free(graph->tape, symbol);
1138
10.4k
    }
1139
12.8k
  }
1140
9.74k
}
1141
1142
void ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
1143
32.1k
{
1144
  // If it contains a symbol, this tensor variable is not a free variable. It is either used as input or output.
1145
32.1k
  if (tensor_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1146
27.4k
  {
1147
    // If it is not a free variable, when can we free the symbol and the underlying variable?
1148
    // 1. There should be no sources (the command generate this tensor should be freed) or the output of these sources is only the current one;
1149
    // 2. The destinations (the commands that uses this tensor) should have no other inputs, or the other inputs has no binded sources as well.
1150
27.4k
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, tensor_variable->symbol.d);
1151
    // There should be no source associated with it no more.
1152
    // I am free if no exec symbol is producing me or the symbol producing me can only producing me (thus, it is not required to
1153
    // compute gradient because I am the only variable it can compute gradient for).
1154
27.4k
    ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind;
1155
27.4k
    if (bind->alias_ref)
1156
1.03k
    {
1157
1.03k
      const int alias_to = bind->alias_ref - 1;
1158
1.03k
      root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, alias_to);
1159
1.03k
    }
1160
27.4k
    int ws_start;
1161
27.4k
    const int can_free_symbol = _ccv_nnc_tensor_bind_trace_forward_to_free(graph, tensor_variable, bind, root_bind, &ws_start, 0);
1162
27.4k
    if (can_free_symbol)
1163
9.73k
    {
1164
9.73k
      _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
1165
9.73k
      ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol);
1166
9.73k
      _ccv_nnc_tensor_bind_trace_backward_to_free(graph, graph->ws, ws_start);
1167
17.7k
    } else { // If this symbol is not freed, move the tensor view to the bind.
1168
      // If current bind is an alias, and it doesn't have any sources or destinations. We cannot find this alias
1169
      // through any exec. This is not only safe to delete, but has to be deleted. We don't need to handle this
1170
      // if free_symbol is true, because when that happens, root_bind will be deleted, and we will clean up the
1171
      // alias in that process.
1172
17.7k
      if (bind->alias_ref && 
(1.03k
!bind->sources1.03k
||
bind->sources->rnum == 05
) &&
(1.03k
!bind->destinations1.03k
||
bind->destinations->rnum == 01.01k
))
1173
20
      {
1174
20
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
1175
20
        ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol);
1176
17.7k
      } else {
1177
17.7k
        bind->index = CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED; // This tensor variable will be freed, but this symbol extra will continue exists.
1178
17.7k
        bind->destructor_hook.func = tensor_variable->destructor_hook.func; // Transfer the destructor callback.
1179
17.7k
        bind->destructor_hook.context = tensor_variable->destructor_hook.context; // Transfer the destructor callback context.
1180
17.7k
        bind->tensor_view = tensor_variable->tensor_view; // Transfer the ownership to the bind.
1181
17.7k
        tensor_variable->tensor_view = 0;
1182
17.7k
      }
1183
17.7k
    }
1184
27.4k
  }
1185
32.1k
  _ccv_nnc_tensor_variable_free(graph, tensor_variable, 1);
1186
32.1k
}
1187
1188
void ccv_nnc_tensor_variable_detach(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
1189
6
{
1190
  // This cannot be an alias.
1191
6
  assert(!tensor_variable->alias_index_ref);
1192
  // If no computation done yet, mark this as constant.
1193
6
  if (tensor_variable->symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1194
0
  {
1195
0
    tensor_variable->type = CCV_NNC_TENSOR_CONSTANT;
1196
0
    return;
1197
0
  }
1198
  // Otherwise, we need to do some book keeping updates to make sure it doesn't participate gradient computation any more.
1199
6
  ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, tensor_variable->symbol.d);
1200
  // Because tensor variable cannot be alias, its bind cannot have alias pointer.
1201
6
  assert(!bind->alias_ref);
1202
  // Go through to break ties between sources and destinations.
1203
6
  int i, j;
1204
6
  if (bind->sources && bind->destinations)
1205
6
  {
1206
11
    for (i = 0; i < bind->sources->rnum; 
i++5
)
1207
5
    {
1208
5
      const int s = *(int*)ccv_array_get(bind->sources, i);
1209
5
      const int* outputs; int output_size;
1210
5
      const ccv_nnc_graph_exec_symbol_t s_symbol = {
1211
5
        .d = s,
1212
5
        .graph = graph->tape
1213
5
      };
1214
5
      ccv_nnc_graph_exec_symbol_io(graph->tape, s_symbol, 0, 0, &outputs, &output_size);
1215
10
      for (j = 0; j < bind->destinations->rnum; 
j++5
)
1216
5
      {
1217
5
        const int d = *(int*)ccv_array_get(bind->destinations, j);
1218
5
        const ccv_nnc_graph_exec_symbol_t d_symbol = {
1219
5
          .d = d,
1220
5
          .graph = graph->tape
1221
5
        };
1222
5
        const int* inputs; int input_size;
1223
5
        ccv_nnc_graph_exec_symbol_io(graph->tape, d_symbol, &inputs, &input_size, 0, 0);
1224
5
        int x, y;
1225
5
        int flag = 0; // Whether we find a symbol that connects source and destination but not the current one we detach. If found, we cannot break the tie between s_symbol and d_symbol.
1226
10
        for (x = 0; !flag && x < output_size; 
x++5
)
1227
5
        {
1228
5
          ccv_nnc_tensor_symbol_t x_symbol = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){
1229
5
            .d = outputs[x],
1230
5
            .graph = graph->tape
1231
5
          });
1232
5
          if (x_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1233
5
          {
1234
5
            x_symbol.d = outputs[x];
1235
5
            x_symbol.graph = graph->tape;
1236
5
          }
1237
5
          if (x_symbol.d == tensor_variable->symbol.d || 
x_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL0
)
1238
5
            continue;
1239
0
          for (y = 0; !flag && y < input_size; y++)
1240
0
          {
1241
0
            ccv_nnc_tensor_symbol_t y_symbol = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){
1242
0
              .d = inputs[y],
1243
0
              .graph = graph->tape
1244
0
            });
1245
0
            if (y_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1246
0
            {
1247
0
              y_symbol.d = inputs[y];
1248
0
              y_symbol.graph = graph->tape;
1249
0
            }
1250
0
            if (y_symbol.d == tensor_variable->symbol.d || y_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1251
0
              continue;
1252
0
            flag = (x_symbol.d == y_symbol.d);
1253
0
          }
1254
0
        }
1255
5
        if (!flag)
1256
5
          ccv_nnc_graph_exec_symbol_disjoin(graph->tape, s_symbol, d_symbol);
1257
5
      }
1258
5
    }
1259
6
  }
1260
6
  const int sources_and_is_only_output = (bind->sources && bind->sources->rnum > 0) && 
_ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_variable->symbol.d)5
;
1261
6
  if (!bind->sources || bind->sources->rnum == 0 || 
sources_and_is_only_output5
)
1262
6
  {
1263
6
    int ws_start = -1;
1264
6
    _ccv_nnc_tensor_bind_trace_forward_to_free(graph, tensor_variable, bind, bind, &ws_start, 1);
1265
    // Because we are detaching from the graph, there is no need to forward trace to see if it is not used and
1266
    // then to remove the source execs. We can remove them right now, breaking the graph in two. That is why
1267
    // we called trace backward to free regardless the outcome of the forward to free.
1268
6
    if (ws_start == -1)
1269
0
    {
1270
0
      if (!graph->ws)
1271
0
        graph->ws = ccv_array_new(sizeof(int), bind->destinations ? bind->destinations->rnum : 0, 0);
1272
0
      ccv_array_t* const ws = graph->ws;
1273
0
      ccv_array_clear(ws);
1274
0
      if (bind->sources)
1275
0
        for (i = 0; i < bind->sources->rnum; i++)
1276
0
          ccv_array_add_unique_int(ws, *(int*)ccv_array_get(bind->sources, i));
1277
0
      ws_start = 0;
1278
0
    }
1279
6
    _ccv_nnc_tensor_bind_trace_backward_to_free(graph, graph->ws, ws_start);
1280
6
  }
1281
  // If now bind has no relevant sources or destinations, we can safely free the underlying tensor symbol.
1282
6
  if ((!bind->sources || bind->sources->rnum == 0) && 
(1
!bind->destinations1
||
bind->destinations->rnum == 01
))
1283
1
  {
1284
1
    _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
1285
1
    ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol);
1286
1
    tensor_variable->type = CCV_NNC_TENSOR_CONSTANT;
1287
1
    tensor_variable->symbol = NO_TENSOR_SYMBOL;
1288
1
    return;
1289
1
  }
1290
  // Mark both as constant, such that even if it cannot be freed now, it can be freed as soon as possible later.
1291
5
  bind->type = CCV_NNC_TENSOR_CONSTANT;
1292
5
  tensor_variable->type = CCV_NNC_TENSOR_CONSTANT;
1293
5
}
1294
1295
void ccv_nnc_dynamic_graph_has_effect_to_tensor_variables(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t* const source_variables, const int source_variable_size, const ccv_nnc_tensor_variable_t* const destination_variables, const int destination_variable_size, uint64_t* const bitmask)
1296
12
{
1297
12
  int i, j;
1298
12
  ccv_array_t* const sources_destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), source_variable_size + destination_variable_size, 0);
1299
31
  for (i = 0; i < source_variable_size; 
i++19
)
1300
19
  {
1301
19
    if (source_variables[i]->symbol.d < 0)
1302
0
      continue;
1303
19
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d);
1304
19
    if (bind->destinations && 
bind->destinations->rnum > 017
)
1305
42
      
for (j = 0; 17
j < bind->destinations->rnum;
j++25
)
1306
25
      {
1307
        // It is ok to have duplicate symbols.
1308
25
        const int d = *(int*)ccv_array_get(bind->destinations, j);
1309
25
        ccv_nnc_graph_exec_symbol_t symbol = {
1310
25
          .d = d,
1311
25
          .graph = graph->tape
1312
25
        };
1313
25
        ccv_array_push(sources_destinations, &symbol);
1314
25
      }
1315
19
  }
1316
12
  const int source_size = sources_destinations->rnum;
1317
24
  for (i = 0; i < destination_variable_size; 
i++12
)
1318
12
  {
1319
12
    if (destination_variables[i]->symbol.d < 0)
1320
0
      continue;
1321
12
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, destination_variables[i]->symbol.d);
1322
12
    if (bind->sources && bind->sources->rnum > 0)
1323
20
      
for (j = 0; 10
j < bind->sources->rnum;
j++10
)
1324
10
      {
1325
        // It is ok to have duplicate symbols.
1326
10
        const int d = *(int*)ccv_array_get(bind->sources, j);
1327
10
        ccv_nnc_graph_exec_symbol_t symbol = {
1328
10
          .d = d,
1329
10
          .graph = graph->tape
1330
10
        };
1331
10
        ccv_array_push(sources_destinations, &symbol);
1332
10
      }
1333
12
  }
1334
12
  const int destination_size = sources_destinations->rnum - source_size;
1335
12
  if (source_size == 0 || destination_size == 0)
1336
2
  {
1337
2
    ccv_array_free(sources_destinations);
1338
2
    return;
1339
2
  }
1340
10
  const int bitmask_size = ((source_size + 63) >> 6);
1341
10
  assert(bitmask_size < 256);
1342
10
  uint64_t exec_bitmask[bitmask_size];
1343
10
  ccv_nnc_symbolic_graph_sources_to_destinations(graph->tape, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, 0), source_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, source_size), destination_size, exec_bitmask);
1344
10
  int k = 0;
1345
27
  for (i = 0; i < source_variable_size; 
i++17
)
1346
17
  {
1347
17
    if (source_variables[i]->symbol.d < 0)
1348
0
    {
1349
0
      bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63));
1350
0
      continue;
1351
0
    }
1352
17
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d);
1353
17
    int flag = 0;
1354
17
    if (bind->destinations && 
bind->destinations->rnum > 015
)
1355
15
    {
1356
15
      assert(k <= source_size - bind->destinations->rnum);
1357
32
      
for (j = 0; 15
!flag &&
j < bind->destinations->rnum20
;
j++17
)
1358
17
        flag = (((uint64_t)1 << ((k + j) & 63)) & exec_bitmask[(k + j) >> 6]);
1359
15
      k += bind->destinations->rnum;
1360
15
    }
1361
17
    if (flag)
1362
12
      bitmask[i >> 6] |= ((uint64_t)1 << (i & 63));
1363
5
    else
1364
5
      bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63));
1365
17
  }
1366
10
  ccv_array_free(sources_destinations);
1367
10
}
1368
1369
int ccv_nnc_dynamic_graph_bookkeeping_count(const ccv_nnc_dynamic_graph_t* const graph, const int type)
1370
451
{
1371
451
  return ccv_nnc_symbolic_graph_active_symbol_count(graph->tape, type);
1372
451
}
1373
1374
void ccv_nnc_dynamic_graph_dot(const ccv_nnc_dynamic_graph_t* const graph, const int flags, FILE* out)
1375
419
{
1376
419
  ccv_nnc_symbolic_graph_dot(graph->tape, flags, out);
1377
419
}
1378
1379
void ccv_nnc_dynamic_graph_format(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
1380
0
{
1381
0
  ccv_nnc_symbolic_graph_format(graph->tape, 0, 0, 0, 0, format_fn, context);
1382
0
}