Coverage Report

Created: 2021-10-28 19:33

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/ccv_nnc_dynamic_graph.c
Line
Count
Source
1
#include "ccv_nnc.h"
2
#include "ccv_nnc_easy.h"
3
#include "ccv_nnc_internal.h"
4
#include "ccv_nnc_easy.h"
5
#include "ccv_internal.h"
6
#include "_ccv_nnc_dynamic_graph.h"
7
8
// MARK - Level-4 API
9
10
ccv_nnc_dynamic_graph_t* ccv_nnc_dynamic_graph_new(void)
11
41
{
12
41
  ccv_nnc_dynamic_graph_t* graph = ccmalloc(sizeof(ccv_nnc_dynamic_graph_t));
13
41
  graph->no_grad = 0;
14
41
  graph->reuse_var = -1;
15
41
  graph->mp_hdr = -1;
16
41
  graph->vars = ccv_array_new(sizeof(ccv_nnc_tensor_variable_t), 1, 0);
17
41
  graph->binds = ccv_array_new(sizeof(ccv_nnc_tensor_variable_graph_bind_t), 1, 0);
18
41
  graph->tape = ccv_nnc_symbolic_graph_new();
19
41
  graph->freed = kh_init(dy_str);
20
41
  graph->allocd = kh_init(dy_alloc);
21
  // These may not be used as frequently; init as needed.
22
41
  graph->stateful_execs = 0;
23
41
  graph->reuse_stateful_exec = -1;
24
41
  graph->stream_map = 0;
25
41
  graph->ws = 0;
26
41
  return graph;
27
41
}
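
For orientation, here is a minimal lifecycle sketch of the constructor measured above. It is not part of the instrumented source; ccv_nnc_init(), the CCV_TENSOR_CPU_MEMORY / CCV_TENSOR_FORMAT_NHWC / CCV_32F constants, the ccv_nnc_tensor_param_t field names and the two-argument ccv_nnc_tensor_variable_new() form are assumptions drawn from the library's public headers, not from this listing.

#include "ccv_nnc.h"

int main(void)
{
  ccv_nnc_init(); // assumed: registers the built-in commands before any graph work
  // Create a dynamic graph; the counter above shows 41 graphs were created during this test run.
  ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new();
  // Describe a 4-element single-precision CPU tensor (field names per the public headers).
  ccv_nnc_tensor_param_t info = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32F,
    .dim = {4},
  };
  ccv_nnc_tensor_variable_t const var = ccv_nnc_tensor_variable_new(graph, info);
  ccv_nnc_tensor_variable_free(graph, var);
  // Freeing the graph releases any remaining variables, binds and streams (see ccv_nnc_dynamic_graph_free below).
  ccv_nnc_dynamic_graph_free(graph);
  return 0;
}
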
28
29
static void _ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int zeroing)
30
32.2k
{
31
32.2k
  const int index = tensor_variable->index;
32
32.2k
  if (tensor_variable->tensor_view)
33
14.6k
  {
34
14.6k
    if (tensor_variable->destructor_hook.func)
35
4
      tensor_variable->destructor_hook.func(graph, (ccv_nnc_tensor_t*)tensor_variable->tensor_view, tensor_variable->destructor_hook.context);
36
14.6k
    if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tensor_variable->tensor_view))
37
14.1k
    {
38
14.1k
      if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view))
39
9
        ccv_nnc_tensor_view_free(tensor_variable->tensor_view);
40
14.1k
      else {
41
14.1k
        if (!tensor_variable->alias_index_ref && // Return this memory to the graph.
42
14.1k
          CCV_TENSOR_GET_MEMORY(tensor_variable->tensor_view->info.type) == CCV_TENSOR_GPU_MEMORY)
43
404
          ccv_nnc_dynamic_graph_xpu_free(graph, tensor_variable->tensor_view->data.ptr);
44
14.1k
        ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view);
45
14.1k
      }
46
14.1k
    }
47
14.6k
  }
48
32.2k
  ccfree(tensor_variable);
49
32.2k
  if (zeroing)
50
32.0k
    *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, index) = 0;
51
32.2k
  int i;
52
64.3k
  for (i = graph->vars->rnum - 1; i >= 0; i--)
53
64.1k
    if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) != 0)
54
32.0k
    {
55
32.0k
      graph->vars->rnum = i + 1;
56
32.0k
      break;
57
32.0k
    }
58
32.2k
  if (index < graph->vars->rnum &&
59
32.2k
    (index < graph->reuse_var || graph->reuse_var < 0))
60
11.8k
    graph->reuse_var = index;
61
20.4k
  else if (graph->reuse_var >= graph->vars->rnum)
62
5.61k
    graph->reuse_var = -1;
63
32.2k
}
64
65
static void _ccv_nnc_tensor_variable_graph_bind_free(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int zeroing)
66
27.7k
{
67
27.7k
  bind->index = CCV_NNC_TENSOR_NO_VARIABLE;
68
27.7k
  if (bind->sources)
69
15.3k
    ccv_array_free(bind->sources);
70
27.7k
  if (bind->destinations)
71
22.9k
    ccv_array_free(bind->destinations);
72
27.7k
  if (bind->tensor_view)
73
17.6k
  {
74
17.6k
    if (bind->destructor_hook.func)
75
3
      bind->destructor_hook.func(graph, (ccv_nnc_tensor_t*)bind->tensor_view, bind->destructor_hook.context);
76
17.6k
    if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(bind->tensor_view))
77
17.4k
    {
78
17.4k
      if (CCV_IS_TENSOR_VIEW(bind->tensor_view))
79
2
        ccv_nnc_tensor_view_free(bind->tensor_view);
80
17.4k
      else {
81
17.4k
        if (!bind->alias_ref && // Return this memory to the graph.
82
17.4k
          CCV_TENSOR_GET_MEMORY(bind->tensor_view->info.type) == CCV_TENSOR_GPU_MEMORY)
83
32
          ccv_nnc_dynamic_graph_xpu_free(graph, bind->tensor_view->data.ptr);
84
17.4k
        ccv_nnc_tensor_free((ccv_nnc_tensor_t*)bind->tensor_view);
85
17.4k
      }
86
17.4k
    }
87
17.6k
  }
88
27.7k
  if (zeroing)
89
27.4k
  {
90
27.4k
    bind->sources = 0;
91
27.4k
    bind->destinations = 0;
92
27.4k
    bind->tensor_view = 0;
93
27.4k
    bind->destructor_hook.func = 0;
94
27.4k
    bind->destructor_hook.context = 0;
95
27.4k
  }
96
27.7k
}
97
98
void ccv_nnc_dynamic_graph_free(ccv_nnc_dynamic_graph_t* const graph)
99
41
{
100
41
  int i;
101
282
  for (i = 0; i < graph->vars->rnum; i++)
102
241
  {
103
241
    ccv_nnc_tensor_variable_t tensor_variable = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i);
104
241
    if (tensor_variable)
105
207
      _ccv_nnc_tensor_variable_free(graph, tensor_variable, 0);
106
241
  }
107
41
  ccv_array_free(graph->vars);
108
346
  for (i = 0; i < graph->binds->rnum; i++)
109
305
    _ccv_nnc_tensor_variable_graph_bind_free(graph, (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i), 0);
110
41
  ccv_array_free(graph->binds);
111
41
  ccv_nnc_symbolic_graph_free(graph->tape);
112
41
  if (graph->ws)
113
27
    ccv_array_free(graph->ws);
114
41
  if (graph->stateful_execs)
115
8
  {
116
25
    for (i = 0; i < graph->stateful_execs->rnum; i++)
117
17
    {
118
17
      ccv_nnc_stateful_exec_t* const stateful_exec = *(ccv_nnc_stateful_exec_t**)ccv_array_get(graph->stateful_execs, i);
119
17
      if (stateful_exec)
120
5
        ccfree(stateful_exec);
121
17
    }
122
8
    ccv_array_free(graph->stateful_execs);
123
8
  }
124
41
  if (graph->stream_map)
125
10
  {
126
10
    khiter_t k;
127
58
    for (k = kh_begin(graph->stream_map); k != kh_end(graph->stream_map); ++k)
128
48
    {
129
48
      if (!kh_exist(graph->stream_map, k))
130
25
        continue;
131
23
      ccv_nnc_stream_context_t* const stream = kh_val(graph->stream_map, k);
132
23
      ccv_nnc_stream_context_free(stream);
133
23
    }
134
10
    kh_destroy(stream_map, graph->stream_map);
135
10
  }
136
41
  ccv_nnc_dynamic_graph_xpu_alloc_destroy(graph);
137
41
  ccfree(graph);
138
41
}
139
140
void ccv_nnc_tensor_variable_set(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_t* const tensor)
141
639
{
142
639
  assert(!tensor_variable->alias_index_ref);
143
639
  if (tensor_variable->tensor_view && !CCV_NNC_IS_EXTERN_TENSOR_VIEW(tensor_variable->tensor_view))
144
0
  {
145
0
    assert(!CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view));
146
0
    ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view);
147
0
  }
148
639
  tensor_variable->info = tensor->info;
149
639
  tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)((uintptr_t)tensor | 1);
150
639
}
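
A hedged sketch of how ccv_nnc_tensor_variable_set() above is typically used with an externally managed tensor. The tensor parameter construction mirrors the sketch after ccv_nnc_dynamic_graph_new(); the two- and three-argument convenience forms of ccv_nnc_tensor_variable_new() and ccv_nnc_tensor_from_variable() are taken from the public headers rather than from this listing, everything else appears in this file.

#include <assert.h>
#include "ccv_nnc.h"

static void set_and_read(ccv_nnc_dynamic_graph_t* const graph)
{
  ccv_nnc_tensor_param_t info = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32F,
    .dim = {1},
  };
  ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_new(0, info, 0); // caller-owned allocation
  tensor->data.f32[0] = 3.14f;
  ccv_nnc_tensor_variable_t const var = ccv_nnc_tensor_variable_new(graph, info);
  // Marks the variable as an extern tensor view; the low-bit tag set above keeps the graph from freeing it.
  ccv_nnc_tensor_variable_set(graph, var, tensor);
  ccv_nnc_tensor_t* const back = ccv_nnc_tensor_from_variable(graph, var, 0);
  assert(back->data.f32[0] == 3.14f);
  ccv_nnc_tensor_variable_free(graph, var); // does not free the extern tensor
  ccv_nnc_tensor_free(tensor);              // caller keeps ownership
}
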
151
152
void ccv_nnc_tensor_variable_destructor_hook(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_variable_destructor_f func, void* const context)
153
7
{
154
7
  tensor_variable->destructor_hook.func = func;
155
7
  tensor_variable->destructor_hook.context = context;
156
7
}
157
158
inline static void _ccv_nnc_tensor_variable_init(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_param_t info)
159
31.2k
{
160
31.2k
  tensor_variable->alias_index_ref = 0;
161
31.2k
  tensor_variable->destructor_hook.func = 0;
162
31.2k
  tensor_variable->destructor_hook.context = 0;
163
31.2k
  tensor_variable->info = info;
164
31.2k
  tensor_variable->symbol = NO_TENSOR_SYMBOL;
165
31.2k
  tensor_variable->tensor_view = 0;
166
31.2k
  if (graph->reuse_var >= 0)
167
802
  {
168
802
    const int reuse_var = graph->reuse_var;
169
802
    assert(reuse_var < graph->vars->rnum);
170
802
    tensor_variable->index = reuse_var;
171
802
    *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var) = tensor_variable;
172
802
    int i;
173
802
    graph->reuse_var = -1;
174
1.43k
    for (i = reuse_var + 1; i < graph->vars->rnum && graph->reuse_var < 0; i++)
175
631
      if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) == 0)
176
591
        graph->reuse_var = i;
177
30.4k
  } else {
178
30.4k
    tensor_variable->index = graph->vars->rnum;
179
30.4k
    ccv_array_push(graph->vars, &tensor_variable);
180
30.4k
  }
181
31.2k
}
182
183
ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info)
184
31.2k
{
185
31.2k
  ccv_nnc_tensor_variable_t tensor_variable = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s));
186
31.2k
  tensor_variable->type = CCV_NNC_TENSOR_VARIABLE;
187
31.2k
  _ccv_nnc_tensor_variable_init(graph, tensor_variable, info);
188
31.2k
  return tensor_variable;
189
31.2k
}
190
191
ccv_nnc_tensor_variable_t ccv_nnc_tensor_constant_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info)
192
29
{
193
29
  ccv_nnc_tensor_variable_t tensor_variable = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s));
194
29
  tensor_variable->type = CCV_NNC_TENSOR_CONSTANT;
195
29
  _ccv_nnc_tensor_variable_init(graph, tensor_variable, info);
196
29
  return tensor_variable;
197
29
}
198
199
int ccv_nnc_tensor_variable_is_constant(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
200
0
{
201
0
  return tensor_variable->type == CCV_NNC_TENSOR_CONSTANT;
202
0
}
203
204
ccv_nnc_tensor_param_t ccv_nnc_tensor_variable_params(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
205
0
{
206
0
  return tensor_variable->info;
207
0
}
208
209
ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_alias_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info)
210
1.03k
{
211
1.03k
  assert(!tensor_variable->alias_index_ref);
212
1.03k
  ccv_nnc_tensor_variable_t variable_alias = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s));
213
1.03k
  variable_alias->type = tensor_variable->type;
214
1.03k
  variable_alias->alias_index_ref = tensor_variable->index + 1;
215
1.03k
  variable_alias->info = info;
216
1.03k
  variable_alias->symbol = NO_TENSOR_SYMBOL;
217
1.03k
  variable_alias->destructor_hook.func = 0;
218
1.03k
  variable_alias->destructor_hook.context = 0;
219
1.03k
  variable_alias->tensor_view = 0;
220
1.03k
  memcpy(variable_alias->ofs, ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
221
1.03k
  memcpy(variable_alias->inc, inc, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
222
1.03k
  if (graph->reuse_var >= 0)
223
0
  {
224
0
    const int reuse_var = graph->reuse_var;
225
0
    assert(reuse_var < graph->vars->rnum);
226
0
    variable_alias->index = reuse_var;
227
0
    *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var) = variable_alias;
228
0
    int i;
229
0
    graph->reuse_var = -1;
230
0
    for (i = reuse_var + 1; i < graph->vars->rnum && graph->reuse_var < 0; i++)
231
0
      if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) == 0)
232
0
        graph->reuse_var = i;
233
1.03k
  } else {
234
1.03k
    variable_alias->index = graph->vars->rnum;
235
1.03k
    ccv_array_push(graph->vars, &variable_alias);
236
1.03k
  }
237
1.03k
  return variable_alias;
238
1.03k
}
239
240
ccv_nnc_tensor_t* ccv_nnc_tensor_from_variable_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_stream_context_t* const stream_context)
241
68.9k
{
242
68.9k
  if (tensor_variable->tensor_view)
243
37.3k
  {
244
37.3k
    if (tensor_variable->alias_index_ref)
245
1.02k
    {
246
1.02k
      const int alias_index = tensor_variable->alias_index_ref - 1;
247
1.02k
      assert(alias_index >= 0);
248
1.02k
      ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
249
1.02k
      if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view))
250
1
      {
251
1
        ccv_nnc_tensor_view_t* const tv = tensor_variable->tensor_view;
252
        // We cannot have an alias with custom set tensor, otherwise the pointer update is invalid.
253
1
        assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv));
254
        // Update the tensor_view pointer every time we access it, because the underlying variable it aliases may have changed.
255
1
        tv->data.u8 = CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8 + tv->off;
256
1.01k
      } else {
257
1.01k
        ccv_nnc_tensor_t* const tv = (ccv_nnc_tensor_t*)tensor_variable->tensor_view;
258
        // We cannot have an alias with custom set tensor, otherwise the pointer update is invalid.
259
1.01k
        assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv));
260
        // Update the tensor_view pointer every time we access it, because the underlying variable it aliases may have changed.
261
1.01k
        tv->data.u8 = CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8;
262
1.01k
      }
263
1.02k
    }
264
37.3k
    return (ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(tensor_variable->tensor_view);
265
37.3k
  }
266
31.6k
  if (!tensor_variable->alias_index_ref)
267
30.6k
  {
268
    // If we haven't allocated tensor_variable, we cannot allocate them now (because no shape specified), return 0.
269
30.6k
    if (ccv_nnc_is_tensor_auto(tensor_variable->info))
270
0
      return 0;
271
30.6k
    void* ptr = 0;
272
30.6k
    if (CCV_TENSOR_GET_MEMORY(tensor_variable->info.type) == CCV_TENSOR_GPU_MEMORY)
273
436
      ptr = ccv_nnc_dynamic_graph_xpu_alloc(graph, CCV_TENSOR_GET_DEVICE_ID(tensor_variable->info.type), stream_context, ccv_nnc_tensor_data_size(tensor_variable->info));
274
30.6k
    tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, tensor_variable->info, 0);
275
30.6k
    assert(tensor_variable->tensor_view->data.u8);
276
30.6k
    return (ccv_nnc_tensor_t*)tensor_variable->tensor_view;
277
30.6k
  }
278
1.03k
  const int alias_index = tensor_variable->alias_index_ref - 1;
279
1.03k
  assert(alias_index >= 0);
280
1.03k
  ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
281
1.03k
  assert(!variable_to->alias_index_ref);
282
1.03k
  if (!variable_to->tensor_view)
283
3
  {
284
    // If we haven't allocated variable_to, we cannot allocate them now (because no shape specified), return 0.
285
3
    if (ccv_nnc_is_tensor_auto(variable_to->info))
286
0
      return 0;
287
3
    void* ptr = 0;
288
3
    assert(variable_to->info.type == tensor_variable->info.type);
289
3
    if (CCV_TENSOR_GET_MEMORY(variable_to->info.type) == CCV_TENSOR_GPU_MEMORY)
290
0
      ptr = ccv_nnc_dynamic_graph_xpu_alloc(graph, CCV_TENSOR_GET_DEVICE_ID(variable_to->info.type), stream_context, ccv_nnc_tensor_data_size(variable_to->info));
291
3
    variable_to->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, variable_to->info, 0);
292
3
    assert(variable_to->tensor_view->data.u8);
293
3
  }
294
1.03k
  int no_ofs = 1;
295
1.03k
  int i;
296
13.4k
  for (i = 0; no_ofs && i < CCV_NNC_MAX_DIM_ALLOC; i++)
297
12.4k
    no_ofs = (tensor_variable->ofs[i] == 0);
298
1.03k
  int no_inc = 1;
299
2.39k
  for (i = 0; no_inc && i < CCV_NNC_MAX_DIM_ALLOC; i++)
300
1.35k
    no_inc = (tensor_variable->inc[i] == 0);
301
1.03k
  if (!no_inc)
302
1.01k
    no_inc = (memcmp(tensor_variable->inc, tensor_variable->info.dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC) == 0);
303
1.03k
  assert(ccv_nnc_tensor_count(tensor_variable->info) <= ccv_nnc_tensor_count(variable_to->info));
304
1.03k
  if (no_ofs && no_inc)
305
1.02k
    tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8, tensor_variable->info, 0);
306
11
  else
307
11
    tensor_variable->tensor_view = ccv_nnc_tensor_view_new((ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(variable_to->tensor_view), tensor_variable->info, tensor_variable->ofs, no_inc ? tensor_variable->info.dim : tensor_variable->inc);
308
1.03k
  return (ccv_nnc_tensor_t*)tensor_variable->tensor_view;
309
1.03k
}
310
311
static void _ccv_nnc_tensor_symbol_extra_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_symbol_t symbol)
312
27.5k
{
313
27.5k
  if (symbol.d >= graph->binds->rnum)
314
305
  {
315
305
    const int rnum = graph->binds->rnum;
316
305
    ccv_array_resize(graph->binds, symbol.d + 1);
317
305
    int i;
318
610
    for (i = rnum; i < graph->binds->rnum; i++)
319
305
      ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i))->index = CCV_NNC_TENSOR_NO_VARIABLE;
320
305
  }
321
27.5k
  ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, symbol.d);
322
27.5k
  bind->type = tensor_variable->type;
323
27.5k
  bind->index = tensor_variable->index;
324
27.5k
  if (tensor_variable->alias_index_ref)
325
1.03k
  {
326
1.03k
    const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){
327
1.03k
      .d = symbol.d,
328
1.03k
      .graph = graph->tape
329
1.03k
    });
330
1.03k
    assert(alias_to.d >= 0 && alias_to.d < graph->binds->rnum);
331
1.03k
    bind->alias_ref = alias_to.d + 1;
332
1.03k
  } else
333
26.5k
    bind->alias_ref = 0;
334
27.5k
  if (bind->sources)
335
0
    ccv_array_free(bind->sources);
336
27.5k
  bind->sources = 0;
337
27.5k
  if (bind->destinations)
338
0
    ccv_array_free(bind->destinations);
339
27.5k
  bind->destinations = 0;
340
27.5k
  bind->destructor_hook.func = 0;
341
27.5k
  bind->destructor_hook.context = 0;
342
27.5k
  bind->tensor_view = 0;
343
27.5k
}
344
345
static ccv_nnc_tensor_symbol_t _ccv_nnc_tensor_symbol_from_variable(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
346
44.5k
{
347
44.5k
  if (tensor_variable->symbol.d >= 0)
348
16.9k
    return tensor_variable->symbol;
349
27.5k
  if (!tensor_variable->alias_index_ref)
350
26.5k
  {
351
26.5k
    const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_new(graph->tape, tensor_variable->info, 0);
352
26.5k
    _ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol);
353
26.5k
    return symbol;
354
26.5k
  }
355
1.03k
  const int alias_index = tensor_variable->alias_index_ref - 1;
356
1.03k
  assert(alias_index >= 0);
357
1.03k
  ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
358
1.03k
  assert(!variable_to->alias_index_ref);
359
1.03k
  int no_inc = 1;
360
1.03k
  int i;
361
2.37k
  for (i = 0; no_inc && i < CCV_NNC_MAX_DIM_ALLOC; i++)
362
1.33k
    no_inc = (tensor_variable->inc[i] == 0);
363
1.03k
  const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_alias_new(graph->tape, _ccv_nnc_tensor_symbol_from_variable(graph, variable_to), tensor_variable->ofs, no_inc ? tensor_variable->info.dim : tensor_variable->inc, tensor_variable->info, 0);
364
1.03k
  _ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol);
365
1.03k
  return symbol;
366
1.03k
}
367
368
// Return the tensor variable that is old (the provided tensor variable will have a new setting).
369
ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_exchange_new(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable)
370
6.27k
{
371
6.27k
  struct ccv_nnc_tensor_variable_s x = *tensor_variable;
372
6.27k
  ccv_nnc_tensor_variable_t new_variable;
373
  // Need to handle alias.
374
6.27k
  if (x.alias_index_ref)
375
0
    new_variable = ccv_nnc_tensor_variable_alias_new(graph, *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, x.alias_index_ref - 1), x.ofs, x.inc, x.info);
376
6.27k
  else
377
6.27k
    new_variable = ccv_nnc_tensor_variable_new(graph, x.info);
378
6.27k
  *tensor_variable = *new_variable;
379
6.27k
  *new_variable = x;
380
  // The index should be the same though.
381
6.27k
  const int index = new_variable->index;
382
6.27k
  new_variable->index = tensor_variable->index;
383
6.27k
  if (new_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
384
2.84k
  {
385
2.84k
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, new_variable->symbol.d);
386
2.84k
    bind->index = new_variable->index;
387
2.84k
  }
388
6.27k
  tensor_variable->index = index;
389
6.27k
  return new_variable;
390
6.27k
}
391
392
void ccv_nnc_dynamic_graph_set_no_grad(ccv_nnc_dynamic_graph_t* const dynamic_graph, const int no_grad)
393
8
{
394
8
  dynamic_graph->no_grad = no_grad;
395
8
}
396
397
static ccv_nnc_stream_context_t* _ccv_nnc_dynamic_graph_get_stream(ccv_nnc_dynamic_graph_t* const graph, const int type)
398
72
{
399
72
  if (!graph->stream_map)
400
10
    graph->stream_map = kh_init(stream_map);
401
72
  int ret = 0;
402
72
  khiter_t k = kh_put(stream_map, graph->stream_map, type, &ret);
403
72
  assert(ret >= 0);
404
72
  ccv_nnc_stream_context_t* stream = kh_val(graph->stream_map, k);
405
  // If ret == 0, the key already exists, we can return directly; otherwise, create and return.
406
72
  if (ret != 0)
407
23
  {
408
23
    stream = ccv_nnc_stream_context_new(type);
409
23
    kh_val(graph->stream_map, k) = stream;
410
23
  }
411
72
  return stream;
412
72
}
413
414
typedef struct {
415
  ccv_nnc_dynamic_graph_t* graph;
416
  int stream_type;
417
} ccv_nnc_dynamic_graph_neighbor_context_discovery_t;
418
419
static ccv_nnc_stream_context_t* _ccv_nnc_dynamic_graph_neighbor_context_discovery(const int device_id, void* const context)
420
0
{
421
0
  ccv_nnc_dynamic_graph_neighbor_context_discovery_t* const discovery = (ccv_nnc_dynamic_graph_neighbor_context_discovery_t*)context;
422
0
  int type = discovery->stream_type;
423
0
  CCV_STREAM_SET_DEVICE_ID(type, device_id);
424
0
  return _ccv_nnc_dynamic_graph_get_stream(discovery->graph, type);
425
0
}
426
427
void ccv_nnc_dynamic_graph_exec_ret(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context, ccv_nnc_graph_exec_symbol_t* const graph_execs)
428
15.3k
{
429
15.3k
  int i, j;
430
43.4k
  for (i = 0; i < input_size; i++)
431
28.1k
    if (inputs[i] && !inputs[i]->alias_index_ref)
432
27.0k
      { assert(inputs[i]->tensor_view); }
433
15.3k
  ccv_nnc_tensor_t* input_tensors[ccv_max(1, input_size)];
434
43.4k
  for (i = 0; i < input_size; i++)
435
28.1k
    input_tensors[i] = inputs[i] ? ccv_nnc_tensor_from_variable(graph, inputs[i], stream_context) : 0;
436
15.3k
  ccv_nnc_tensor_symbol_t input_symbols[ccv_max(1, input_size)];
437
43.4k
  for (i = 0; i < input_size; i++)
438
28.1k
    input_symbols[i] = inputs[i] ? _ccv_nnc_tensor_symbol_from_variable(graph, inputs[i]) : NO_TENSOR_SYMBOL;
439
15.3k
  ccv_array_t* input_sources[ccv_max(1, input_size)];
440
15.3k
  ccv_array_t* input_alias_sources[ccv_max(1, input_size)];
441
43.4k
  for (i = 0; i < input_size; i++)
442
28.1k
  {
443
28.1k
    input_sources[i] = input_symbols[i].d != CCV_NNC_NO_TENSOR_SYMBOL ? ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, input_symbols[i].d))->sources : 0;
444
28.1k
    if (inputs[i] && inputs[i]->alias_index_ref)
445
1.02k
    {
446
1.02k
      const int alias_index_ref = inputs[i]->alias_index_ref - 1;
447
1.02k
      assert(alias_index_ref >= 0);
448
1.02k
      ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index_ref);
449
1.02k
      input_alias_sources[i] = ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d))->sources;
450
1.02k
    } else
451
27.0k
      input_alias_sources[i] = 0;
452
28.1k
  }
453
15.3k
  const int parallel_count = ccv_max(1, parallel);
454
15.3k
  assert(input_size % parallel_count == 0);
455
15.3k
  const int per_input_size = input_size / parallel_count;
456
15.3k
  assert(output_size % parallel_count == 0);
457
15.3k
  const int per_output_size = output_size / parallel_count;
458
15.3k
  int output_auto = 0;
459
30.9k
  for (i = 0; !output_auto && i < output_size; i++)
460
15.5k
    output_auto = outputs[i] ? ccv_nnc_is_tensor_auto(outputs[i]->info) : 0;
461
  // One extra step, infer the parameters for outputs.
462
15.3k
  if (output_auto)
463
14.6k
  {
464
14.6k
    ccv_nnc_tensor_param_t input_params[ccv_max(1, per_input_size)];
465
14.6k
    ccv_nnc_tensor_param_t output_params[ccv_max(1, per_output_size)];
466
29.3k
    for (i = 0; i < parallel_count; i++)
467
14.6k
    {
468
41.8k
      for (j = 0; j < per_input_size; j++)
469
27.1k
        input_params[j] = inputs[j + i * per_input_size] ? inputs[j + i * per_input_size]->info : ccv_nnc_tensor_auto;
470
29.6k
      for (j = 0; j < per_output_size; j++)
471
14.9k
        output_params[j] = outputs[j + i * per_output_size] ? outputs[j + i * per_output_size]->info : ccv_nnc_tensor_auto;
472
14.6k
      ccv_nnc_hint_tensor_auto(cmd, input_params, per_input_size, hint, output_params, per_output_size);
473
29.6k
      for (j = 0; j < per_output_size; j++)
474
14.9k
        if (outputs[j + i * per_output_size])
475
14.7k
          outputs[j + i * per_output_size]->info = output_params[j];
476
14.6k
    }
477
14.6k
  }
478
15.3k
  int freeable_size = 0;
479
15.3k
  ccv_nnc_tensor_variable_t freeables[ccv_max(1, output_size)];
480
  // Refresh the symbol if it is binded to an existing exec. Otherwise we cannot keep the SSA guarantee.
481
30.9k
  for (i = 0; i < output_size; i++)
482
15.6k
  {
483
    // First, go over to see whether there is enforce inplace.
484
15.6k
    int enforce_idx = -1;
485
44.6k
    for (j = 0; enforce_idx < 0 && j < input_size; j++)
486
29.0k
      if (inputs[j] && ccv_nnc_cmd_enforce_inplace(cmd, j, input_size, i, output_size))
487
2
        enforce_idx = j;
488
15.6k
    if (enforce_idx >= 0)
489
2
      { assert(outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL); }
490
    // We don't allow or check "allow inplace" yet. That logic will be at odds with backward logic.
491
15.6k
    if (outputs[i] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
492
417
    {
493
417
      const ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[i]->symbol.d);
494
417
      if (enforce_idx >= 0)
495
2
        { assert(!bind->destinations || bind->destinations->rnum == 0); }
496
417
      if (bind->sources && bind->sources->rnum > 0)
497
415
      {
498
415
        const ccv_nnc_tensor_variable_t old_var = freeables[freeable_size++] = ccv_nnc_tensor_variable_exchange_new(graph, outputs[i]);
499
        // If this is enforce output, make sure the tensor view is taken by the output.
500
415
        if (enforce_idx >= 0)
501
0
        {
502
0
          outputs[i]->tensor_view = old_var->tensor_view; // Make sure the tensor view is taken over by the output.
503
0
          old_var->tensor_view = 0;
504
0
        }
505
415
      }
506
417
    }
507
15.6k
  }
508
15.3k
  ccv_nnc_tensor_t* output_tensors[ccv_max(1, per_output_size)];
509
15.3k
  if (parallel_count > 1)
510
23
  {
511
23
    const int max_device_id_size = per_input_size + per_output_size;
512
23
    assert(max_device_id_size > 0);
513
23
    int device_ids[max_device_id_size];
514
23
    ccv_nnc_stream_context_t* streams[parallel_count];
515
23
    ccv_nnc_stream_signal_t* signal;
516
23
    if (stream_context)
517
14
      signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
518
97
    for (i = 0; i < parallel_count; i++)
519
74
    {
520
74
      int flag = 0;
521
148
      for (j = 0; !flag && j < per_input_size; j++)
522
74
        if (input_tensors[i * per_input_size + j])
523
74
          flag = (CCV_TENSOR_GET_MEMORY(input_tensors[i * per_input_size + j]->info.type) == CCV_TENSOR_GPU_MEMORY);
524
156
      for (j = 0; j < per_output_size; j++)
525
82
      {
526
82
        output_tensors[j] = outputs[j + i * per_output_size] ? ccv_nnc_tensor_from_variable(graph, outputs[j + i * per_output_size], stream_context) : 0;
527
82
        if (output_tensors[j] && !flag)
528
4
          flag = (CCV_TENSOR_GET_MEMORY(output_tensors[j]->info.type) == CCV_TENSOR_GPU_MEMORY);
529
82
      }
530
74
      const int stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
531
74
      const int tensor_type = flag ? CCV_TENSOR_GPU_MEMORY : CCV_TENSOR_CPU_MEMORY;
532
74
      const int device_id_size = ccv_nnc_device_ids_for_io(input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, tensor_type, device_ids, max_device_id_size);
533
74
      ccv_nnc_stream_context_t* stream_0 = 0;
534
146
      for (j = 0; j < device_id_size; j++)
535
72
      {
536
72
        int type = stream_type;
537
72
        CCV_STREAM_SET_DEVICE_ID(type, device_ids[j]);
538
72
        ccv_nnc_stream_context_t* const stream = _ccv_nnc_dynamic_graph_get_stream(graph, type);
539
72
        if (!stream_0)
540
72
          stream_0 = stream;
541
72
      }
542
      // Wait signal to finish.
543
74
      if (stream_context)
544
44
      {
545
44
        if (stream_0)
546
42
          ccv_nnc_stream_context_wait_signal(stream_0, signal);
547
2
        else
548
2
          ccv_nnc_stream_context_wait(stream_context);
549
44
      }
550
74
      if (stream_0)
551
72
      {
552
72
        ccv_nnc_dynamic_graph_neighbor_context_discovery_t discovery = {
553
72
          .graph = graph,
554
72
          .stream_type = stream_type
555
72
        };
556
72
        ccv_nnc_stream_context_set_neighbor_discovery(stream_0, _ccv_nnc_dynamic_graph_neighbor_context_discovery, &discovery);
557
72
      }
558
74
      PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size);
559
74
      int k;
560
204
      for (k = 0; k < per_input_size; k++)
561
130
      {
562
130
        PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", k + 1, input_tensors[k + i * per_input_size], (input_tensors[k + i * per_input_size] ? input_tensors[k + i * per_input_size]->data.u8 : 0), (input_tensors[k + i * per_input_size] ? CCV_TENSOR_GET_DEVICE_ID(input_tensors[k + i * per_input_size]->info.type) : -1));
563
130
        if (input_tensors[k + i * per_input_size] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
564
0
          ccv_nnc_print_tensor_info(input_tensors[k + i * per_input_size]);
565
130
        PRINT(CCV_CLI_INFO, "\n");
566
130
      }
567
74
      ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, stream_0);
568
156
      for (k = 0; k < per_output_size; k++)
569
82
      {
570
82
        PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", k + 1, output_tensors[k], (output_tensors[k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? CCV_TENSOR_GET_DEVICE_ID(output_tensors[k]->info.type) : -1));
571
82
        if (output_tensors[k] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
572
0
          ccv_nnc_print_tensor_info(output_tensors[k]);
573
82
        PRINT(CCV_CLI_INFO, "\n");
574
82
      }
575
74
      if (stream_context && stream_0)
576
42
      {
577
42
        ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
578
42
        ccv_nnc_stream_context_wait_signal(stream_context, signal);
579
42
      }
580
74
      streams[i] = stream_0;
581
74
    }
582
23
    if (!stream_context)
583
39
      for (i = 0; i < parallel_count; i++)
584
30
        if (streams[i])
585
30
          ccv_nnc_stream_context_wait(streams[i]);
586
15.3k
  } else {
587
30.8k
    for (i = 0; i < per_output_size; i++)
588
15.5k
      output_tensors[i] = outputs[i] ? ccv_nnc_tensor_from_variable(graph, outputs[i], stream_context) : 0;
589
15.3k
    PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size);
590
43.3k
    for (i = 0; i < per_input_size; i++)
591
27.9k
    {
592
27.9k
      PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", i + 1, input_tensors[i], (input_tensors[i] ? input_tensors[i]->data.u8 : 0), (input_tensors[i] ? CCV_TENSOR_GET_DEVICE_ID(input_tensors[i]->info.type) : -1));
593
27.9k
      if (input_tensors[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
594
0
        ccv_nnc_print_tensor_info(input_tensors[i]);
595
27.9k
      PRINT(CCV_CLI_INFO, "\n");
596
27.9k
    }
597
15.3k
    ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors, per_input_size, output_tensors, per_output_size, stream_context);
598
30.8k
    for (i = 0; i < per_output_size; i++)
599
15.5k
    {
600
15.5k
      PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", i + 1, output_tensors[i], (output_tensors[i] ? output_tensors[i]->data.u8 : 0), (output_tensors[i] ? CCV_TENSOR_GET_DEVICE_ID(output_tensors[i]->info.type) : -1));
601
15.5k
      if (output_tensors[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
602
0
        ccv_nnc_print_tensor_info(output_tensors[i]);
603
15.5k
      PRINT(CCV_CLI_INFO, "\n");
604
15.5k
    }
605
15.3k
  }
606
15.3k
  int inputs_are_constants = 1;
607
30.6k
  for (i = 0; inputs_are_constants && i < input_size; i++)
608
15.3k
    if (inputs[i] && inputs[i]->type != CCV_NNC_TENSOR_CONSTANT)
609
15.3k
      inputs_are_constants = 0;
610
15.3k
  if (input_size > 0 && !inputs_are_constants && !graph->no_grad) // No need to record the execution if there is no input or we disabled gradient computation.
611
15.3k
  {
612
15.3k
    ccv_nnc_tensor_symbol_t output_symbols[ccv_max(1, output_size)];
613
30.8k
    for (i = 0; i < output_size; i++)
614
15.5k
      if (outputs[i])
615
15.3k
      {
616
15.3k
        assert(outputs[i]->type != CCV_NNC_TENSOR_CONSTANT);
617
15.3k
        output_symbols[i] = _ccv_nnc_tensor_symbol_from_variable(graph, outputs[i]);
618
15.3k
      } else
619
205
        output_symbols[i] = NO_TENSOR_SYMBOL;
620
15.3k
    int t;
621
30.6k
    for (t = 0; t < parallel_count; t++)
622
15.3k
    {
623
15.3k
      ccv_nnc_graph_exec_symbol_t graph_exec = ccv_nnc_graph_exec_symbol_new(graph->tape, cmd, input_symbols + t * per_input_size, per_input_size, output_symbols + t * per_output_size, per_output_size, 0);
624
15.3k
      if (graph_execs)
625
2.40k
        graph_execs[t] = graph_exec;
626
      // This needs to be done before we set the new sources on the outputs.
627
43.3k
      for (i = 0; i < per_input_size; i++)
628
28.0k
      {
629
28.0k
        ccv_array_t* const input_source = input_sources[i + t * per_input_size];
630
28.0k
        if (input_source)
631
28.1k
          for (j = 0; j < input_source->rnum; j++)
632
14.0k
            ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){
633
14.0k
              .d = *(int*)ccv_array_get(input_source, j),
634
14.0k
              .graph = graph->tape
635
14.0k
            }, graph_exec);
636
28.0k
        ccv_array_t* const input_alias_source = input_alias_sources[i + t * per_input_size];
637
28.0k
        if (input_alias_source)
638
2.02k
          for (j = 0; j < input_alias_source->rnum; j++)
639
1.01k
            ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){
640
1.01k
              .d = *(int*)ccv_array_get(input_alias_source, j),
641
1.01k
              .graph = graph->tape
642
1.01k
            }, graph_exec);
643
28.0k
      }
644
43.3k
      for (i = 0; i < per_input_size; i++)
645
28.0k
      {
646
28.0k
        ccv_nnc_tensor_variable_t const input = inputs[i + t * per_input_size];
647
28.0k
        if (!input || input->type == CCV_NNC_TENSOR_CONSTANT)
648
231
          continue;
649
27.8k
        ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, input_symbols[i + t * per_input_size].d);
650
27.8k
        if (!bind->destinations)
651
21.9k
          bind->destinations = ccv_array_new(sizeof(int), 1, 0);
652
27.8k
        ccv_array_add_unique_int(bind->destinations, graph_exec.d);
653
27.8k
        if (input->alias_index_ref)
654
1.01k
        {
655
1.01k
            const int alias_index = input->alias_index_ref - 1;
656
1.01k
            assert(alias_index >= 0);
657
1.01k
            ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
658
1.01k
            ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d);
659
1.01k
            if (!root_bind->destinations)
660
1.01k
              root_bind->destinations = ccv_array_new(sizeof(int), 1, 0);
661
1.01k
            ccv_array_add_unique_int(root_bind->destinations, graph_exec.d);
662
1.01k
        }
663
27.8k
      }
664
30.8k
      for (i = 0; i < per_output_size; i++)
665
15.5k
      {
666
15.5k
        ccv_nnc_tensor_variable_t const output = outputs[i + t * per_output_size];
667
15.5k
        if (!output)
668
205
          continue;
669
15.3k
        ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, output_symbols[i + t * per_output_size].d);
670
15.3k
        assert(!bind->sources); // This is a new symbol, therefore, no binded sources associated yet.
671
15.3k
        bind->sources = ccv_array_new(sizeof(int), 1, 0);
672
15.3k
        ccv_array_add_unique_int(bind->sources, graph_exec.d);
673
15.3k
        if (output->alias_index_ref)
674
8
        {
675
8
          const int alias_index = output->alias_index_ref - 1;
676
8
          assert(alias_index >= 0);
677
8
          ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
678
8
          ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d);
679
8
          if (!root_bind->sources)
680
4
            root_bind->sources = ccv_array_new(sizeof(int), 1, 0);
681
8
          ccv_array_add_unique_int(root_bind->sources, graph_exec.d);
682
8
        }
683
15.3k
      }
684
15.3k
    }
685
15.3k
  }
686
  // Now, able to free some of the reused outputs.
687
15.7k
  for (i = 0; i < freeable_size; i++)
688
415
    ccv_nnc_tensor_variable_free(graph, freeables[i]);
689
15.3k
}
690
691
int ccv_nnc_dynamic_graph_exec(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context)
692
12.9k
{
693
12.9k
  ccv_nnc_dynamic_graph_exec_ret(graph, cmd, hint, flags, inputs, input_size, outputs, output_size, parallel, stream_context, 0);
694
12.9k
  return CCV_NNC_EXEC_SUCCESS;
695
12.9k
}
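
A hedged sketch of driving ccv_nnc_dynamic_graph_exec() above with the command API. CCV_NNC_EWSUM_FORWARD, ccv_nnc_cmd(), ccv_nnc_cmd_auto and ccv_nnc_no_hint come from the library's public command headers and are assumptions here, not something this file defines; the variable and tensor calls are the ones instrumented in this listing.

#include "ccv_nnc.h"

// Sum two scalar variables on the CPU with the dynamic graph API measured above.
static void sum_example(ccv_nnc_dynamic_graph_t* const graph)
{
  ccv_nnc_tensor_param_t info = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32F,
    .dim = {1},
  };
  ccv_nnc_tensor_variable_t const a = ccv_nnc_tensor_variable_new(graph, info);
  ccv_nnc_tensor_variable_t const b = ccv_nnc_tensor_variable_new(graph, info);
  // Materializing the tensors allocates backing memory (see ccv_nnc_tensor_from_variable_impl above).
  ccv_nnc_tensor_from_variable(graph, a, 0)->data.f32[0] = 1;
  ccv_nnc_tensor_from_variable(graph, b, 0)->data.f32[0] = 2;
  // The output shape is left as ccv_nnc_tensor_auto; exec infers it via ccv_nnc_hint_tensor_auto().
  ccv_nnc_tensor_variable_t const c = ccv_nnc_tensor_variable_new(graph, ccv_nnc_tensor_auto);
  ccv_nnc_tensor_variable_t inputs[] = {a, b};
  ccv_nnc_tensor_variable_t outputs[] = {c};
  const ccv_nnc_cmd_t cmd = ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, ccv_nnc_cmd_auto, 0); // assumed command constructor
  ccv_nnc_dynamic_graph_exec(graph, cmd, ccv_nnc_no_hint, 0, inputs, 2, outputs, 1, 0, 0);
  // c now holds 3; freeing the variables also lets the graph prune the recorded tape nodes when possible.
  ccv_nnc_tensor_variable_free(graph, a);
  ccv_nnc_tensor_variable_free(graph, b);
  ccv_nnc_tensor_variable_free(graph, c);
}
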
696
697
static int _ccv_nnc_tensor_variable_is_only_output(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_graph_bind_t* bind, const int symbol_d)
698
17.4k
{
699
17.4k
  if (bind->alias_ref)
700
1.01k
    bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, bind->alias_ref - 1);
701
17.4k
  if (!bind->sources || bind->sources->rnum == 0)
702
0
    return 1;
703
17.4k
  int i;
704
33.9k
  for (i = 0; i < bind->sources->rnum; i++)
705
17.4k
  {
706
17.4k
    const int exec_symbol_d = *(int*)ccv_array_get(bind->sources, i);
707
17.4k
    const ccv_nnc_graph_exec_symbol_t exec_symbol = {
708
17.4k
      .d = exec_symbol_d,
709
17.4k
      .graph = graph->tape
710
17.4k
    };
711
17.4k
    const int* outputs; int output_size;
712
17.4k
    ccv_nnc_graph_exec_symbol_io(graph->tape, exec_symbol, 0, 0, &outputs, &output_size);
713
17.4k
    int j;
714
33.9k
    for (j = 0; j < output_size; j++)
715
17.5k
      if (outputs[j] >= 0 && outputs[j] != symbol_d) // If output is me, it is the only output.
716
1.04k
      {
717
1.04k
        assert(outputs[j] < graph->binds->rnum);
718
1.04k
        const ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j]);
719
        // This is in use and is it not a constant symbol.
720
1.04k
        if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT)
721
23
          return 0;
722
1.01k
        if (other_bind->alias_ref) // If this is alias, use its original's destinations.
723
1
          other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
724
1.01k
        if (other_bind->destinations && other_bind->destinations->rnum > 0)
725
1.00k
          return 0;
726
1.01k
      }
727
17.4k
  }
728
16.4k
  return 1;
729
17.4k
}
730
731
static void _ccv_nnc_update_bind_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws)
732
24.9k
{
733
24.9k
  int i;
734
24.9k
  if (bind->destinations)
735
24.7k
  {
736
24.7k
    int flag = 0;
737
49.3k
    for (i = 0; !flag && i < bind->destinations->rnum; i++)
738
24.5k
    {
739
24.5k
      const int exec_symbol_d = *(int*)ccv_array_get(bind->destinations, i);
740
24.5k
      if (exec_symbol_d == freed_exec_symbol_d)
741
24.5k
      {
742
24.5k
        if (i < bind->destinations->rnum - 1)
743
16
          *(int*)ccv_array_get(bind->destinations, i) = *(int*)ccv_array_get(bind->destinations, bind->destinations->rnum - 1);
744
24.5k
        --bind->destinations->rnum;
745
24.5k
        flag = 1;
746
24.5k
      }
747
24.5k
    }
748
    // This symbol can be freed.
749
24.7k
    if (flag && bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED)
750
17.6k
    {
751
17.6k
      ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind;
752
17.6k
      if (bind->alias_ref)
753
1.01k
      {
754
1.01k
        root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1);
755
1.01k
        if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
756
1.00k
          root_bind = bind;
757
1.01k
      }
758
      // If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more.
759
      // It is possible because exec will be freed already, thus, it is safe to remove this alias out.
760
17.6k
      if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED &&
761
17.6k
        ((!root_bind->sources || root_bind->sources->rnum == 0) || _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)) &&
762
17.6k
        root_bind->destinations->rnum == 0)
763
17.6k
      {
764
17.6k
        if (root_bind->sources)
765
14.6k
          for (i = 0; i < root_bind->sources->rnum; i++)
766
6.00k
            ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i));
767
17.6k
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
768
17.6k
        ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
769
17.6k
          .d = tensor_index,
770
17.6k
          .graph = graph->tape
771
17.6k
        });
772
17.6k
      } else if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && // Handle the case the bind is already freed, and it doesn't have any sources or destinations.
773
8
        bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) {
774
2
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
775
2
        ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
776
2
          .d = tensor_index,
777
2
          .graph = graph->tape
778
2
        });
779
2
      }
780
17.6k
    }
781
24.7k
  }
782
24.9k
}
783
784
static void _ccv_nnc_update_bind_sources_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws)
785
7.27k
{
786
7.27k
  int i;
787
7.27k
  if (bind->sources)
788
7.27k
  {
789
7.27k
    int flag = 0;
790
14.5k
    for (i = 0; !flag && i < bind->sources->rnum; i++)
791
7.27k
    {
792
7.27k
      const int exec_symbol_d = *(int*)ccv_array_get(bind->sources, i);
793
7.27k
      if (exec_symbol_d == freed_exec_symbol_d)
794
7.27k
      {
795
7.27k
        if (i < bind->sources->rnum - 1)
796
2
          *(int*)ccv_array_get(bind->sources, i) = *(int*)ccv_array_get(bind->sources, bind->sources->rnum - 1);
797
7.27k
        --bind->sources->rnum;
798
7.27k
        flag = 1;
799
7.27k
      }
800
7.27k
    }
801
    // This symbol can be freed.
802
7.27k
    if (flag && bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED)
803
2.43k
    {
804
2.43k
      ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind;
805
2.43k
      if (bind->alias_ref)
806
3
      {
807
3
        root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1);
808
3
        if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
809
0
          root_bind = bind;
810
3
      }
811
      // If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more.
812
      // It is possible because exec will be freed already, thus, it is safe to remove this alias out.
813
2.43k
      if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED &&
814
2.43k
        (root_bind->sources->rnum == 0 || _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)) &&
815
2.43k
        (!root_bind->destinations || root_bind->destinations->rnum == 0))
816
6
      {
817
6
        for (i = 0; i < root_bind->sources->rnum; i++)
818
0
          ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i));
819
6
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
820
6
        ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
821
6
          .d = tensor_index,
822
6
          .graph = graph->tape
823
6
        });
824
2.43k
      } else if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && // Handle the case the bind is already freed, and it doesn't have any sources or destinations.
825
2.43k
        bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) {
826
3
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
827
3
        ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
828
3
          .d = tensor_index,
829
3
          .graph = graph->tape
830
3
        });
831
3
      }
832
2.43k
    }
833
7.27k
  }
834
7.27k
}
835
836
static void _ccv_nnc_update_bind_sources_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, const int* const inputs, const int input_size, const int* const outputs, const int output_size, ccv_array_t* const ws)
837
15.2k
{
838
15.2k
  int i;
839
43.2k
  for (i = 0; i < input_size; i++)
840
27.9k
    if (inputs[i] >= 0 && inputs[i] < binds->rnum)
841
27.9k
    {
842
27.9k
      ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, inputs[i]);
843
27.9k
      if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
844
4.00k
        continue;
845
23.9k
      if (bind->alias_ref)
846
1.01k
      {
847
1.01k
        const int alias_to = bind->alias_ref - 1;
848
1.01k
        ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to);
849
1.01k
        if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE)
850
1.01k
          _ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws);
851
1.01k
      }
852
23.9k
      _ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, bind, inputs[i], ws);
853
23.9k
    }
854
  // Note that this works because there is no overlap of inputs / outputs. (What about alias?).
855
30.7k
  for (i = 0; i < output_size; i++)
856
15.4k
    if (outputs[i] >= 0 && outputs[i] < binds->rnum)
857
15.2k
    {
858
15.2k
      ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, outputs[i]);
859
15.2k
      if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
860
8.01k
        continue;
861
7.26k
      if (bind->alias_ref)
862
5
      {
863
5
        const int alias_to = bind->alias_ref - 1;
864
5
        ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to);
865
5
        if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE)
866
5
          _ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws);
867
5
      }
868
7.26k
      _ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, bind, outputs[i], ws);
869
7.26k
    }
870
15.2k
}
871
872
static void _ccv_nnc_stateful_exec_free_if_possible(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol)
873
15.2k
{
874
15.2k
  if (!graph->stateful_execs)
875
6.05k
    return;
876
9.22k
  assert(symbol.d >= 0);
877
9.22k
  ccv_array_t* const stateful_execs = graph->stateful_execs;
878
9.22k
  ccv_nnc_cmd_t cmd = ccv_nnc_graph_exec_symbol_cmd(graph->tape, symbol);
879
9.22k
  ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data;
880
9.22k
  if (!stateful_exec)
881
6.82k
    return;
882
  // If there is no backward, no need to apply gradients.
883
  // Otherwise, if we applied gradients, we can free it as well.
884
  // We don't free this stateful exec because apply gradients doesn't require any variables alive.
885
2.39k
  if (!stateful_exec->did_backward_but_not_apply_gradients)
886
298
  {
887
298
    const int index = stateful_exec->index;
888
298
    ccfree(stateful_exec);
889
298
    if (index < graph->reuse_stateful_exec || graph->reuse_stateful_exec < 0)
890
298
      graph->reuse_stateful_exec = index;
891
298
    *(ccv_nnc_stateful_exec_t**)ccv_array_get(stateful_execs, index) = 0;
892
298
  } else
893
2.10k
    stateful_exec->should_free = 1;
894
2.39k
}
895
896
void ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
897
32.0k
{
898
  // If it contains a symbol, this tensor variable is not a free variable. It is either used as input or output.
899
32.0k
  if (tensor_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
900
27.4k
  {
901
    // If it is not a free variable, when can we free the symbol and the underlying variable?
902
    // 1. There should be no sources (the command generate this tensor should be freed) or the output of these sources is only the current one;
903
    // 2. The destinations (the commands that uses this tensor) should have no other inputs, or the other inputs has no binded sources as well.
904
27.4k
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, tensor_variable->symbol.d);
905
    // There should be no source associated with it no more.
906
27.4k
    int free_symbol = 0;
907
    // I am free if no exec symbol is producing me or the symbol producing me can only producing me (thus, it is not required to
908
    // compute gradient because I am the only variable it can compute gradient for).
909
27.4k
    ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind;
910
27.4k
    if (bind->alias_ref)
911
1.03k
    {
912
1.03k
      const int alias_to = bind->alias_ref - 1;
913
1.03k
      root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, alias_to);
914
1.03k
    }
915
27.4k
    const int sources_and_is_only_output = (root_bind->sources && root_bind->sources->rnum > 0) && _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_variable->symbol.d);
916
27.4k
    if (!root_bind->sources || root_bind->sources->rnum == 0 || sources_and_is_only_output)
917
26.3k
    {
918
26.3k
      int i, j;
919
26.3k
      free_symbol = 1; // Assume we can free this symbol.
920
26.3k
      if (!graph->ws)
921
12
        graph->ws = ccv_array_new(sizeof(int), root_bind->destinations ? root_bind->destinations->rnum : 0, 0);
922
26.3k
      ccv_array_t* const ws = graph->ws;
923
26.3k
      ccv_array_clear(ws);
924
26.3k
      if (root_bind->destinations)
925
43.4k
        for (i = 0; i < root_bind->destinations->rnum; i++)
926
21.5k
          ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->destinations, i));
927
26.3k
      const int ws_init_size = ws->rnum;
928
      // Add all sources from root_bind, in case it has been freed (during update bind sources / destinations when free).
929
26.3k
      if (root_bind->sources)
930
25.7k
        for (i = 0; i < root_bind->sources->rnum; i++)
931
10.4k
          ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i));
932
      // If we cannot loop over any exec symbols (this is not in use). It is simple to determine whether we want
933
      // to free it or not: if this is an alias and the origin is not freed, we cannot free this symbol.
934
26.3k
      if (ws_init_size == 0)
935
4.88k
        free_symbol = (!bind->alias_ref || 
root_bind->index < 017
);
936
      // Go through all the exec symbols use this tensor, to see whether they have inputs that has other sources.
937
47.9k
      for (i = 0; i < ws_init_size; i++)
938
21.5k
      {
939
21.5k
        const int exec_symbol_d = *(int*)ccv_array_get(ws, i);
940
21.5k
        const ccv_nnc_graph_exec_symbol_t symbol = {
941
21.5k
          .d = exec_symbol_d,
942
21.5k
          .graph = graph->tape
943
21.5k
        };
944
21.5k
        const int* inputs; int input_size;
945
21.5k
        const int* outputs; int output_size;
946
21.5k
        ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size);
947
21.5k
        int flag = 0; // flag denotes whether there are cases to keep this exec symbol.
948
21.5k
        if (sources_and_is_only_output)
949
8.44k
        {
950
          // If there are sources, check whether we have outputs or not. If we do, we cannot free this.
951
16.8k
          for (j = 0; !flag && j < output_size; j++)
952
8.45k
            if (outputs[j] >= 0 && outputs[j] < graph->binds->rnum)
953
8.45k
            {
954
8.45k
              ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j]);
955
8.45k
              if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT)
956
4.43k
                flag = 1;
957
4.02k
              else {
958
4.02k
                if (other_bind->alias_ref) // If this is alias, use its original's destinations.
959
0
                  other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
960
4.02k
                flag = (other_bind->destinations && other_bind->destinations->rnum > 0);
961
4.02k
              }
962
8.45k
            }
963
13.0k
        } else {
964
          // If there is no sources, check if other sources can depend on this exec, if they do, we cannot free this.
965
36.8k
          for (j = 0; !flag && j < input_size; j++)
966
23.7k
            if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum && inputs[j] != tensor_variable->symbol.d)
967
14.4k
            {
968
14.4k
              ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j]);
969
14.4k
              if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT)
970
6.21k
                flag = 1;
971
8.25k
              else {
972
8.25k
                if (other_bind->alias_ref) // If this is alias, use its original's destinations.
973
4
                  other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
974
8.25k
                flag = (other_bind->sources && other_bind->sources->rnum > 0);
975
8.25k
              }
976
14.4k
            }
977
13.0k
        }
978
        // This exec can be freed if there is no input required or there is no output required.
979
21.5k
        free_symbol = (free_symbol && !flag);
980
21.5k
        if (!flag)
981
4.85k
        {
982
          // Go over inputs and remove all references from binded destinations.
983
          // and go over outputs remove all references from binded sources.
984
4.85k
          _ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws);
985
4.85k
          const int* outgoings; int outgoing_size;
986
4.85k
          ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size);
987
7.29k
          for (j = 0; j < outgoing_size; j++)
988
2.43k
            ccv_array_add_unique_int(ws, outgoings[j]);
989
4.85k
          _ccv_nnc_stateful_exec_free_if_possible(graph, symbol);
990
4.85k
          ccv_nnc_graph_exec_symbol_free(graph->tape, symbol);
991
4.85k
        }
992
21.5k
      }
993
26.3k
      if (free_symbol)
994
9.71k
      {
995
9.71k
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
996
9.71k
        ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol);
997
        // Now, go over the outgoings; if one of them gets removed as well, add its outgoings too. Note that the ws array can grow while we iterate over it.
998
22.5k
        for (i = ws_init_size; i < ws->rnum; i++)
999
12.8k
        {
1000
12.8k
          const int exec_symbol_d = *(int*)ccv_array_get(ws, i);
1001
12.8k
          const ccv_nnc_graph_exec_symbol_t symbol = {
1002
12.8k
            .d = exec_symbol_d,
1003
12.8k
            .graph = graph->tape
1004
12.8k
          };
1005
12.8k
          const int* inputs; int input_size;
1006
12.8k
          const int* outputs; int output_size;
1007
12.8k
          ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size);
1008
12.8k
          int flag = 0;
1009
29.9k
          for (j = 0; !flag && j < input_size; j++)
1010
17.0k
            if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum)
1011
17.0k
            {
1012
17.0k
              ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j]);
1013
17.0k
              if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT)
1014
4.43k
                flag = 1;
1015
12.6k
              else {
1016
12.6k
                if (other_bind->alias_ref) // If this is an alias, use its original's sources.
1017
1.02k
                  other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
1018
12.6k
                flag = (other_bind->sources && other_bind->sources->rnum > 0);
1019
12.6k
              }
1020
17.0k
            }
1021
12.8k
          if (flag) // If any input makes freeing this destination impossible, check whether all its outputs are done.
1022
10.4k
          {
1023
10.4k
            int output_flag = 0;
1024
21.0k
            for (j = 0; !output_flag && j < output_size; j++)
1025
10.6k
              if (outputs[j] >= 0 && outputs[j] < graph->binds->rnum)
1026
10.4k
              {
1027
10.4k
                ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j]);
1028
10.4k
                if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT)
1029
2.40k
                  output_flag = 1;
1030
8.03k
                else {
1031
8.03k
                  if (other_bind->alias_ref) // If this is an alias, use its original's destinations.
1032
0
                    other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
1033
8.03k
                  output_flag = (other_bind->destinations && other_bind->destinations->rnum > 0);
1034
8.03k
                }
1035
10.4k
              }
1036
10.4k
            if (!output_flag) // If no output is used (used means it has a tensor variable, or it has a destination).
1037
8.01k
              flag = 0;
1038
10.4k
          }
1039
          // Went over all the inputs; it turns out no input has other references, so it is safe to remove.
1040
12.8k
          if (!flag)
1041
10.4k
          {
1042
10.4k
            _ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws);
1043
10.4k
            const int* outgoings; int outgoing_size;
1044
10.4k
            ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size);
1045
            // If it has outgoings, add them for further inspection.
1046
12.8k
            for (j = 0; j < outgoing_size; j++)
1047
2.40k
              ccv_array_add_unique_int(ws, outgoings[j]);
1048
10.4k
            _ccv_nnc_stateful_exec_free_if_possible(graph, symbol);
1049
10.4k
            ccv_nnc_graph_exec_symbol_free(graph->tape, symbol);
1050
10.4k
          }
1051
12.8k
        }
1052
9.71k
      }
1053
26.3k
    }
1054
    // If this symbol is not freed, move the tensor view to the bind.
1055
27.4k
    if (!free_symbol)
1056
17.7k
    {
1057
      // If the current bind is an alias and it doesn't have any sources or destinations, we cannot find this alias
1058
      // through any exec. It is not only safe to delete, but it has to be deleted. We don't need to handle this
1059
      // if free_symbol is true, because when that happens, root_bind will be deleted, and we will clean up the
1060
      // alias in that process.
1061
17.7k
      if (bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0))
1062
18
      {
1063
18
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
1064
18
        ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol);
1065
17.6k
      } else {
1066
17.6k
        bind->index = CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED; // This tensor variable will be freed, but this symbol's extra will continue to exist.
1067
17.6k
        bind->destructor_hook.func = tensor_variable->destructor_hook.func; // Transfer the destructor callback.
1068
17.6k
        bind->destructor_hook.context = tensor_variable->destructor_hook.context; // Transfer the destructor callback context.
1069
17.6k
        bind->tensor_view = tensor_variable->tensor_view; // Transfer the ownership to the bind.
1070
17.6k
        tensor_variable->tensor_view = 0;
1071
17.6k
      }
1072
17.7k
    }
1073
27.4k
  }
1074
32.0k
  _ccv_nnc_tensor_variable_free(graph, tensor_variable, 1);
1075
32.0k
}
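
The block above is the tail of the tensor-variable free path: for every exec symbol that touches the freed variable it decides whether the exec and its bind can be dropped from the tape, and when the symbol must outlive the variable it transfers ownership of the tensor view to the bind. A minimal caller-side sketch of that contract follows, assuming the public ccv_nnc_tensor_variable_new, ccv_nnc_tensor_variable_free and ccv_nnc_dynamic_graph_free declarations and the CPU_TENSOR_NHWC helper from ccv_nnc.h / ccv_nnc_easy.h (only ccv_nnc_dynamic_graph_new is shown in this file); the exec step is elided.

#include "ccv_nnc.h"
#include "ccv_nnc_easy.h"

static void free_contract_sketch(void)
{
  ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new();
  // Hypothetical variable; CPU_TENSOR_NHWC(32F, 1) is assumed from ccv_nnc_easy.h.
  ccv_nnc_tensor_variable_t const a = ccv_nnc_tensor_variable_new(graph, CPU_TENSOR_NHWC(32F, 1));
  // ... commands reading a would be recorded on graph->tape here ...
  // Freeing a only drops the caller's handle; the symbol (and, if needed, the
  // tensor view, now owned by the bind) stays on the tape until no exec uses it.
  ccv_nnc_tensor_variable_free(graph, a);
  ccv_nnc_dynamic_graph_free(graph);
}
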
1076
1077
void ccv_nnc_dynamic_graph_has_effect_to_tensor_variables(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t* const source_variables, const int source_variable_size, const ccv_nnc_tensor_variable_t* const destination_variables, const int destination_variable_size, uint64_t* const bitmask)
1078
3
{
1079
3
  int i, j;
1080
3
  ccv_array_t* const sources_destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), source_variable_size + destination_variable_size, 0);
1081
13
  for (i = 0; i < source_variable_size; i++)
1082
10
  {
1083
10
    if (source_variables[i]->symbol.d < 0)
1084
0
      continue;
1085
10
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d);
1086
10
    if (bind->destinations && bind->destinations->rnum > 0)
1087
21
      for (j = 0; j < bind->destinations->rnum; j++)
1088
13
      {
1089
        // It is ok to have duplicate symbols.
1090
13
        const int d = *(int*)ccv_array_get(bind->destinations, j);
1091
13
        ccv_nnc_graph_exec_symbol_t symbol = {
1092
13
          .d = d,
1093
13
          .graph = graph->tape
1094
13
        };
1095
13
        ccv_array_push(sources_destinations, &symbol);
1096
13
      }
1097
10
  }
1098
3
  const int source_size = sources_destinations->rnum;
1099
6
  for (i = 0; i < destination_variable_size; i++)
1100
3
  {
1101
3
    if (destination_variables[i]->symbol.d < 0)
1102
0
      continue;
1103
3
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, destination_variables[i]->symbol.d);
1104
3
    if (bind->sources && bind->sources->rnum > 0)
1105
6
      for (j = 0; j < bind->sources->rnum; j++)
1106
3
      {
1107
        // It is ok to have duplicate symbols.
1108
3
        const int d = *(int*)ccv_array_get(bind->sources, j);
1109
3
        ccv_nnc_graph_exec_symbol_t symbol = {
1110
3
          .d = d,
1111
3
          .graph = graph->tape
1112
3
        };
1113
3
        ccv_array_push(sources_destinations, &symbol);
1114
3
      }
1115
3
  }
1116
3
  const int destination_size = sources_destinations->rnum - source_size;
1117
3
  if (source_size == 0 || destination_size == 0)
1118
0
  {
1119
0
    ccv_array_free(sources_destinations);
1120
0
    return;
1121
0
  }
1122
3
  const int bitmask_size = ((source_size + 63) >> 6);
1123
3
  assert(bitmask_size < 256);
1124
3
  uint64_t exec_bitmask[bitmask_size];
1125
3
  ccv_nnc_symbolic_graph_sources_to_destinations(graph->tape, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, 0), source_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, source_size), destination_size, exec_bitmask);
1126
3
  int k = 0;
1127
13
  for (i = 0; i < source_variable_size; i++)
1128
10
  {
1129
10
    if (source_variables[i]->symbol.d < 0)
1130
0
    {
1131
0
      bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63));
1132
0
      continue;
1133
0
    }
1134
10
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d);
1135
10
    int flag = 0;
1136
10
    if (bind->destinations && bind->destinations->rnum > 0)
1137
8
    {
1138
8
      assert(k <= source_size - bind->destinations->rnum);
1139
18
      for (j = 0; !flag && j < bind->destinations->rnum; j++)
1140
10
        flag = !!(((uint64_t)1 << ((k + j) & 63)) & exec_bitmask[(k + j) >> 6]); // Coerce the tested bit to 0/1 so it survives the narrowing conversion to int.
1141
8
      k += bind->destinations->rnum;
1142
8
    }
1143
10
    if (flag)
1144
7
      bitmask[i >> 6] |= ((uint64_t)1 << (i & 63));
1145
3
    else
1146
3
      bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63));
1147
10
  }
1148
3
  ccv_array_free(sources_destinations);
1149
3
}
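
A hedged usage sketch for the reachability query above: the caller packs one bit per source variable, and bit i of bitmask is set exactly when source_variables[i] can reach one of the destination variables through execs recorded on the tape. Everything except the queried function itself (whose signature is taken from this file) and memset is a placeholder.

#include <string.h>
#include "ccv_nnc.h"

static void has_effect_sketch(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t* const sources, const int source_size, const ccv_nnc_tensor_variable_t destination)
{
  // One uint64_t holds 64 source bits, mirroring the (i >> 6) / (i & 63) indexing above.
  uint64_t bitmask[(source_size + 63) / 64];
  memset(bitmask, 0, sizeof(bitmask));
  ccv_nnc_dynamic_graph_has_effect_to_tensor_variables(graph, sources, source_size, &destination, 1, bitmask);
  int i;
  for (i = 0; i < source_size; i++)
    if (bitmask[i >> 6] & ((uint64_t)1 << (i & 63)))
      { /* sources[i] has an effect on destination, e.g. it needs a gradient. */ }
}
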
1150
1151
int ccv_nnc_dynamic_graph_bookkeeping_count(const ccv_nnc_dynamic_graph_t* const graph, const int type)
1152
407
{
1153
407
  return ccv_nnc_symbolic_graph_active_symbol_count(graph->tape, type);
1154
407
}
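
Since the count above simply forwards to the active-symbol count of the underlying tape, it is handy for leak checks in tests. A sketch, assuming CCV_NNC_SYMBOL_TENSOR and CCV_NNC_SYMBOL_GRAPH_EXEC are the type selectors declared in ccv_nnc.h:

#include <assert.h>
#include "ccv_nnc.h"

static void no_leak_sketch(const ccv_nnc_dynamic_graph_t* const graph)
{
  // After every tensor variable has been freed, nothing should linger on the tape.
  assert(ccv_nnc_dynamic_graph_bookkeeping_count(graph, CCV_NNC_SYMBOL_GRAPH_EXEC) == 0);
  assert(ccv_nnc_dynamic_graph_bookkeeping_count(graph, CCV_NNC_SYMBOL_TENSOR) == 0);
}
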
1155
1156
void ccv_nnc_dynamic_graph_dot(const ccv_nnc_dynamic_graph_t* const graph, const int flags, FILE* out)
1157
415
{
1158
415
  ccv_nnc_symbolic_graph_dot(graph->tape, flags, out);
1159
415
}
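
A debugging sketch for the dot dump above, assuming CCV_NNC_LONG_DOT_GRAPH is the verbose flag declared in ccv_nnc.h; the output is plain Graphviz text and can be rendered with `dot -Tpng`.

#include <stdio.h>
#include "ccv_nnc.h"

static void dot_dump_sketch(const ccv_nnc_dynamic_graph_t* const graph)
{
  FILE* const out = fopen("dynamic_graph.dot", "w+");
  if (out)
  {
    ccv_nnc_dynamic_graph_dot(graph, CCV_NNC_LONG_DOT_GRAPH, out);
    fclose(out);
  }
}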