Coverage Report

Created: 2021-04-07 21:56

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/ccv_nnc_dynamic_graph.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv_nnc.h"
2
#include "ccv_nnc_easy.h"
3
#include "ccv_nnc_internal.h"
4
#include "ccv_nnc_easy.h"
5
#include "ccv_internal.h"
6
#include "_ccv_nnc_dynamic_graph.h"
7
8
// MARK - Level-4 API
9
10
ccv_nnc_dynamic_graph_t* ccv_nnc_dynamic_graph_new(void)
11
41
{
12
41
  ccv_nnc_dynamic_graph_t* graph = ccmalloc(sizeof(ccv_nnc_dynamic_graph_t));
13
41
  graph->no_grad = 0;
14
41
  graph->reuse_var = -1;
15
41
  graph->mp_hdr = -1;
16
41
  graph->vars = ccv_array_new(sizeof(ccv_nnc_tensor_variable_t), 1, 0);
17
41
  graph->binds = ccv_array_new(sizeof(ccv_nnc_tensor_variable_graph_bind_t), 1, 0);
18
41
  graph->tape = ccv_nnc_symbolic_graph_new();
19
41
  graph->freed = kh_init(dy_str);
20
41
  graph->allocd = kh_init(dy_alloc);
21
41
  // These may not be used as frequent, init as needed.
22
41
  graph->stateful_execs = 0;
23
41
  graph->reuse_stateful_exec = -1;
24
41
  graph->synced_streams = 0;
25
41
  graph->ws = 0;
26
41
  return graph;
27
41
}
28
29
// Free a tensor variable: run its destructor hook, release the backing tensor
// (returning GPU memory to the graph's pool when the graph owns it), then
// release the variable's slot in graph->vars and update the reuse hint.
// zeroing == 0 is used during whole-graph teardown where slot bookkeeping
// will be discarded anyway.
static void _ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int zeroing)
{
	const int index = tensor_variable->index;
	if (tensor_variable->tensor_view)
	{
		// Notify the user-installed destructor before tearing the tensor down.
		if (tensor_variable->destructor_hook.func)
			tensor_variable->destructor_hook.func(graph, (ccv_nnc_tensor_t*)tensor_variable->tensor_view, tensor_variable->destructor_hook.context);
		// Externally-set tensors (ccv_nnc_tensor_variable_set) are not owned by us.
		if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tensor_variable->tensor_view))
		{
			if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view))
				ccv_nnc_tensor_view_free(tensor_variable->tensor_view);
			else {
				if (!tensor_variable->alias_index_ref && // Return this memory to the graph.
					CCV_TENSOR_GET_MEMORY(tensor_variable->tensor_view->info.type) == CCV_TENSOR_GPU_MEMORY)
					ccv_nnc_dynamic_graph_xpu_free(graph, tensor_variable->tensor_view->data.ptr);
				ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view);
			}
		}
	}
	ccfree(tensor_variable);
	if (zeroing)
		*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, index) = 0;
	// Shrink the vars array down to the last live entry.
	int i;
	for (i = graph->vars->rnum - 1; i >= 0; i--)
		if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) != 0)
		{
			graph->vars->rnum = i + 1;
			break;
		}
	// Keep reuse_var pointing at the lowest known free slot (or -1 when
	// the previously remembered slot fell off the end of the array).
	if (index < graph->vars->rnum &&
		(index < graph->reuse_var || graph->reuse_var < 0))
		graph->reuse_var = index;
	else if (graph->reuse_var >= graph->vars->rnum)
		graph->reuse_var = -1;
}
64
65
// Release everything a symbolic-graph bind owns: its source/destination exec
// lists, and its tensor (with destructor hook + GPU pool return, mirroring
// _ccv_nnc_tensor_variable_free). With zeroing, the bind is reset so the slot
// can be re-populated; without, fields are left stale for bulk teardown.
static void _ccv_nnc_tensor_variable_graph_bind_free(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int zeroing)
{
	bind->index = CCV_NNC_TENSOR_NO_VARIABLE;
	if (bind->sources)
		ccv_array_free(bind->sources);
	if (bind->destinations)
		ccv_array_free(bind->destinations);
	if (bind->tensor_view)
	{
		if (bind->destructor_hook.func)
			bind->destructor_hook.func(graph, (ccv_nnc_tensor_t*)bind->tensor_view, bind->destructor_hook.context);
		// Externally-provided tensors are not ours to free.
		if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(bind->tensor_view))
		{
			if (CCV_IS_TENSOR_VIEW(bind->tensor_view))
				ccv_nnc_tensor_view_free(bind->tensor_view);
			else {
				if (!bind->alias_ref && // Return this memory to the graph.
					CCV_TENSOR_GET_MEMORY(bind->tensor_view->info.type) == CCV_TENSOR_GPU_MEMORY)
					ccv_nnc_dynamic_graph_xpu_free(graph, bind->tensor_view->data.ptr);
				ccv_nnc_tensor_free((ccv_nnc_tensor_t*)bind->tensor_view);
			}
		}
	}
	if (zeroing)
	{
		bind->sources = 0;
		bind->destinations = 0;
		bind->tensor_view = 0;
		bind->destructor_hook.func = 0;
		bind->destructor_hook.context = 0;
	}
}
97
98
// Tear down a dynamic graph and everything it owns: live tensor variables,
// symbolic binds, the tape, workspace, stateful execs, cached streams and
// their signals, and the XPU allocator — then the graph struct itself.
void ccv_nnc_dynamic_graph_free(ccv_nnc_dynamic_graph_t* const graph)
{
	int i;
	// Free remaining variables without per-slot zeroing (zeroing = 0): the
	// vars array is discarded right after.
	for (i = 0; i < graph->vars->rnum; i++)
	{
		const ccv_nnc_tensor_variable_t tensor_variable = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i);
		if (tensor_variable)
			_ccv_nnc_tensor_variable_free(graph, tensor_variable, 0);
	}
	ccv_array_free(graph->vars);
	for (i = 0; i < graph->binds->rnum; i++)
		_ccv_nnc_tensor_variable_graph_bind_free(graph, (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i), 0);
	ccv_array_free(graph->binds);
	ccv_nnc_symbolic_graph_free(graph->tape);
	if (graph->ws)
		ccv_array_free(graph->ws);
	if (graph->stateful_execs)
	{
		for (i = 0; i < graph->stateful_execs->rnum; i++)
		{
			ccv_nnc_stateful_exec_t* const stateful_exec = *(ccv_nnc_stateful_exec_t**)ccv_array_get(graph->stateful_execs, i);
			if (stateful_exec)
				ccfree(stateful_exec);
		}
		ccv_array_free(graph->stateful_execs);
	}
	if (graph->synced_streams)
	{
		// Walk the khash of cached stream/signal pairs, skipping empty buckets.
		khiter_t k;
		for (k = kh_begin(graph->synced_streams); k != kh_end(graph->synced_streams); ++k)
		{
			if (!kh_exist(graph->synced_streams, k))
				continue;
			ccv_nnc_synced_stream_t* const synced_stream = &kh_val(graph->synced_streams, k);
			ccv_nnc_stream_context_free(synced_stream->stream);
			ccv_nnc_stream_signal_free(synced_stream->synced);
		}
		kh_destroy(synced_stream, graph->synced_streams);
	}
	ccv_nnc_dynamic_graph_xpu_alloc_destroy(graph);
	ccfree(graph);
}
140
141
// Bind an externally-owned tensor to a variable. The low pointer bit tags the
// tensor_view as "extern" so free paths know not to release it.
void ccv_nnc_tensor_variable_set(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_t* const tensor)
{
	assert(!tensor_variable->alias_index_ref);
	// Drop any tensor the graph itself previously allocated for this variable.
	// NOTE(review): unlike _ccv_nnc_tensor_variable_free, this path neither runs
	// the destructor hook nor returns GPU memory to the pool — confirm callers
	// only hit this with CPU-backed or hook-free variables.
	if (tensor_variable->tensor_view && !CCV_NNC_IS_EXTERN_TENSOR_VIEW(tensor_variable->tensor_view))
	{
		assert(!CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view));
		ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view);
	}
	tensor_variable->info = tensor->info;
	// Tag bit 0 to mark this tensor as externally owned.
	tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)((uintptr_t)tensor | 1);
}
152
153
// Install a destructor callback invoked when the variable's tensor is freed.
void ccv_nnc_tensor_variable_destructor_hook(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_variable_destructor_f func, void* const context)
{
	tensor_variable->destructor_hook.context = context;
	tensor_variable->destructor_hook.func = func;
}
158
159
// Common initialization for a freshly-allocated tensor variable: reset its
// fields and assign it a slot in graph->vars, preferring a previously freed
// slot (graph->reuse_var) over growing the array.
inline static void _ccv_nnc_tensor_variable_init(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_param_t info)
{
	tensor_variable->alias_index_ref = 0;
	tensor_variable->destructor_hook.func = 0;
	tensor_variable->destructor_hook.context = 0;
	tensor_variable->info = info;
	tensor_variable->symbol = NO_TENSOR_SYMBOL;
	tensor_variable->tensor_view = 0;
	if (graph->reuse_var >= 0)
	{
		// Take over the remembered free slot …
		const int reuse_var = graph->reuse_var;
		assert(reuse_var < graph->vars->rnum);
		tensor_variable->index = reuse_var;
		*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var) = tensor_variable;
		// … then scan forward for the next free slot to remember.
		graph->reuse_var = -1;
		int i;
		for (i = reuse_var + 1; i < graph->vars->rnum && graph->reuse_var < 0; i++)
			if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) == 0)
				graph->reuse_var = i;
	} else {
		// No free slot: append at the end.
		tensor_variable->index = graph->vars->rnum;
		ccv_array_push(graph->vars, &tensor_variable);
	}
}
183
184
// Create a new trainable tensor variable with the given parameters.
ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info)
{
	ccv_nnc_tensor_variable_t const new_variable = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s));
	new_variable->type = CCV_NNC_TENSOR_VARIABLE;
	_ccv_nnc_tensor_variable_init(graph, new_variable, info);
	return new_variable;
}
191
192
// Create a new constant tensor variable (same init path as a regular
// variable, only the type tag differs).
ccv_nnc_tensor_variable_t ccv_nnc_tensor_constant_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info)
{
	ccv_nnc_tensor_variable_t const new_constant = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s));
	new_constant->type = CCV_NNC_TENSOR_CONSTANT;
	_ccv_nnc_tensor_variable_init(graph, new_constant, info);
	return new_constant;
}
199
200
int ccv_nnc_tensor_variable_is_constant(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
201
0
{
202
0
  return tensor_variable->type == CCV_NNC_TENSOR_CONSTANT;
203
0
}
204
205
// Accessor for a variable's tensor parameters (shape / type / etc.).
ccv_nnc_tensor_param_t ccv_nnc_tensor_variable_params(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
{
	const ccv_nnc_tensor_param_t params = tensor_variable->info;
	return params;
}
209
210
// Create an alias view of an existing (non-alias) variable with the given
// offsets / increments. Slot assignment mirrors _ccv_nnc_tensor_variable_init.
ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_alias_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info)
{
	// Aliases of aliases are not supported.
	assert(!tensor_variable->alias_index_ref);
	ccv_nnc_tensor_variable_t const variable_alias = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s));
	variable_alias->type = tensor_variable->type;
	// alias_index_ref stores the aliased variable's slot + 1 (0 means "not an alias").
	variable_alias->alias_index_ref = tensor_variable->index + 1;
	variable_alias->info = info;
	variable_alias->symbol = NO_TENSOR_SYMBOL;
	variable_alias->destructor_hook.func = 0;
	variable_alias->destructor_hook.context = 0;
	variable_alias->tensor_view = 0;
	memcpy(variable_alias->ofs, ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
	memcpy(variable_alias->inc, inc, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
	// Same slot-reuse dance as _ccv_nnc_tensor_variable_init.
	if (graph->reuse_var >= 0)
	{
		const int reuse_var = graph->reuse_var;
		assert(reuse_var < graph->vars->rnum);
		variable_alias->index = reuse_var;
		*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var) = variable_alias;
		graph->reuse_var = -1;
		int i;
		for (i = reuse_var + 1; i < graph->vars->rnum && graph->reuse_var < 0; i++)
			if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) == 0)
				graph->reuse_var = i;
	} else {
		variable_alias->index = graph->vars->rnum;
		ccv_array_push(graph->vars, &variable_alias);
	}
	return variable_alias;
}
240
241
// Materialize (or fetch) the concrete tensor behind a variable, allocating
// lazily on first access. For aliases, the backing variable is materialized
// first and the alias becomes either a plain tensor over the same data (when
// ofs/inc are trivial) or a real tensor view. Returns 0 when the shape is
// still "auto" and nothing can be allocated yet.
ccv_nnc_tensor_t* ccv_nnc_tensor_from_variable_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_stream_context_t* const stream_context)
{
	if (tensor_variable->tensor_view)
	{
		if (tensor_variable->alias_index_ref)
		{
			const int alias_index = tensor_variable->alias_index_ref - 1;
			assert(alias_index >= 0);
			ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
			if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view))
			{
				ccv_nnc_tensor_view_t* const tv = tensor_variable->tensor_view;
				// We cannot have an alias with custom set tensor, otherwise the pointer update is invalid.
				assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv));
				// Update the tensor_view pointer every time access it, because the underlying variable it alias to have changed.
				tv->data.u8 = CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8 + tv->off;
			} else {
				ccv_nnc_tensor_t* const tv = (ccv_nnc_tensor_t*)tensor_variable->tensor_view;
				// We cannot have an alias with custom set tensor, otherwise the pointer update is invalid.
				assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv));
				// Update the tensor_view pointer every time access it, because the underlying variable it alias to have changed.
				tv->data.u8 = CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8;
			}
		}
		return (ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(tensor_variable->tensor_view);
	}
	if (!tensor_variable->alias_index_ref)
	{
		// If we haven't allocated tensor_variable, we cannot allocate them now (because no shape specified), return 0.
		if (ccv_nnc_is_tensor_auto(tensor_variable->info))
			return 0;
		void* ptr = 0;
		// GPU tensors draw their memory from the graph's pooled XPU allocator.
		if (CCV_TENSOR_GET_MEMORY(tensor_variable->info.type) == CCV_TENSOR_GPU_MEMORY)
			ptr = ccv_nnc_dynamic_graph_xpu_alloc(graph, CCV_TENSOR_GET_DEVICE_ID(tensor_variable->info.type), stream_context, ccv_nnc_tensor_data_size(tensor_variable->info));
		tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, tensor_variable->info, 0);
		assert(tensor_variable->tensor_view->data.u8);
		return (ccv_nnc_tensor_t*)tensor_variable->tensor_view;
	}
	// Alias with no tensor yet: materialize the variable it aliases first.
	const int alias_index = tensor_variable->alias_index_ref - 1;
	assert(alias_index >= 0);
	ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
	assert(!variable_to->alias_index_ref);
	if (!variable_to->tensor_view)
	{
		// If we haven't allocated variable_to, we cannot allocate them now (because no shape specified), return 0.
		if (ccv_nnc_is_tensor_auto(variable_to->info))
			return 0;
		void* ptr = 0;
		assert(variable_to->info.type == tensor_variable->info.type);
		if (CCV_TENSOR_GET_MEMORY(variable_to->info.type) == CCV_TENSOR_GPU_MEMORY)
			ptr = ccv_nnc_dynamic_graph_xpu_alloc(graph, CCV_TENSOR_GET_DEVICE_ID(variable_to->info.type), stream_context, ccv_nnc_tensor_data_size(variable_to->info));
		variable_to->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, variable_to->info, 0);
		assert(variable_to->tensor_view->data.u8);
	}
	// no_ofs: all offsets are zero. no_inc: increments are all-zero or equal
	// to the alias's own dims — either way the view is contiguous.
	int no_ofs = 1;
	int i;
	for (i = 0; no_ofs && i < CCV_NNC_MAX_DIM_ALLOC; i++)
		no_ofs = (tensor_variable->ofs[i] == 0);
	int no_inc = 1;
	for (i = 0; no_inc && i < CCV_NNC_MAX_DIM_ALLOC; i++)
		no_inc = (tensor_variable->inc[i] == 0);
	if (!no_inc)
		no_inc = (memcmp(tensor_variable->inc, tensor_variable->info.dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC) == 0);
	assert(ccv_nnc_tensor_count(tensor_variable->info) <= ccv_nnc_tensor_count(variable_to->info));
	if (no_ofs && no_inc)
		// Trivial alias: a plain tensor over the same buffer suffices.
		tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8, tensor_variable->info, 0);
	else
		tensor_variable->tensor_view = ccv_nnc_tensor_view_new((ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(variable_to->tensor_view), tensor_variable->info, tensor_variable->ofs, no_inc ? tensor_variable->info.dim : tensor_variable->inc);
	return (ccv_nnc_tensor_t*)tensor_variable->tensor_view;
}
311
312
// Register the bind-side bookkeeping for a newly created tensor symbol:
// grow graph->binds to cover symbol.d, then reset the bind to point at the
// variable (recording the alias link when the variable is an alias).
static void _ccv_nnc_tensor_symbol_extra_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_symbol_t symbol)
{
	if (symbol.d >= graph->binds->rnum)
	{
		// Grow the bind array; fresh slots carry no variable.
		const int rnum = graph->binds->rnum;
		ccv_array_resize(graph->binds, symbol.d + 1);
		int i;
		for (i = rnum; i < graph->binds->rnum; i++)
			((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i))->index = CCV_NNC_TENSOR_NO_VARIABLE;
	}
	ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, symbol.d);
	bind->type = tensor_variable->type;
	bind->index = tensor_variable->index;
	if (tensor_variable->alias_index_ref)
	{
		// Resolve the symbol this alias points at on the tape and record it as +1.
		const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){
			.d = symbol.d,
			.graph = graph->tape
		});
		assert(alias_to.d >= 0 && alias_to.d < graph->binds->rnum);
		bind->alias_ref = alias_to.d + 1;
	} else
		bind->alias_ref = 0;
	// Reset any stale exec lists / tensor / hooks on a reused bind slot.
	if (bind->sources)
		ccv_array_free(bind->sources);
	bind->sources = 0;
	if (bind->destinations)
		ccv_array_free(bind->destinations);
	bind->destinations = 0;
	bind->destructor_hook.func = 0;
	bind->destructor_hook.context = 0;
	bind->tensor_view = 0;
}
345
346
// Get (creating lazily) the tape symbol for a variable. Aliases recurse into
// the variable they alias so the alias symbol is anchored to a real symbol.
static ccv_nnc_tensor_symbol_t _ccv_nnc_tensor_symbol_from_variable(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
{
	// Already materialized on the tape.
	if (tensor_variable->symbol.d >= 0)
		return tensor_variable->symbol;
	if (!tensor_variable->alias_index_ref)
	{
		const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_new(graph->tape, tensor_variable->info, 0);
		_ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol);
		return symbol;
	}
	const int alias_index = tensor_variable->alias_index_ref - 1;
	assert(alias_index >= 0);
	ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
	assert(!variable_to->alias_index_ref);
	// All-zero inc means "use the alias's own dims" when creating the alias symbol.
	int no_inc = 1;
	int i;
	for (i = 0; no_inc && i < CCV_NNC_MAX_DIM_ALLOC; i++)
		no_inc = (tensor_variable->inc[i] == 0);
	const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_alias_new(graph->tape, _ccv_nnc_tensor_symbol_from_variable(graph, variable_to), tensor_variable->ofs, no_inc ? tensor_variable->info.dim : tensor_variable->inc, tensor_variable->info, 0);
	_ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol);
	return symbol;
}
368
369
// Return the tensor variable that is old (the provided tensor variable will have a new setting).
370
// Return the tensor variable that is old (the provided tensor variable will
// have a new setting). Implements the SSA-style swap: the caller's handle
// keeps its index but receives fresh contents, while the returned variable
// carries the old contents under a new index.
ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_exchange_new(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable)
{
	struct ccv_nnc_tensor_variable_s x = *tensor_variable;
	ccv_nnc_tensor_variable_t new_variable;
	// Need to handle alias.
	if (x.alias_index_ref)
		new_variable = ccv_nnc_tensor_variable_alias_new(graph, *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, x.alias_index_ref - 1), x.ofs, x.inc, x.info);
	else
		new_variable = ccv_nnc_tensor_variable_new(graph, x.info);
	// Swap the two structs wholesale …
	*tensor_variable = *new_variable;
	*new_variable = x;
	// … then swap the indices back so each keeps its own slot.
	// The index should be the same though.
	const int index = new_variable->index;
	new_variable->index = tensor_variable->index;
	if (new_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
	{
		// The old symbol's bind must follow the old contents to its new index.
		ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, new_variable->symbol.d);
		bind->index = new_variable->index;
	}
	tensor_variable->index = index;
	return new_variable;
}
392
393
// Toggle gradient recording for the dynamic graph.
void ccv_nnc_dynamic_graph_set_no_grad(ccv_nnc_dynamic_graph_t* const dynamic_graph, const int no_grad)
{
	dynamic_graph->no_grad = no_grad;
}
397
398
// Fetch the cached stream/signal pair for a stream type, creating both the
// cache (khash) and the pair on first use.
static ccv_nnc_synced_stream_t _ccv_nnc_dynamic_graph_get_synced_stream(ccv_nnc_dynamic_graph_t* const graph, const int type)
{
	if (!graph->synced_streams)
		graph->synced_streams = kh_init(synced_stream);
	int ret = 0;
	const khiter_t k = kh_put(synced_stream, graph->synced_streams, type, &ret);
	assert(ret >= 0);
	ccv_nnc_synced_stream_t* const synced_stream = &kh_val(graph->synced_streams, k);
	// If ret == 0, the key already exist, we can return directly, otherwise, create and return.
	if (ret != 0)
	{
		synced_stream->stream = ccv_nnc_stream_context_new(type);
		synced_stream->synced = ccv_nnc_stream_signal_new(type);
	}
	return *synced_stream;
}
414
415
typedef struct {
416
  ccv_nnc_dynamic_graph_t* graph;
417
  int stream_type;
418
} ccv_nnc_dynamic_graph_neighbor_context_discovery_t;
419
420
static ccv_nnc_stream_context_t* _ccv_nnc_dynamic_graph_neighbor_context_discovery(const int device_id, void* const context)
421
0
{
422
0
  ccv_nnc_dynamic_graph_neighbor_context_discovery_t* const discovery = (ccv_nnc_dynamic_graph_neighbor_context_discovery_t*)context;
423
0
  int type = discovery->stream_type;
424
0
  CCV_STREAM_SET_DEVICE_ID(type, device_id);
425
0
  return _ccv_nnc_dynamic_graph_get_synced_stream(discovery->graph, type).stream;
426
0
}
427
428
void ccv_nnc_dynamic_graph_exec_ret(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context, ccv_nnc_graph_exec_symbol_t* const graph_execs)
429
15.3k
{
430
15.3k
  int i, j;
431
43.4k
  for (i = 0; i < input_size; 
i++28.1k
)
432
28.1k
    if (inputs[i] && 
!inputs[i]->alias_index_ref28.1k
)
433
27.0k
      { assert(inputs[i]->tensor_view); }
434
15.3k
  ccv_nnc_tensor_t* input_tensors[ccv_max(1, input_size)];
435
43.4k
  for (i = 0; i < input_size; 
i++28.1k
)
436
28.1k
    input_tensors[i] = inputs[i] ? ccv_nnc_tensor_from_variable(graph, inputs[i], stream_context) : 
01
;
437
15.3k
  ccv_nnc_tensor_symbol_t input_symbols[ccv_max(1, input_size)];
438
43.4k
  for (i = 0; i < input_size; 
i++28.1k
)
439
28.1k
    input_symbols[i] = inputs[i] ? 
_ccv_nnc_tensor_symbol_from_variable(graph, inputs[i])28.1k
:
NO_TENSOR_SYMBOL1
;
440
15.3k
  ccv_array_t* input_sources[ccv_max(1, input_size)];
441
15.3k
  ccv_array_t* input_alias_sources[ccv_max(1, input_size)];
442
43.4k
  for (i = 0; i < input_size; 
i++28.1k
)
443
28.1k
  {
444
28.1k
    input_sources[i] = input_symbols[i].d != CCV_NNC_NO_TENSOR_SYMBOL ? 
((ccv_nnc_tensor_variable_graph_bind_t*)28.1k
ccv_array_get28.1k
(graph->binds, input_symbols[i].d))->sources :
01
;
445
28.1k
    if (inputs[i] && 
inputs[i]->alias_index_ref28.1k
)
446
1.02k
    {
447
1.02k
      const int alias_index_ref = inputs[i]->alias_index_ref - 1;
448
1.02k
      assert(alias_index_ref >= 0);
449
1.02k
      ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index_ref);
450
1.02k
      input_alias_sources[i] = ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d))->sources;
451
1.02k
    } else
452
27.0k
      input_alias_sources[i] = 0;
453
28.1k
  }
454
15.3k
  const int parallel_count = ccv_max(1, parallel);
455
15.3k
  assert(input_size % parallel_count == 0);
456
15.3k
  const int per_input_size = input_size / parallel_count;
457
15.3k
  assert(output_size % parallel_count == 0);
458
15.3k
  const int per_output_size = output_size / parallel_count;
459
15.3k
  int output_auto = 0;
460
30.9k
  for (i = 0; !output_auto && 
i < output_size16.2k
;
i++15.5k
)
461
15.5k
    output_auto = outputs[i] ? 
ccv_nnc_is_tensor_auto(outputs[i]->info)15.3k
:
0198
;
462
15.3k
  // One extra step, infer the parameters for outputs.
463
15.3k
  if (output_auto)
464
14.6k
  {
465
14.6k
    ccv_nnc_tensor_param_t input_params[ccv_max(1, per_input_size)];
466
14.6k
    ccv_nnc_tensor_param_t output_params[ccv_max(1, per_output_size)];
467
29.3k
    for (i = 0; i < parallel_count; 
i++14.6k
)
468
14.6k
    {
469
41.8k
      for (j = 0; j < per_input_size; 
j++27.1k
)
470
27.1k
        input_params[j] = inputs[j + i * per_input_size] ? 
inputs[j + i * per_input_size]->info27.1k
:
ccv_nnc_tensor_auto1
;
471
29.6k
      for (j = 0; j < per_output_size; 
j++14.9k
)
472
14.9k
        output_params[j] = outputs[j + i * per_output_size] ? 
outputs[j + i * per_output_size]->info14.7k
:
ccv_nnc_tensor_auto205
;
473
14.6k
      ccv_nnc_hint_tensor_auto(cmd, input_params, per_input_size, hint, output_params, per_output_size);
474
29.6k
      for (j = 0; j < per_output_size; 
j++14.9k
)
475
14.9k
        if (outputs[j + i * per_output_size])
476
14.7k
          outputs[j + i * per_output_size]->info = output_params[j];
477
14.6k
    }
478
14.6k
  }
479
15.3k
  int freeable_size = 0;
480
15.3k
  ccv_nnc_tensor_variable_t freeables[ccv_max(1, output_size)];
481
15.3k
  // Refresh the symbol if it is binded to an existing exec. Otherwise we cannot keep the SSA guarantee.
482
30.9k
  for (i = 0; i < output_size; 
i++15.6k
)
483
15.6k
  {
484
15.6k
    // First, go over to see whether there is enforce inplace.
485
15.6k
    int enforce_idx = -1;
486
44.6k
    for (j = 0; enforce_idx < 0 && 
j < input_size44.6k
;
j++29.0k
)
487
29.0k
      if (inputs[j] && 
ccv_nnc_cmd_enforce_inplace(cmd, j, input_size, i, output_size)29.0k
)
488
2
        enforce_idx = j;
489
15.6k
    if (enforce_idx >= 0)
490
2
      { assert(outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL); }
491
15.6k
    // We don't allow or check "allow inplace" yet. That logic will be at odds with backward logic.
492
15.6k
    if (outputs[i] && 
outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL15.4k
)
493
417
    {
494
417
      const ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[i]->symbol.d);
495
417
      if (enforce_idx >= 0)
496
2
        { assert(!bind->destinations || bind->destinations->rnum == 0); }
497
417
      if (bind->sources && 
bind->sources->rnum > 0415
)
498
415
      {
499
415
        const ccv_nnc_tensor_variable_t old_var = freeables[freeable_size++] = ccv_nnc_tensor_variable_exchange_new(graph, outputs[i]);
500
415
        // If this is enforce output, make sure the tensor view is taken by the output.
501
415
        if (enforce_idx >= 0)
502
0
        {
503
0
          outputs[i]->tensor_view = old_var->tensor_view; // Make sure the tensor view is taken over by the output.
504
0
          old_var->tensor_view = 0;
505
0
        }
506
415
      }
507
417
    }
508
15.6k
  }
509
15.3k
  ccv_nnc_tensor_t* output_tensors[ccv_max(1, per_output_size)];
510
15.3k
  if (parallel_count > 1)
511
23
  {
512
23
    const int max_device_id_size = per_input_size + per_output_size;
513
23
    assert(max_device_id_size > 0);
514
23
    int device_ids[max_device_id_size];
515
23
    ccv_nnc_stream_context_t* streams[parallel_count];
516
23
    ccv_nnc_stream_signal_t* signal;
517
23
    if (stream_context)
518
14
      signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
519
97
    for (i = 0; i < parallel_count; 
i++74
)
520
74
    {
521
74
      int flag = 0;
522
148
      for (j = 0; !flag && 
j < per_input_size78
;
j++74
)
523
74
        if (input_tensors[i * per_input_size + j])
524
74
          flag = (CCV_TENSOR_GET_MEMORY(input_tensors[i * per_input_size + j]->info.type) == CCV_TENSOR_GPU_MEMORY);
525
156
      for (j = 0; j < per_output_size; 
j++82
)
526
82
      {
527
82
        output_tensors[j] = outputs[j + i * per_output_size] ? ccv_nnc_tensor_from_variable(graph, outputs[j + i * per_output_size], stream_context) : 
08
;
528
82
        if (output_tensors[j] && 
!flag74
)
529
4
          flag = (CCV_TENSOR_GET_MEMORY(output_tensors[j]->info.type) == CCV_TENSOR_GPU_MEMORY);
530
82
      }
531
74
      const int stream_type = flag ? 
CCV_STREAM_CONTEXT_GPU70
:
CCV_STREAM_CONTEXT_CPU4
;
532
74
      const int tensor_type = flag ? 
CCV_TENSOR_GPU_MEMORY70
:
CCV_TENSOR_CPU_MEMORY4
;
533
74
      const int device_id_size = ccv_nnc_device_ids_for_io(input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, tensor_type, device_ids, max_device_id_size);
534
74
      ccv_nnc_synced_stream_t stream_0 = {};
535
146
      for (j = 0; j < device_id_size; 
j++72
)
536
72
      {
537
72
        int type = stream_type;
538
72
        CCV_STREAM_SET_DEVICE_ID(type, device_ids[j]);
539
72
        ccv_nnc_synced_stream_t synced_stream = _ccv_nnc_dynamic_graph_get_synced_stream(graph, type);
540
72
        if (!stream_0.stream)
541
72
          stream_0 = synced_stream;
542
72
      }
543
74
      // Wait signal to finish.
544
74
      if (stream_context)
545
44
      {
546
44
        if (stream_0.stream)
547
42
          ccv_nnc_stream_context_wait_signal(stream_0.stream, signal);
548
2
        else
549
2
          ccv_nnc_stream_context_wait(stream_context);
550
44
      }
551
74
      if (stream_0.stream)
552
72
      {
553
72
        ccv_nnc_dynamic_graph_neighbor_context_discovery_t discovery = {
554
72
          .graph = graph,
555
72
          .stream_type = stream_type
556
72
        };
557
72
        ccv_nnc_stream_context_set_neighbor_discovery(stream_0.stream, _ccv_nnc_dynamic_graph_neighbor_context_discovery, &discovery);
558
72
      }
559
74
      PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size);
560
74
      int k;
561
204
      for (k = 0; k < per_input_size; 
k++130
)
562
130
      {
563
130
        PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", 
k + 1, input_tensors[k + i * per_input_size], (input_tensors[k + i * per_input_size] ? input_tensors[k + i * per_input_size]->data.u8 : 0), (input_tensors[k + i * per_input_size] ? 0
CCV_TENSOR_GET_DEVICE_ID0
(input_tensors[k + i * per_input_size]->info.type) : -1));
564
130
        if (input_tensors[k + i * per_input_size] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
565
130
          
ccv_nnc_print_tensor_info(input_tensors[k + i * per_input_size])0
;
566
130
        PRINT(CCV_CLI_INFO, "\n");
567
130
      }
568
74
      ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, stream_0.stream);
569
156
      for (k = 0; k < per_output_size; 
k++82
)
570
82
      {
571
82
        PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", 
k + 1, output_tensors[k], (output_tensors[k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? 0
CCV_TENSOR_GET_DEVICE_ID0
(output_tensors[k]->info.type) : -1));
572
82
        if (output_tensors[k] && 
CCV_CLI_OUTPUT_LEVEL_IS74
(CCV_CLI_INFO))
573
82
          
ccv_nnc_print_tensor_info(output_tensors[k])0
;
574
82
        PRINT(CCV_CLI_INFO, "\n");
575
82
      }
576
74
      if (stream_context && 
stream_0.stream44
)
577
42
      {
578
42
        ccv_nnc_stream_context_emit_signal(stream_0.stream, stream_0.synced);
579
42
        ccv_nnc_stream_context_wait_signal(stream_context, stream_0.synced);
580
42
      }
581
74
      streams[i] = stream_0.stream;
582
74
    }
583
23
    if (!stream_context)
584
39
      
for (i = 0; 9
i < parallel_count;
i++30
)
585
30
        if (streams[i])
586
30
          ccv_nnc_stream_context_wait(streams[i]);
587
15.3k
  } else {
588
30.8k
    for (i = 0; i < per_output_size; 
i++15.5k
)
589
15.5k
      output_tensors[i] = outputs[i] ? ccv_nnc_tensor_from_variable(graph, outputs[i], stream_context) : 
0197
;
590
15.3k
    PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size);
591
43.3k
    for (i = 0; i < per_input_size; 
i++27.9k
)
592
27.9k
    {
593
27.9k
      PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", 
i + 1, input_tensors[i], (input_tensors[i] ? input_tensors[i]->data.u8 : 0), (input_tensors[i] ? 0
CCV_TENSOR_GET_DEVICE_ID0
(input_tensors[i]->info.type) : -1));
594
27.9k
      if (input_tensors[i] && 
CCV_CLI_OUTPUT_LEVEL_IS27.9k
(CCV_CLI_INFO))
595
27.9k
        
ccv_nnc_print_tensor_info(input_tensors[i])0
;
596
27.9k
      PRINT(CCV_CLI_INFO, "\n");
597
27.9k
    }
598
15.3k
    ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors, per_input_size, output_tensors, per_output_size, stream_context);
599
30.8k
    for (i = 0; i < per_output_size; 
i++15.5k
)
600
15.5k
    {
601
15.5k
      PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", 
i + 1, output_tensors[i], (output_tensors[i] ? output_tensors[i]->data.u8 : 0), (output_tensors[i] ? 0
CCV_TENSOR_GET_DEVICE_ID0
(output_tensors[i]->info.type) : -1));
602
15.5k
      if (output_tensors[i] && 
CCV_CLI_OUTPUT_LEVEL_IS15.3k
(CCV_CLI_INFO))
603
15.5k
        
ccv_nnc_print_tensor_info(output_tensors[i])0
;
604
15.5k
      PRINT(CCV_CLI_INFO, "\n");
605
15.5k
    }
606
15.3k
  }
607
15.3k
  int inputs_are_constants = 1;
608
30.6k
  for (i = 0; inputs_are_constants && 
i < input_size15.3k
;
i++15.3k
)
609
15.3k
    if (inputs[i] && inputs[i]->type != CCV_NNC_TENSOR_CONSTANT)
610
15.3k
      inputs_are_constants = 0;
611
15.3k
  if (input_size > 0 && 
!inputs_are_constants15.3k
&&
!graph->no_grad15.3k
) // No need to record the execution if there is no input or we disabled gradient computation.
612
15.3k
  {
613
15.3k
    ccv_nnc_tensor_symbol_t output_symbols[ccv_max(1, output_size)];
614
30.8k
    for (i = 0; i < output_size; 
i++15.5k
)
615
15.5k
      if (outputs[i])
616
15.3k
      {
617
15.3k
        assert(outputs[i]->type != CCV_NNC_TENSOR_CONSTANT);
618
15.3k
        output_symbols[i] = _ccv_nnc_tensor_symbol_from_variable(graph, outputs[i]);
619
15.3k
      } else
620
205
        output_symbols[i] = NO_TENSOR_SYMBOL;
621
15.3k
    int t;
622
30.6k
    for (t = 0; t < parallel_count; 
t++15.3k
)
623
15.3k
    {
624
15.3k
      ccv_nnc_graph_exec_symbol_t graph_exec = ccv_nnc_graph_exec_symbol_new(graph->tape, cmd, input_symbols + t * per_input_size, per_input_size, output_symbols + t * per_output_size, per_output_size, 0);
625
15.3k
      if (graph_execs)
626
2.40k
        graph_execs[t] = graph_exec;
627
15.3k
      // This needs to be done before we set the new sources on the outputs.
628
43.3k
      for (i = 0; i < per_input_size; 
i++28.0k
)
629
28.0k
      {
630
28.0k
        ccv_array_t* const input_source = input_sources[i + t * per_input_size];
631
28.0k
        if (input_source)
632
28.1k
          
for (j = 0; 14.0k
j < input_source->rnum;
j++14.0k
)
633
14.0k
            ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){
634
14.0k
              .d = *(int*)ccv_array_get(input_source, j),
635
14.0k
              .graph = graph->tape
636
14.0k
            }, graph_exec);
637
28.0k
        ccv_array_t* const input_alias_source = input_alias_sources[i + t * per_input_size];
638
28.0k
        if (input_alias_source)
639
2.02k
          
for (j = 0; 1.01k
j < input_alias_source->rnum;
j++1.01k
)
640
1.01k
            ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){
641
1.01k
              .d = *(int*)ccv_array_get(input_alias_source, j),
642
1.01k
              .graph = graph->tape
643
1.01k
            }, graph_exec);
644
28.0k
      }
645
43.3k
      for (i = 0; i < per_input_size; 
i++28.0k
)
646
28.0k
      {
647
28.0k
        ccv_nnc_tensor_variable_t const input = inputs[i + t * per_input_size];
648
28.0k
        if (!input || 
input->type == CCV_NNC_TENSOR_CONSTANT28.0k
)
649
231
          continue;
650
27.8k
        ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, input_symbols[i + t * per_input_size].d);
651
27.8k
        if (!bind->destinations)
652
21.9k
          bind->destinations = ccv_array_new(sizeof(int), 1, 0);
653
27.8k
        ccv_array_add_unique_int(bind->destinations, graph_exec.d);
654
27.8k
        if (input->alias_index_ref)
655
1.01k
        {
656
1.01k
            const int alias_index = input->alias_index_ref - 1;
657
1.01k
            assert(alias_index >= 0);
658
1.01k
            ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
659
1.01k
            ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d);
660
1.01k
            if (!root_bind->destinations)
661
1.01k
              root_bind->destinations = ccv_array_new(sizeof(int), 1, 0);
662
1.01k
            ccv_array_add_unique_int(root_bind->destinations, graph_exec.d);
663
1.01k
        }
664
27.8k
      }
665
30.8k
      
for (i = 0; 15.3k
i < per_output_size;
i++15.5k
)
666
15.5k
      {
667
15.5k
        ccv_nnc_tensor_variable_t const output = outputs[i + t * per_output_size];
668
15.5k
        if (!output)
669
205
          continue;
670
15.3k
        ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, output_symbols[i + t * per_output_size].d);
671
15.3k
        assert(!bind->sources); // This is a new symbol, therefore, no binded sources associated yet.
672
15.3k
        bind->sources = ccv_array_new(sizeof(int), 1, 0);
673
15.3k
        ccv_array_add_unique_int(bind->sources, graph_exec.d);
674
15.3k
        if (output->alias_index_ref)
675
8
        {
676
8
          const int alias_index = output->alias_index_ref - 1;
677
8
          assert(alias_index >= 0);
678
8
          ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
679
8
          ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d);
680
8
          if (!root_bind->sources)
681
4
            root_bind->sources = ccv_array_new(sizeof(int), 1, 0);
682
8
          ccv_array_add_unique_int(root_bind->sources, graph_exec.d);
683
8
        }
684
15.3k
      }
685
15.3k
    }
686
15.3k
  }
687
15.3k
  // Now, able to free some of the reused outputs.
688
15.7k
  
for (i = 0; 15.3k
i < freeable_size;
i++415
)
689
415
    ccv_nnc_tensor_variable_free(graph, freeables[i]);
690
15.3k
}
691
692
int ccv_nnc_dynamic_graph_exec(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context)
{
	// Convenience wrapper over ccv_nnc_dynamic_graph_exec_ret: identical execution,
	// but the caller does not receive the recorded exec symbols (trailing arg is 0).
	ccv_nnc_dynamic_graph_exec_ret(graph, cmd, hint, flags, inputs, input_size, outputs, output_size, parallel, stream_context, 0);
	return CCV_NNC_EXEC_SUCCESS;
}
697
698
static int _ccv_nnc_tensor_variable_is_only_output(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_graph_bind_t* bind, const int symbol_d)
{
	// Returns 1 when every exec symbol that produces this tensor produces nothing
	// else that is still alive, i.e. freeing this tensor orphans no sibling output.
	if (bind->alias_ref) // An alias's producers are recorded on its origin bind.
		bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, bind->alias_ref - 1);
	if (!bind->sources || bind->sources->rnum == 0)
		return 1; // No producer at all: trivially the only output.
	int src;
	for (src = 0; src < bind->sources->rnum; src++)
	{
		const ccv_nnc_graph_exec_symbol_t producer = {
			.d = *(int*)ccv_array_get(bind->sources, src),
			.graph = graph->tape
		};
		const int* outputs; int output_size;
		ccv_nnc_graph_exec_symbol_io(graph->tape, producer, 0, 0, &outputs, &output_size);
		int out;
		for (out = 0; out < output_size; out++)
		{
			// Skip empty slots and the tensor we are asking about. If output is me, it is the only output.
			if (outputs[out] < 0 || outputs[out] == symbol_d)
				continue;
			assert(outputs[out] < graph->binds->rnum);
			const ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[out]);
			// A sibling output still backed by a live, non-constant variable keeps the producer needed.
			if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT)
				return 0;
			if (other_bind->alias_ref) // If this is alias, use its original's destinations.
				other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
			// A sibling output still consumed downstream also keeps the producer needed.
			if (other_bind->destinations && other_bind->destinations->rnum > 0)
				return 0;
		}
	}
	return 1;
}
731
732
// Remove freed_exec_symbol_d from this bind's destination (consumer) list, and if the
// bind's backing tensor variable is already gone (CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED)
// and nothing references it any more, free the bind and its tensor symbol too. Any
// producers of a fully-freed bind are queued into `ws` for further inspection by the
// caller's worklist sweep.
static void _ccv_nnc_update_bind_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws)
{
	int i;
	if (bind->destinations)
	{
		int flag = 0;
		for (i = 0; !flag && i < bind->destinations->rnum; i++)
		{
			const int exec_symbol_d = *(int*)ccv_array_get(bind->destinations, i);
			if (exec_symbol_d == freed_exec_symbol_d)
			{
				// Swap-remove: overwrite with the last element, then shrink. Order is not preserved.
				if (i < bind->destinations->rnum - 1)
					*(int*)ccv_array_get(bind->destinations, i) = *(int*)ccv_array_get(bind->destinations, bind->destinations->rnum - 1);
				--bind->destinations->rnum;
				flag = 1;
			}
		}
		// This symbol can be freed.
		if (flag && bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED)
		{
			ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind;
			if (bind->alias_ref)
			{
				root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1);
				// If the origin bind is already fully gone, fall back to judging the alias by itself.
				if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
					root_bind = bind;
			}
			// If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more.
			// It is possible because exec will be freed already, thus, it is safe to remove this alias out.
			if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED &&
				((!root_bind->sources || root_bind->sources->rnum == 0) || _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)) &&
				root_bind->destinations->rnum == 0)
			{
				// Queue remaining producers so the caller can re-examine whether they are now freeable.
				if (root_bind->sources)
					for (i = 0; i < root_bind->sources->rnum; i++)
						ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i));
				_ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
				ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
					.d = tensor_index,
					.graph = graph->tape
				});
			} else if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && // Handle the case the bind is already freed, and it doesn't have any sources or destinations.
				bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) {
				_ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
				ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
					.d = tensor_index,
					.graph = graph->tape
				});
			}
		}
	}
}
784
785
// Remove freed_exec_symbol_d from this bind's source (producer) list, and if the
// bind's backing tensor variable is already gone (CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED)
// and nothing references it any more, free the bind and its tensor symbol too. Any
// remaining producers of a fully-freed bind are queued into `ws` for the caller's
// worklist sweep. Mirror of _ccv_nnc_update_bind_destinations_when_free.
static void _ccv_nnc_update_bind_sources_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws)
{
	int i;
	if (bind->sources)
	{
		int flag = 0;
		for (i = 0; !flag && i < bind->sources->rnum; i++)
		{
			const int exec_symbol_d = *(int*)ccv_array_get(bind->sources, i);
			if (exec_symbol_d == freed_exec_symbol_d)
			{
				// Swap-remove: overwrite with the last element, then shrink. Order is not preserved.
				if (i < bind->sources->rnum - 1)
					*(int*)ccv_array_get(bind->sources, i) = *(int*)ccv_array_get(bind->sources, bind->sources->rnum - 1);
				--bind->sources->rnum;
				flag = 1;
			}
		}
		// This symbol can be freed.
		if (flag && bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED)
		{
			ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind;
			if (bind->alias_ref)
			{
				root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1);
				// If the origin bind is already fully gone, fall back to judging the alias by itself.
				if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
					root_bind = bind;
			}
			// If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more.
			// It is possible because exec will be freed already, thus, it is safe to remove this alias out.
			// NOTE: guard root_bind->sources for NULL before dereferencing — when root_bind is an
			// alias origin its sources array may never have been allocated. This matches the
			// equivalent check in _ccv_nnc_update_bind_destinations_when_free.
			if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED &&
				((!root_bind->sources || root_bind->sources->rnum == 0) || _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)) &&
				(!root_bind->destinations || root_bind->destinations->rnum == 0))
			{
				// Queue remaining producers so the caller can re-examine whether they are now freeable.
				if (root_bind->sources)
					for (i = 0; i < root_bind->sources->rnum; i++)
						ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i));
				_ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
				ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
					.d = tensor_index,
					.graph = graph->tape
				});
			} else if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && // Handle the case the bind is already freed, and it doesn't have any sources or destinations.
				bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) {
				_ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
				ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
					.d = tensor_index,
					.graph = graph->tape
				});
			}
		}
	}
}
836
837
static void _ccv_nnc_update_bind_sources_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, const int* const inputs, const int input_size, const int* const outputs, const int output_size, ccv_array_t* const ws)
{
	// Scrub every bind the freed exec symbol touched: its inputs referenced the
	// exec as a destination (consumer), its outputs referenced it as a source
	// (producer). Aliased binds get their origin scrubbed as well.
	int idx;
	for (idx = 0; idx < input_size; idx++)
	{
		if (inputs[idx] < 0 || inputs[idx] >= binds->rnum)
			continue;
		ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, inputs[idx]);
		if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
			continue;
		if (bind->alias_ref)
		{
			const int alias_to = bind->alias_ref - 1;
			ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to);
			if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE)
				_ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws);
		}
		_ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, bind, inputs[idx], ws);
	}
	// Note that this works because there is no overlap of inputs / outputs. (What about alias?).
	for (idx = 0; idx < output_size; idx++)
	{
		if (outputs[idx] < 0 || outputs[idx] >= binds->rnum)
			continue;
		ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, outputs[idx]);
		if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
			continue;
		if (bind->alias_ref)
		{
			const int alias_to = bind->alias_ref - 1;
			ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to);
			if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE)
				_ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws);
		}
		_ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, bind, outputs[idx], ws);
	}
}
872
873
static void _ccv_nnc_stateful_exec_free_if_possible(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol)
{
	// Release the stateful exec record attached to this exec symbol, if any.
	if (!graph->stateful_execs)
		return;
	assert(symbol.d >= 0);
	const ccv_nnc_cmd_t cmd = ccv_nnc_graph_exec_symbol_cmd(graph->tape, symbol);
	ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data;
	if (!stateful_exec)
		return;
	// If there is no backward, no need to apply gradients.
	// Otherwise, if we applied gradients, we can free it as well.
	// We don't free this stateful exec because apply gradients doesn't require any variables alive.
	if (stateful_exec->did_backward_but_not_apply_gradients)
	{
		// Defer: apply-gradients still owes a visit; mark for freeing then.
		stateful_exec->should_free = 1;
		return;
	}
	const int slot = stateful_exec->index;
	ccfree(stateful_exec);
	// Remember the lowest vacated slot so the next stateful exec can reuse it.
	if (graph->reuse_stateful_exec < 0 || slot < graph->reuse_stateful_exec)
		graph->reuse_stateful_exec = slot;
	*(ccv_nnc_stateful_exec_t**)ccv_array_get(graph->stateful_execs, slot) = 0;
}
896
897
// Free a tensor variable. If the variable participated in recorded execution (it has a
// tensor symbol on the tape), this also attempts to garbage-collect the symbol and any
// exec symbols that become unreachable as a result, walking outward along the tape via
// the graph->ws worklist. If the symbol must stay (gradient computation may still need
// it), ownership of the underlying tensor view transfers to the bind so the data
// survives the variable handle.
void ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
{
	// If it contains a symbol, this tensor variable is not a free variable. It is either used as input or output.
	if (tensor_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
	{
		// If it is not a free variable, when can we free the symbol and the underlying variable?
		// 1. There should be no sources (the command generate this tensor should be freed) or the output of these sources is only the current one;
		// 2. The destinations (the commands that uses this tensor) should have no other inputs, or the other inputs has no binded sources as well.
		ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, tensor_variable->symbol.d);
		// There should be no source associated with it no more.
		int free_symbol = 0;
		// I am free if no exec symbol is producing me or the symbol producing me can only producing me (thus, it is not required to
		// compute gradient because I am the only variable it can compute gradient for).
		ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind;
		if (bind->alias_ref)
		{
			const int alias_to = bind->alias_ref - 1;
			root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, alias_to);
		}
		const int sources_and_is_only_output = (root_bind->sources && root_bind->sources->rnum > 0) && _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_variable->symbol.d);
		if (!root_bind->sources || root_bind->sources->rnum == 0 || sources_and_is_only_output)
		{
			int i, j;
			free_symbol = 1; // Assume we can free this symbol.
			// graph->ws is a lazily-created, reused scratch worklist of exec symbol indices.
			if (!graph->ws)
				graph->ws = ccv_array_new(sizeof(int), root_bind->destinations ? root_bind->destinations->rnum : 0, 0);
			ccv_array_t* const ws = graph->ws;
			ccv_array_clear(ws);
			// Seed the worklist with all consumers of this tensor.
			if (root_bind->destinations)
				for (i = 0; i < root_bind->destinations->rnum; i++)
					ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->destinations, i));
			const int ws_init_size = ws->rnum;
			// Add all sources from root_bind, in case it has been freed (during update bind sources / destinations when free.
			if (root_bind->sources)
				for (i = 0; i < root_bind->sources->rnum; i++)
					ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i));
			// If we cannot loop over any exec symbols (this is not in use). It is simple to determine whether we want
			// to free it or not: if this is an alias and the origin is not freed, we cannot free this symbol.
			if (ws_init_size == 0)
				free_symbol = (!bind->alias_ref || root_bind->index < 0);
			// Go through all the exec symbols use this tensor, to see whether they have inputs that has other sources.
			for (i = 0; i < ws_init_size; i++)
			{
				const int exec_symbol_d = *(int*)ccv_array_get(ws, i);
				const ccv_nnc_graph_exec_symbol_t symbol = {
					.d = exec_symbol_d,
					.graph = graph->tape
				};
				const int* inputs; int input_size;
				const int* outputs; int output_size;
				ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size);
				int flag = 0; // flag denotes whether there are cases to keep this exec symbol.
				if (sources_and_is_only_output)
				{
					// If there are sources, check whether we have outputs or not. If we do, we cannot free this.
					for (j = 0; !flag && j < output_size; j++)
						if (outputs[j] >= 0 && outputs[j] < graph->binds->rnum)
						{
							ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j]);
							if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT)
								flag = 1;
							else {
								if (other_bind->alias_ref) // If this is alias, use its original's destinations.
									other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
								flag = (other_bind->destinations && other_bind->destinations->rnum > 0);
							}
						}
				} else {
					// If there is no sources, check if other sources can depend on this exec, if they do, we cannot free this.
					for (j = 0; !flag && j < input_size; j++)
						if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum && inputs[j] != tensor_variable->symbol.d)
						{
							ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j]);
							if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT)
								flag = 1;
							else {
								if (other_bind->alias_ref) // If this is alias, use its original's destinations.
									other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
								flag = (other_bind->sources && other_bind->sources->rnum > 0);
							}
						}
				}
				// This exec can be freed if there is no input required or there is no output required.
				free_symbol = (free_symbol && !flag);
				if (!flag)
				{
					// Go over inputs and remove all references from binded destinations.
					// and go over outputs remove all references from binded sources.
					_ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws);
					const int* outgoings; int outgoing_size;
					ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size);
					for (j = 0; j < outgoing_size; j++)
						ccv_array_add_unique_int(ws, outgoings[j]);
					_ccv_nnc_stateful_exec_free_if_possible(graph, symbol);
					ccv_nnc_graph_exec_symbol_free(graph->tape, symbol);
				}
			}
			if (free_symbol)
			{
				_ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
				ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol);
				// Now, go over the outgoings, if it is removed, add more to it. Note that the ws array can grow while iterating over.
				for (i = ws_init_size; i < ws->rnum; i++)
				{
					const int exec_symbol_d = *(int*)ccv_array_get(ws, i);
					const ccv_nnc_graph_exec_symbol_t symbol = {
						.d = exec_symbol_d,
						.graph = graph->tape
					};
					const int* inputs; int input_size;
					const int* outputs; int output_size;
					ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size);
					int flag = 0;
					for (j = 0; !flag && j < input_size; j++)
						if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum)
						{
							ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j]);
							if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT)
								flag = 1;
							else {
								if (other_bind->alias_ref) // If this is alias, use its original's destinations.
									other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
								flag = (other_bind->sources && other_bind->sources->rnum > 0);
							}
						}
					if (flag) // If any inputs make free this destination impossible. Check whether all its outputs are done.
					{
						int output_flag = 0;
						for (j = 0; !output_flag && j < output_size; j++)
							if (outputs[j] >= 0 && outputs[j] < graph->binds->rnum)
							{
								ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j]);
								if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT)
									output_flag = 1;
								else {
									if (other_bind->alias_ref) // If this is alias, use its original's destinations.
										other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
									output_flag = (other_bind->destinations && other_bind->destinations->rnum > 0);
								}
							}
						if (!output_flag) // If no output is used (used means it has a tensor variable, or it has a destination).
							flag = 0;
					}
					// Went over all the inputs, it turns out no more inputs has other references, safe to remove.
					if (!flag)
					{
						_ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws);
						const int* outgoings; int outgoing_size;
						ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size);
						// It it has outgoings, add that for further inspection.
						for (j = 0; j < outgoing_size; j++)
							ccv_array_add_unique_int(ws, outgoings[j]);
						_ccv_nnc_stateful_exec_free_if_possible(graph, symbol);
						ccv_nnc_graph_exec_symbol_free(graph->tape, symbol);
					}
				}
			}
		}
		// If this symbol is not freed, move the tensor view to the bind.
		if (!free_symbol)
		{
			// If current bind is an alias, and it doesn't have any sources or destinations. We cannot find this alias
			// through any exec. This is not only safe to delete, but has to be deleted. We don't need to handle this
			// if free_symbol is true, because when that happens, root_bind will be deleted, and we will clean up the
			// alias in that process.
			if (bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0))
			{
				_ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
				ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol);
			} else {
				bind->index = CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED; // This tensor variable will be freed, but this symbol extra will continue exists.
				bind->destructor_hook.func = tensor_variable->destructor_hook.func; // Transfer the destructor callback.
				bind->destructor_hook.context = tensor_variable->destructor_hook.context; // Transfer the destructor callback context.
				bind->tensor_view = tensor_variable->tensor_view; // Transfer the ownership to the bind.
				tensor_variable->tensor_view = 0;
			}
		}
	}
	_ccv_nnc_tensor_variable_free(graph, tensor_variable, 1);
}
1077
1078
void ccv_nnc_dynamic_graph_has_effect_to_tensor_variables(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t* const source_variables, const int source_variable_size, const ccv_nnc_tensor_variable_t* const destination_variables, const int destination_variable_size, uint64_t* const bitmask)
1079
3
{
1080
3
  int i, j;
1081
3
  ccv_array_t* const sources_destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), source_variable_size + destination_variable_size, 0);
1082
13
  for (i = 0; i < source_variable_size; 
i++10
)
1083
10
  {
1084
10
    if (source_variables[i]->symbol.d < 0)
1085
0
      continue;
1086
10
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d);
1087
10
    if (bind->destinations && 
bind->destinations->rnum > 08
)
1088
21
      
for (j = 0; 8
j < bind->destinations->rnum;
j++13
)
1089
13
      {
1090
13
        // It is ok to have duplicate symbols.
1091
13
        const int d = *(int*)ccv_array_get(bind->destinations, j);
1092
13
        ccv_nnc_graph_exec_symbol_t symbol = {
1093
13
          .d = d,
1094
13
          .graph = graph->tape
1095
13
        };
1096
13
        ccv_array_push(sources_destinations, &symbol);
1097
13
      }
1098
10
  }
1099
3
  const int source_size = sources_destinations->rnum;
1100
6
  for (i = 0; i < destination_variable_size; 
i++3
)
1101
3
  {
1102
3
    if (destination_variables[i]->symbol.d < 0)
1103
0
      continue;
1104
3
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, destination_variables[i]->symbol.d);
1105
3
    if (bind->sources && bind->sources->rnum > 0)
1106
6
      
for (j = 0; 3
j < bind->sources->rnum;
j++3
)
1107
3
      {
1108
3
        // It is ok to have duplicate symbols.
1109
3
        const int d = *(int*)ccv_array_get(bind->sources, j);
1110
3
        ccv_nnc_graph_exec_symbol_t symbol = {
1111
3
          .d = d,
1112
3
          .graph = graph->tape
1113
3
        };
1114
3
        ccv_array_push(sources_destinations, &symbol);
1115
3
      }
1116
3
  }
1117
3
  const int destination_size = sources_destinations->rnum - source_size;
1118
3
  if (source_size == 0 || destination_size == 0)
1119
0
  {
1120
0
    ccv_array_free(sources_destinations);
1121
0
    return;
1122
0
  }
1123
3
  const int bitmask_size = ((source_size + 63) >> 6);
1124
3
  assert(bitmask_size < 256);
1125
3
  uint64_t exec_bitmask[bitmask_size];
1126
3
  ccv_nnc_symbolic_graph_sources_to_destinations(graph->tape, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, 0), source_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, source_size), destination_size, exec_bitmask);
1127
3
  int k = 0;
1128
13
  for (i = 0; i < source_variable_size; 
i++10
)
1129
10
  {
1130
10
    if (source_variables[i]->symbol.d < 0)
1131
0
    {
1132
0
      bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63));
1133
0
      continue;
1134
0
    }
1135
10
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d);
1136
10
    int flag = 0;
1137
10
    if (bind->destinations && 
bind->destinations->rnum > 08
)
1138
8
    {
1139
8
      assert(k <= source_size - bind->destinations->rnum);
1140
18
      
for (j = 0; 8
!flag &&
j < bind->destinations->rnum11
;
j++10
)
1141
10
        flag = (((uint64_t)1 << ((k + j) & 63)) & exec_bitmask[(k + j) >> 6]);
1142
8
      k += bind->destinations->rnum;
1143
8
    }
1144
10
    if (flag)
1145
7
      bitmask[i >> 6] |= ((uint64_t)1 << (i & 63));
1146
3
    else
1147
3
      bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63));
1148
10
  }
1149
3
  ccv_array_free(sources_destinations);
1150
3
}
1151
1152
int ccv_nnc_dynamic_graph_bookkeeping_count(const ccv_nnc_dynamic_graph_t* const graph, const int type)
1153
407
{
1154
407
  return ccv_nnc_symbolic_graph_active_symbol_count(graph->tape, type);
1155
407
}
1156
1157
void ccv_nnc_dynamic_graph_dot(const ccv_nnc_dynamic_graph_t* const graph, const int flags, FILE* out)
1158
415
{
1159
415
  ccv_nnc_symbolic_graph_dot(graph->tape, flags, out);
1160
415
}