Coverage Report

Created: 2022-07-27 23:53

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/ccv_nnc_dynamic_graph.c
Line
Count
Source
1
#include "ccv_nnc.h"
2
#include "ccv_nnc_easy.h"
3
#include "ccv_nnc_internal.h"
4
#include "ccv_nnc_easy.h"
5
#include "ccv_internal.h"
6
#include "_ccv_nnc_dynamic_graph.h"
7
8
// MARK - Level-4 API
9
10
ccv_nnc_dynamic_graph_t* ccv_nnc_dynamic_graph_new(void)
11
50
{
12
50
  ccv_nnc_dynamic_graph_t* graph = ccmalloc(sizeof(ccv_nnc_dynamic_graph_t));
13
50
  graph->no_grad = 0;
14
50
  graph->reuse_var = -1;
15
50
  graph->vars = ccv_array_new(sizeof(ccv_nnc_tensor_variable_t), 1, 0);
16
50
  graph->binds = ccv_array_new(sizeof(ccv_nnc_tensor_variable_graph_bind_t), 1, 0);
17
50
  graph->tape = ccv_nnc_symbolic_graph_new();
18
50
  graph->xpu_alloc.mp_hdr = -1;
19
50
  graph->xpu_alloc.freed = kh_init(dy_str);
20
50
  graph->xpu_alloc.allocd = kh_init(dy_alloc);
21
  // These may not be used as frequently; init as needed.
22
50
  graph->stateful_execs = 0;
23
50
  graph->reuse_stateful_exec = -1;
24
50
  graph->stream_map = 0;
25
50
  graph->ws = 0;
26
50
  return graph;
27
50
}
28
29
static void _ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int zeroing)
30
32.3k
{
31
32.3k
  const int index = tensor_variable->index;
32
32.3k
  if (tensor_variable->tensor_view)
33
14.6k
  {
34
14.6k
    if (tensor_variable->destructor_hook.func)
35
4
      tensor_variable->destructor_hook.func(graph, (ccv_nnc_tensor_t*)tensor_variable->tensor_view, tensor_variable->destructor_hook.context);
36
14.6k
    if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tensor_variable->tensor_view))
37
14.2k
    {
38
14.2k
      if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view))
39
7
        ccv_nnc_tensor_view_free(tensor_variable->tensor_view);
40
14.2k
      else {
41
14.2k
        if (!tensor_variable->alias_index_ref && // Return this memory to the graph.
42
14.2k
          CCV_TENSOR_GET_MEMORY(tensor_variable->tensor_view->info.type) == CCV_TENSOR_GPU_MEMORY)
43
419
          ccv_nnc_xpu_free(&graph->xpu_alloc, tensor_variable->tensor_view->data.ptr);
44
14.2k
        ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view);
45
14.2k
      }
46
14.2k
    }
47
14.6k
  }
48
32.3k
  ccfree(tensor_variable);
49
32.3k
  if (zeroing)
50
32.1k
    *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, index) = 0;
51
32.3k
  int i;
52
64.4k
  for (i = graph->vars->rnum - 1; i >= 0; i--)
53
64.2k
    if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) != 0)
54
32.1k
    {
55
32.1k
      graph->vars->rnum = i + 1;
56
32.1k
      break;
57
32.1k
    }
58
32.3k
  if (index < graph->vars->rnum &&
59
32.3k
    (index < graph->reuse_var || graph->reuse_var < 0))
60
11.9k
    graph->reuse_var = index;
61
20.4k
  else if (graph->reuse_var >= graph->vars->rnum)
62
5.62k
    graph->reuse_var = -1;
63
32.3k
}
64
65
static void _ccv_nnc_tensor_variable_graph_bind_free(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int zeroing)
66
27.7k
{
67
27.7k
  bind->index = CCV_NNC_TENSOR_NO_VARIABLE;
68
27.7k
  if (bind->sources)
69
15.3k
    ccv_array_free(bind->sources);
70
27.7k
  if (bind->destinations)
71
23.0k
    ccv_array_free(bind->destinations);
72
27.7k
  if (bind->tensor_view)
73
17.6k
  {
74
17.6k
    if (bind->destructor_hook.func)
75
3
      bind->destructor_hook.func(graph, (ccv_nnc_tensor_t*)bind->tensor_view, bind->destructor_hook.context);
76
17.6k
    if (!CCV_NNC_IS_EXTERN_TENSOR_VIEW(bind->tensor_view))
77
17.4k
    {
78
17.4k
      if (CCV_IS_TENSOR_VIEW(bind->tensor_view))
79
1
        ccv_nnc_tensor_view_free(bind->tensor_view);
80
17.4k
      else {
81
17.4k
        if (!bind->alias_ref && // Return this memory to the graph.
82
17.4k
          CCV_TENSOR_GET_MEMORY(bind->tensor_view->info.type) == CCV_TENSOR_GPU_MEMORY)
83
38
          ccv_nnc_xpu_free(&graph->xpu_alloc, bind->tensor_view->data.ptr);
84
17.4k
        ccv_nnc_tensor_free((ccv_nnc_tensor_t*)bind->tensor_view);
85
17.4k
      }
86
17.4k
    }
87
17.6k
  }
88
27.7k
  if (zeroing)
89
27.4k
  {
90
27.4k
    bind->sources = 0;
91
27.4k
    bind->destinations = 0;
92
27.4k
    bind->tensor_view = 0;
93
27.4k
    bind->destructor_hook.func = 0;
94
27.4k
    bind->destructor_hook.context = 0;
95
27.4k
  }
96
27.7k
}
97
98
void ccv_nnc_dynamic_graph_free(ccv_nnc_dynamic_graph_t* const graph)
99
50
{
100
50
  int i;
101
321
  for (i = 0; i < graph->vars->rnum; i++)
102
271
  {
103
271
    ccv_nnc_tensor_variable_t tensor_variable = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i);
104
271
    if (tensor_variable)
105
218
      _ccv_nnc_tensor_variable_free(graph, tensor_variable, 0);
106
271
  }
107
50
  ccv_array_free(graph->vars);
108
388
  for (i = 0; i < graph->binds->rnum; i++)
109
338
    _ccv_nnc_tensor_variable_graph_bind_free(graph, (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i), 0);
110
50
  ccv_array_free(graph->binds);
111
50
  ccv_nnc_symbolic_graph_free(graph->tape);
112
50
  if (graph->ws)
113
34
    ccv_array_free(graph->ws);
114
50
  if (graph->stateful_execs)
115
9
  {
116
27
    for (i = 0; i < graph->stateful_execs->rnum; i++)
117
18
    {
118
18
      ccv_nnc_stateful_exec_t* const stateful_exec = *(ccv_nnc_stateful_exec_t**)ccv_array_get(graph->stateful_execs, i);
119
18
      if (stateful_exec)
120
5
        ccfree(stateful_exec);
121
18
    }
122
9
    ccv_array_free(graph->stateful_execs);
123
9
  }
124
50
  if (graph->stream_map)
125
10
  {
126
10
    khiter_t k;
127
58
    for (k = kh_begin(graph->stream_map); k != kh_end(graph->stream_map); ++k)
128
48
    {
129
48
      if (!kh_exist(graph->stream_map, k))
130
25
        continue;
131
23
      ccv_nnc_stream_context_t* const stream = kh_val(graph->stream_map, k);
132
23
      ccv_nnc_stream_context_free(stream);
133
23
    }
134
10
    kh_destroy(stream_map, graph->stream_map);
135
10
  }
136
50
  ccv_nnc_xpu_alloc_destroy(&graph->xpu_alloc);
137
50
  ccfree(graph);
138
50
}
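
A minimal lifecycle sketch, not part of the measured source above: it shows how the allocation in ccv_nnc_dynamic_graph_new() pairs with the teardown in ccv_nnc_dynamic_graph_free(), and it assumes the convenience macros CPU_TENSOR_NHWC, TENSOR_VARIABLE_LIST and the CMD_EWSUM_FORWARD command from ccv_nnc.h / ccv_nnc_easy.h are spelled as written here.

#include "ccv_nnc.h"
#include "ccv_nnc_easy.h"

static void dynamic_graph_lifecycle_sketch(void)
{
	ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new();
	// Two scalar variables; ccv_nnc_tensor_from_variable lazily allocates their backing tensors.
	ccv_nnc_tensor_variable_t const a = ccv_nnc_tensor_variable_new(graph, CPU_TENSOR_NHWC(32F, 1));
	ccv_nnc_tensor_variable_t const b = ccv_nnc_tensor_variable_new(graph, CPU_TENSOR_NHWC(32F, 1));
	ccv_nnc_tensor_from_variable(graph, a)->data.f32[0] = 1;
	ccv_nnc_tensor_from_variable(graph, b)->data.f32[0] = 2;
	// Output left as auto; ccv_nnc_dynamic_graph_exec infers its shape (see output_auto below).
	ccv_nnc_tensor_variable_t const c = ccv_nnc_tensor_variable_new(graph);
	ccv_nnc_dynamic_graph_exec(graph, CMD_EWSUM_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_VARIABLE_LIST(a, b), TENSOR_VARIABLE_LIST(c), 0, 0);
	// Frees the remaining variables, binds, streams and the tape, as the code above shows.
	ccv_nnc_dynamic_graph_free(graph);
}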
139
140
void ccv_nnc_tensor_variable_set(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_t* const tensor)
141
655
{
142
655
  assert(!tensor_variable->alias_index_ref);
143
655
  if (tensor_variable->tensor_view && !CCV_NNC_IS_EXTERN_TENSOR_VIEW(tensor_variable->tensor_view))
144
0
  {
145
0
    assert(!CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view));
146
0
    ccv_nnc_tensor_free((ccv_nnc_tensor_t*)tensor_variable->tensor_view);
147
0
  }
148
655
  tensor_variable->info = tensor->info;
149
655
  tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)((uintptr_t)tensor | 1);
150
655
}
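
A short sketch of the extern path just shown (a hypothetical fragment, assuming a graph created as in the earlier sketch and the CPU_TENSOR_NHWC macro): ccv_nnc_tensor_variable_set tags the stored pointer with its low bit (the "| 1" on line 149), which is what CCV_NNC_IS_EXTERN_TENSOR_VIEW tests, so the graph never frees a caller-owned tensor.

// Caller-owned tensor wrapped into a variable; the graph reads/writes it but does not own it.
ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
ccv_nnc_tensor_variable_t const xv = ccv_nnc_tensor_variable_new(graph, CPU_TENSOR_NHWC(32F, 2, 2));
ccv_nnc_tensor_variable_set(graph, xv, x);
// ... use xv with ccv_nnc_dynamic_graph_exec ...
ccv_nnc_tensor_variable_free(graph, xv); // releases the variable only
ccv_nnc_tensor_free(x);                  // ownership stays with the caller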
151
152
void ccv_nnc_tensor_variable_destructor_hook(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_variable_destructor_f func, void* const context)
153
7
{
154
7
  tensor_variable->destructor_hook.func = func;
155
7
  tensor_variable->destructor_hook.context = context;
156
7
}
157
158
inline static void _ccv_nnc_tensor_variable_init(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_param_t info)
159
31.3k
{
160
31.3k
  tensor_variable->alias_index_ref = 0;
161
31.3k
  tensor_variable->alias_off = 0;
162
31.3k
  tensor_variable->destructor_hook.func = 0;
163
31.3k
  tensor_variable->destructor_hook.context = 0;
164
31.3k
  tensor_variable->info = info;
165
31.3k
  tensor_variable->symbol = NO_TENSOR_SYMBOL;
166
31.3k
  tensor_variable->tensor_view = 0;
167
31.3k
  if (graph->reuse_var >= 0)
168
802
  {
169
802
    const int reuse_var = graph->reuse_var;
170
802
    assert(reuse_var < graph->vars->rnum);
171
802
    tensor_variable->index = reuse_var;
172
802
    *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var) = tensor_variable;
173
802
    int i;
174
802
    graph->reuse_var = -1;
175
1.43k
    for (i = reuse_var + 1; i < graph->vars->rnum && graph->reuse_var < 0; i++)
176
631
      if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) == 0)
177
591
        graph->reuse_var = i;
178
30.5k
  } else {
179
30.5k
    tensor_variable->index = graph->vars->rnum;
180
30.5k
    ccv_array_push(graph->vars, &tensor_variable);
181
30.5k
  }
182
31.3k
}
183
184
ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info)
185
31.2k
{
186
31.2k
  ccv_nnc_tensor_variable_t tensor_variable = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s));
187
31.2k
  tensor_variable->type = CCV_NNC_TENSOR_VARIABLE;
188
31.2k
  _ccv_nnc_tensor_variable_init(graph, tensor_variable, info);
189
31.2k
  return tensor_variable;
190
31.2k
}
191
192
ccv_nnc_tensor_variable_t ccv_nnc_tensor_constant_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info)
193
37
{
194
37
  ccv_nnc_tensor_variable_t tensor_variable = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s));
195
37
  tensor_variable->type = CCV_NNC_TENSOR_CONSTANT;
196
37
  _ccv_nnc_tensor_variable_init(graph, tensor_variable, info);
197
37
  return tensor_variable;
198
37
}
199
200
int ccv_nnc_tensor_variable_is_constant(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
201
0
{
202
0
  return tensor_variable->type == CCV_NNC_TENSOR_CONSTANT;
203
0
}
204
205
ccv_nnc_tensor_param_t ccv_nnc_tensor_variable_params(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
206
0
{
207
0
  return tensor_variable->info;
208
0
}
209
210
ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_alias_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info)
211
1.04k
{
212
1.04k
  ccv_nnc_tensor_variable_t variable_alias = ccmalloc(sizeof(struct ccv_nnc_tensor_variable_s));
213
1.04k
  variable_alias->type = tensor_variable->type;
214
  // If the tensor variable is an alias itself, we point directly to its original.
215
1.04k
  if (tensor_variable->alias_index_ref)
216
1
  {
217
1
    variable_alias->alias_index_ref = tensor_variable->alias_index_ref;
218
    // The tensor variable needs to be fully specified if I am aliasing an alias.
219
1
    assert(!ccv_nnc_is_tensor_auto(tensor_variable->info));
220
1
    int i;
221
1
    int no_inc = 1;
222
2
    for (i = 0; no_inc && i < CCV_NNC_MAX_DIM_ALLOC; i++)
223
1
      no_inc = (tensor_variable->inc[i] == 0);
224
    // It has to satisfy the condition that the tensor variable itself is contiguous.
225
1
    assert(ccv_nnc_tensor_view_is_contiguous(tensor_variable->info.dim, no_inc ? tensor_variable->info.dim : tensor_variable->inc, tensor_variable->ofs));
226
    // Need to compute alias off, that is the alias off of the tensor variable plus its ofs.
227
1
    const off_t off = ccv_nnc_tensor_view_offset(tensor_variable->info.datatype, no_inc ? tensor_variable->info.dim : tensor_variable->inc, tensor_variable->ofs);
228
1
    variable_alias->alias_off = tensor_variable->alias_off + off;
229
1.04k
  } else {
230
1.04k
    variable_alias->alias_index_ref = tensor_variable->index + 1;
231
1.04k
    variable_alias->alias_off = 0;
232
1.04k
  }
233
1.04k
  variable_alias->info = info;
234
1.04k
  variable_alias->symbol = NO_TENSOR_SYMBOL;
235
1.04k
  variable_alias->destructor_hook.func = 0;
236
1.04k
  variable_alias->destructor_hook.context = 0;
237
1.04k
  variable_alias->tensor_view = 0;
238
1.04k
  memcpy(variable_alias->ofs, ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
239
1.04k
  memcpy(variable_alias->inc, inc, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
240
1.04k
  if (graph->reuse_var >= 0)
241
0
  {
242
0
    const int reuse_var = graph->reuse_var;
243
0
    assert(reuse_var < graph->vars->rnum);
244
0
    variable_alias->index = reuse_var;
245
0
    *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, reuse_var) = variable_alias;
246
0
    int i;
247
0
    graph->reuse_var = -1;
248
0
    for (i = reuse_var + 1; i < graph->vars->rnum && graph->reuse_var < 0; i++)
249
0
      if (*(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, i) == 0)
250
0
        graph->reuse_var = i;
251
1.04k
  } else {
252
1.04k
    variable_alias->index = graph->vars->rnum;
253
1.04k
    ccv_array_push(graph->vars, &variable_alias);
254
1.04k
  }
255
1.04k
  return variable_alias;
256
1.04k
}
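
A hedged sketch of the alias API defined above (assuming CPU_TENSOR_NHWC and a graph created earlier): ofs and inc are CCV_NNC_MAX_DIM_ALLOC-sized arrays, and an all-zero inc is treated as "same as the dimensions", per the no_inc checks elsewhere in this file.

// View the first row of a 2x2 variable as a 1x2 alias; no data is copied.
ccv_nnc_tensor_variable_t const m = ccv_nnc_tensor_variable_new(graph, CPU_TENSOR_NHWC(32F, 2, 2));
const int ofs[CCV_NNC_MAX_DIM_ALLOC] = {0};    // start at row 0, column 0
const int inc[CCV_NNC_MAX_DIM_ALLOC] = {2, 2}; // strides of the underlying 2x2 storage
ccv_nnc_tensor_variable_t const row0 = ccv_nnc_tensor_variable_alias_new(graph, m, ofs, inc, CPU_TENSOR_NHWC(32F, 1, 2));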
257
258
ccv_nnc_tensor_t* ccv_nnc_tensor_from_variable_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_stream_context_t* const stream_context)
259
69.1k
{
260
69.1k
  if (tensor_variable->tensor_view)
261
37.4k
  {
262
37.4k
    if (tensor_variable->alias_index_ref)
263
1.03k
    {
264
1.03k
      const int alias_index = tensor_variable->alias_index_ref - 1;
265
1.03k
      assert(alias_index >= 0);
266
1.03k
      ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
267
1.03k
      if (CCV_IS_TENSOR_VIEW(tensor_variable->tensor_view))
268
12
      {
269
12
        ccv_nnc_tensor_view_t* const tv = tensor_variable->tensor_view;
270
        // We cannot have an alias with custom set tensor, otherwise the pointer update is invalid.
271
12
        assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv));
272
        // Update the tensor_view pointer every time we access it, because the underlying variable it aliases may have changed.
273
12
        tv->data.u8 = CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8 + tv->off + tensor_variable->alias_off;
274
1.02k
      } else {
275
1.02k
        ccv_nnc_tensor_t* const tv = (ccv_nnc_tensor_t*)tensor_variable->tensor_view;
276
        // We cannot have an alias with custom set tensor, otherwise the pointer update is invalid.
277
1.02k
        assert(!CCV_NNC_IS_EXTERN_TENSOR_VIEW(tv));
278
        // Update the tensor_view pointer every time we access it, because the underlying variable it aliases may have changed.
279
1.02k
        tv->data.u8 = CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8 + tensor_variable->alias_off;
280
1.02k
      }
281
1.03k
    }
282
37.4k
    return (ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(tensor_variable->tensor_view);
283
37.4k
  }
284
31.6k
  if (!tensor_variable->alias_index_ref)
285
30.6k
  {
286
    // If we haven't allocated tensor_variable, we cannot allocate them now (because no shape specified), return 0.
287
30.6k
    if (ccv_nnc_is_tensor_auto(tensor_variable->info))
288
0
      return 0;
289
30.6k
    void* ptr = 0;
290
30.6k
    if (CCV_TENSOR_GET_MEMORY(tensor_variable->info.type) == CCV_TENSOR_GPU_MEMORY)
291
457
      ptr = ccv_nnc_xpu_alloc(&graph->xpu_alloc, CCV_TENSOR_GET_DEVICE_ID(tensor_variable->info.type), stream_context, ccv_nnc_tensor_data_size(tensor_variable->info));
292
30.6k
    tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, tensor_variable->info, 0);
293
30.6k
    assert(tensor_variable->tensor_view->data.u8);
294
30.6k
    return (ccv_nnc_tensor_t*)tensor_variable->tensor_view;
295
30.6k
  }
296
1.04k
  const int alias_index = tensor_variable->alias_index_ref - 1;
297
1.04k
  assert(alias_index >= 0);
298
1.04k
  ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
299
1.04k
  assert(!variable_to->alias_index_ref);
300
1.04k
  if (!variable_to->tensor_view)
301
3
  {
302
    // If we haven't allocated variable_to, we cannot allocate them now (because no shape specified), return 0.
303
3
    if (ccv_nnc_is_tensor_auto(variable_to->info))
304
0
      return 0;
305
3
    void* ptr = 0;
306
3
    assert(variable_to->info.type == tensor_variable->info.type);
307
3
    if (CCV_TENSOR_GET_MEMORY(variable_to->info.type) == CCV_TENSOR_GPU_MEMORY)
308
0
      ptr = ccv_nnc_xpu_alloc(&graph->xpu_alloc, CCV_TENSOR_GET_DEVICE_ID(variable_to->info.type), stream_context, ccv_nnc_tensor_data_size(variable_to->info));
309
3
    variable_to->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(ptr, variable_to->info, 0);
310
3
    assert(variable_to->tensor_view->data.u8);
311
3
  }
312
1.04k
  int i;
313
1.04k
  int no_ofs = 1;
314
13.5k
  for (i = 0; no_ofs && i < CCV_NNC_MAX_DIM_ALLOC; i++)
315
12.4k
    no_ofs = (tensor_variable->ofs[i] == 0);
316
1.04k
  int no_inc = 1;
317
2.46k
  for (i = 0; no_inc && i < CCV_NNC_MAX_DIM_ALLOC; i++)
318
1.42k
    no_inc = (tensor_variable->inc[i] == 0);
319
1.04k
  if (!no_inc)
320
1.01k
    no_inc = (memcmp(tensor_variable->inc, tensor_variable->info.dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC) == 0);
321
1.04k
  assert(CCV_GET_DATA_TYPE_SIZE(tensor_variable->info.datatype) * ccv_nnc_tensor_count(tensor_variable->info) + tensor_variable->alias_off <= CCV_GET_DATA_TYPE_SIZE(variable_to->info.datatype) * ccv_nnc_tensor_count(variable_to->info));
322
  // Allowing vector type to be normal tensor, rather than a tensor view. We cannot have any offset though.
323
1.04k
  if (no_ofs && !no_inc)
324
5
    no_inc = ccv_nnc_tensor_view_is_contiguous(tensor_variable->info.dim, tensor_variable->inc, tensor_variable->ofs);
325
1.04k
  if (no_ofs && no_inc)
326
1.03k
    tensor_variable->tensor_view = (ccv_nnc_tensor_view_t*)ccv_nnc_tensor_new(CCV_NNC_TENSOR_VIEW(variable_to->tensor_view)->data.u8, tensor_variable->info, 0);
327
8
  else
328
8
    tensor_variable->tensor_view = ccv_nnc_tensor_view_new((ccv_nnc_tensor_t*)CCV_NNC_TENSOR_VIEW(variable_to->tensor_view), tensor_variable->info, tensor_variable->ofs, no_inc ? tensor_variable->info.dim : tensor_variable->inc);
329
1.04k
  if  (tensor_variable->alias_off)
330
1
    tensor_variable->tensor_view->data.u8 += tensor_variable->alias_off;
331
1.04k
  return (ccv_nnc_tensor_t*)tensor_variable->tensor_view;
332
1.04k
}
333
334
static void _ccv_nnc_tensor_symbol_extra_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const ccv_nnc_tensor_symbol_t symbol)
335
27.6k
{
336
27.6k
  if (symbol.d >= graph->binds->rnum)
337
338
  {
338
338
    const int rnum = graph->binds->rnum;
339
338
    ccv_array_resize(graph->binds, symbol.d + 1);
340
338
    int i;
341
676
    for (i = rnum; i < graph->binds->rnum; i++)
342
338
      ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, i))->index = CCV_NNC_TENSOR_NO_VARIABLE;
343
338
  }
344
27.6k
  ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, symbol.d);
345
27.6k
  bind->type = tensor_variable->type;
346
27.6k
  bind->index = tensor_variable->index;
347
27.6k
  if (tensor_variable->alias_index_ref)
348
1.04k
  {
349
1.04k
    const ccv_nnc_tensor_symbol_t alias_to = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){
350
1.04k
      .d = symbol.d,
351
1.04k
      .graph = graph->tape
352
1.04k
    });
353
1.04k
    assert(alias_to.d >= 0 && alias_to.d < graph->binds->rnum);
354
1.04k
    bind->alias_ref = alias_to.d + 1;
355
1.04k
  } else
356
26.5k
    bind->alias_ref = 0;
357
27.6k
  if (bind->sources)
358
0
    ccv_array_free(bind->sources);
359
27.6k
  bind->sources = 0;
360
27.6k
  if (bind->destinations)
361
0
    ccv_array_free(bind->destinations);
362
27.6k
  bind->destinations = 0;
363
27.6k
  bind->destructor_hook.func = 0;
364
27.6k
  bind->destructor_hook.context = 0;
365
27.6k
  bind->tensor_view = 0;
366
27.6k
}
367
368
static ccv_nnc_tensor_symbol_t _ccv_nnc_tensor_symbol_from_variable(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
369
44.5k
{
370
44.5k
  if (tensor_variable->symbol.d >= 0)
371
16.9k
    return tensor_variable->symbol;
372
27.6k
  if (!tensor_variable->alias_index_ref)
373
26.5k
  {
374
26.5k
    const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_new(graph->tape, tensor_variable->info, 0);
375
26.5k
    _ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol);
376
26.5k
    return symbol;
377
26.5k
  }
378
1.04k
  const int alias_index = tensor_variable->alias_index_ref - 1;
379
1.04k
  assert(alias_index >= 0);
380
1.04k
  ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
381
1.04k
  assert(!variable_to->alias_index_ref);
382
1.04k
  int no_inc = 1;
383
1.04k
  int i;
384
2.43k
  for (i = 0; no_inc && i < CCV_NNC_MAX_DIM_ALLOC; i++)
385
1.39k
    no_inc = (tensor_variable->inc[i] == 0);
386
1.04k
  const ccv_nnc_tensor_symbol_t symbol = tensor_variable->symbol = ccv_nnc_tensor_symbol_alias_new(graph->tape, _ccv_nnc_tensor_symbol_from_variable(graph, variable_to), tensor_variable->ofs, no_inc ? tensor_variable->info.dim : tensor_variable->inc, tensor_variable->info, 0);
387
1.04k
  _ccv_nnc_tensor_symbol_extra_new(graph, tensor_variable, symbol);
388
1.04k
  return symbol;
389
1.04k
}
390
391
// Return the tensor variable that is old (the provided tensor variable will have a new setting).
392
ccv_nnc_tensor_variable_t ccv_nnc_tensor_variable_exchange_new(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t tensor_variable)
393
6.28k
{
394
6.28k
  struct ccv_nnc_tensor_variable_s x = *tensor_variable;
395
6.28k
  ccv_nnc_tensor_variable_t new_variable;
396
  // Need to handle alias.
397
6.28k
  if (x.alias_index_ref)
398
0
    new_variable = ccv_nnc_tensor_variable_alias_new(graph, *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, x.alias_index_ref - 1), x.ofs, x.inc, x.info);
399
6.28k
  else
400
6.28k
    new_variable = ccv_nnc_tensor_variable_new(graph, x.info);
401
6.28k
  *tensor_variable = *new_variable;
402
6.28k
  *new_variable = x;
403
  // The index should be the same though.
404
6.28k
  const int index = new_variable->index;
405
6.28k
  new_variable->index = tensor_variable->index;
406
6.28k
  if (new_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
407
2.84k
  {
408
2.84k
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, new_variable->symbol.d);
409
2.84k
    bind->index = new_variable->index;
410
2.84k
  }
411
6.28k
  tensor_variable->index = index;
412
6.28k
  return new_variable;
413
6.28k
}
414
415
void ccv_nnc_dynamic_graph_set_no_grad(ccv_nnc_dynamic_graph_t* const dynamic_graph, const int no_grad)
416
9
{
417
9
  dynamic_graph->no_grad = no_grad;
418
9
}
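
A small usage sketch of the flag above (assuming a graph built as in the earlier sketches): the exec path below consults graph->no_grad before recording symbols onto the tape, so executions made while it is set cannot be differentiated afterwards.

ccv_nnc_dynamic_graph_set_no_grad(graph, 1); // inference-only region: skip tape recording
// ... ccv_nnc_dynamic_graph_exec(...) calls here are not recorded ...
ccv_nnc_dynamic_graph_set_no_grad(graph, 0); // back to recording for training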
419
420
static ccv_nnc_stream_context_t* _ccv_nnc_dynamic_graph_get_stream(ccv_nnc_dynamic_graph_t* const graph, const int type)
421
72
{
422
72
  if (!graph->stream_map)
423
10
    graph->stream_map = kh_init(stream_map);
424
72
  int ret = 0;
425
72
  khiter_t k = kh_put(stream_map, graph->stream_map, type, &ret);
426
72
  assert(ret >= 0);
427
72
  ccv_nnc_stream_context_t* stream = kh_val(graph->stream_map, k);
428
  // If ret == 0, the key already exists, we can return directly; otherwise, create and return.
429
72
  if (ret != 0)
430
23
  {
431
23
    stream = ccv_nnc_stream_context_new(type);
432
23
    kh_val(graph->stream_map, k) = stream;
433
23
  }
434
72
  return stream;
435
72
}
436
437
typedef struct {
438
  ccv_nnc_dynamic_graph_t* graph;
439
  int stream_type;
440
} ccv_nnc_dynamic_graph_neighbor_context_discovery_t;
441
442
static ccv_nnc_stream_context_t* _ccv_nnc_dynamic_graph_neighbor_context_discovery(const int device_id, void* const context)
443
0
{
444
0
  ccv_nnc_dynamic_graph_neighbor_context_discovery_t* const discovery = (ccv_nnc_dynamic_graph_neighbor_context_discovery_t*)context;
445
0
  int type = discovery->stream_type;
446
0
  CCV_STREAM_SET_DEVICE_ID(type, device_id);
447
0
  return _ccv_nnc_dynamic_graph_get_stream(discovery->graph, type);
448
0
}
449
450
void ccv_nnc_dynamic_graph_exec_ret(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context, ccv_nnc_graph_exec_symbol_t* const graph_execs)
451
15.3k
{
452
15.3k
  int i, j;
453
43.5k
  for (i = 0; i < input_size; i++)
454
28.1k
    if (inputs[i] && !inputs[i]->alias_index_ref)
455
27.1k
      { assert(inputs[i]->tensor_view); }
456
15.3k
  ccv_nnc_tensor_t* input_tensors[ccv_max(1, input_size)];
457
43.5k
  for (i = 0; i < input_size; i++)
458
28.1k
    input_tensors[i] = inputs[i] ? ccv_nnc_tensor_from_variable(graph, inputs[i], stream_context) : 0;
459
15.3k
  ccv_nnc_tensor_symbol_t input_symbols[ccv_max(1, input_size)];
460
43.5k
  for (i = 0; i < input_size; i++)
461
28.1k
    input_symbols[i] = inputs[i] ? _ccv_nnc_tensor_symbol_from_variable(graph, inputs[i]) : NO_TENSOR_SYMBOL;
462
15.3k
  ccv_array_t* input_sources[ccv_max(1, input_size)];
463
15.3k
  ccv_array_t* input_alias_sources[ccv_max(1, input_size)];
464
43.5k
  for (i = 0; i < input_size; i++)
465
28.1k
  {
466
28.1k
    input_sources[i] = input_symbols[i].d != CCV_NNC_NO_TENSOR_SYMBOL ? ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, input_symbols[i].d))->sources : 0;
467
28.1k
    if (inputs[i] && inputs[i]->alias_index_ref)
468
1.03k
    {
469
1.03k
      const int alias_index_ref = inputs[i]->alias_index_ref - 1;
470
1.03k
      assert(alias_index_ref >= 0);
471
1.03k
      ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index_ref);
472
1.03k
      input_alias_sources[i] = ((ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d))->sources;
473
1.03k
    } else
474
27.1k
      input_alias_sources[i] = 0;
475
28.1k
  }
476
15.3k
  const int parallel_count = ccv_max(1, parallel);
477
15.3k
  assert(input_size % parallel_count == 0);
478
15.3k
  const int per_input_size = input_size / parallel_count;
479
15.3k
  assert(output_size % parallel_count == 0);
480
15.3k
  const int per_output_size = output_size / parallel_count;
481
15.3k
  int output_auto = 0;
482
30.9k
  for (i = 0; !output_auto && i < output_size; i++)
483
15.6k
    output_auto = outputs[i] ? ccv_nnc_is_tensor_auto(outputs[i]->info) : 0;
484
  // One extra step, infer the parameters for outputs.
485
15.3k
  if (output_auto)
486
14.6k
  {
487
14.6k
    ccv_nnc_tensor_param_t input_params[ccv_max(1, per_input_size)];
488
14.6k
    ccv_nnc_tensor_param_t output_params[ccv_max(1, per_output_size)];
489
29.4k
    for (i = 0; i < parallel_count; i++)
490
14.7k
    {
491
41.9k
      for (j = 0; j < per_input_size; j++)
492
27.2k
        input_params[j] = inputs[j + i * per_input_size] ? inputs[j + i * per_input_size]->info : ccv_nnc_tensor_auto;
493
29.6k
      for (j = 0; j < per_output_size; j++)
494
14.9k
        output_params[j] = outputs[j + i * per_output_size] ? outputs[j + i * per_output_size]->info : ccv_nnc_tensor_auto;
495
14.7k
      ccv_nnc_hint_tensor_auto(cmd, input_params, per_input_size, hint, output_params, per_output_size);
496
29.6k
      for (j = 0; j < per_output_size; j++)
497
14.9k
        if (outputs[j + i * per_output_size])
498
14.7k
          outputs[j + i * per_output_size]->info = output_params[j];
499
14.7k
    }
500
14.6k
  }
501
15.3k
  int freeable_size = 0;
502
15.3k
  ccv_nnc_tensor_variable_t freeables[ccv_max(1, output_size)];
503
  // Refresh the symbol if it is binded to an existing exec. Otherwise we cannot keep the SSA guarantee.
504
31.0k
  for (i = 0; i < output_size; i++)
505
15.6k
  {
506
    // First, go over to see whether there is enforce inplace.
507
15.6k
    int enforce_idx = -1;
508
44.7k
    for (j = 0; enforce_idx < 0 && j < input_size; j++)
509
29.0k
      if (inputs[j] && ccv_nnc_cmd_enforce_inplace(cmd, j, input_size, i, output_size))
510
2
        enforce_idx = j;
511
15.6k
    if (enforce_idx >= 0)
512
2
      { assert(outputs[i] == inputs[enforce_idx] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL); }
513
    // We don't allow or check "allow inplace" yet. That logic will be at odds with backward logic.
514
15.6k
    if (outputs[i] && outputs[i]->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
515
417
    {
516
417
      const ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[i]->symbol.d);
517
417
      if (enforce_idx >= 0)
518
2
        { assert(!bind->destinations || bind->destinations->rnum == 0); }
519
417
      if (bind->sources && bind->sources->rnum > 0)
520
415
      {
521
415
        const ccv_nnc_tensor_variable_t old_var = freeables[freeable_size++] = ccv_nnc_tensor_variable_exchange_new(graph, outputs[i]);
522
        // If this is enforce output, make sure the tensor view is taken by the output.
523
415
        if (enforce_idx >= 0)
524
0
        {
525
0
          outputs[i]->tensor_view = old_var->tensor_view; // Make sure the tensor view is taken over by the output.
526
0
          old_var->tensor_view = 0;
527
0
        }
528
415
      }
529
417
    }
530
15.6k
  }
531
15.3k
  ccv_nnc_tensor_t* output_tensors[ccv_max(1, per_output_size)];
532
15.3k
  if (parallel_count > 1)
533
23
  {
534
23
    const int max_device_id_size = per_input_size + per_output_size;
535
23
    assert(max_device_id_size > 0);
536
23
    int device_ids[max_device_id_size];
537
23
    ccv_nnc_stream_context_t* streams[parallel_count];
538
23
    ccv_nnc_stream_signal_t* signal;
539
23
    if (stream_context)
540
14
      signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
541
97
    for (i = 0; i < parallel_count; i++)
542
74
    {
543
74
      int flag = 0;
544
148
      for (j = 0; !flag && j < per_input_size; j++)
545
74
        if (input_tensors[i * per_input_size + j])
546
74
          flag = (CCV_TENSOR_GET_MEMORY(input_tensors[i * per_input_size + j]->info.type) == CCV_TENSOR_GPU_MEMORY);
547
156
      for (j = 0; j < per_output_size; j++)
548
82
      {
549
82
        output_tensors[j] = outputs[j + i * per_output_size] ? ccv_nnc_tensor_from_variable(graph, outputs[j + i * per_output_size], stream_context) : 0;
550
82
        if (output_tensors[j] && !flag)
551
4
          flag = (CCV_TENSOR_GET_MEMORY(output_tensors[j]->info.type) == CCV_TENSOR_GPU_MEMORY);
552
82
      }
553
74
      const int stream_type = flag ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
554
74
      const int tensor_type = flag ? CCV_TENSOR_GPU_MEMORY : CCV_TENSOR_CPU_MEMORY;
555
74
      const int device_id_size = ccv_nnc_device_ids_for_io(input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, tensor_type, device_ids, max_device_id_size);
556
74
      ccv_nnc_stream_context_t* stream_0 = 0;
557
146
      for (j = 0; j < device_id_size; j++)
558
72
      {
559
72
        int type = stream_type;
560
72
        CCV_STREAM_SET_DEVICE_ID(type, device_ids[j]);
561
72
        ccv_nnc_stream_context_t* const stream = _ccv_nnc_dynamic_graph_get_stream(graph, type);
562
72
        if (!stream_0)
563
72
          stream_0 = stream;
564
72
      }
565
      // Wait signal to finish.
566
74
      if (stream_context)
567
44
      {
568
44
        if (stream_0)
569
42
          ccv_nnc_stream_context_wait_signal(stream_0, signal);
570
2
        else
571
2
          ccv_nnc_stream_context_wait(stream_context);
572
44
      }
573
74
      if (stream_0)
574
72
      {
575
72
        ccv_nnc_dynamic_graph_neighbor_context_discovery_t discovery = {
576
72
          .graph = graph,
577
72
          .stream_type = stream_type
578
72
        };
579
72
        ccv_nnc_stream_context_set_neighbor_discovery(stream_0, _ccv_nnc_dynamic_graph_neighbor_context_discovery, &discovery);
580
72
      }
581
74
      PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size);
582
74
      int k;
583
204
      for (k = 0; k < per_input_size; 
k++130
)
584
130
      {
585
130
        PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", k + 1, input_tensors[k + i * per_input_size], (input_tensors[k + i * per_input_size] ? input_tensors[k + i * per_input_size]->data.u8 : 0), (input_tensors[k + i * per_input_size] ? CCV_TENSOR_GET_DEVICE_ID(input_tensors[k + i * per_input_size]->info.type) : -1));
586
130
        if (input_tensors[k + i * per_input_size] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
587
0
          ccv_nnc_print_tensor_info(input_tensors[k + i * per_input_size]);
588
130
        PRINT(CCV_CLI_INFO, "\n");
589
130
      }
590
74
      ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors + i * per_input_size, per_input_size, output_tensors, per_output_size, stream_0);
591
156
      for (k = 0; k < per_output_size; k++)
592
82
      {
593
82
        PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", k + 1, output_tensors[k], (output_tensors[k] ? output_tensors[k]->data.u8 : 0), (output_tensors[k] ? CCV_TENSOR_GET_DEVICE_ID(output_tensors[k]->info.type) : -1));
594
82
        if (output_tensors[k] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
595
0
          ccv_nnc_print_tensor_info(output_tensors[k]);
596
82
        PRINT(CCV_CLI_INFO, "\n");
597
82
      }
598
74
      if (stream_context && stream_0)
599
42
      {
600
42
        ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
601
42
        ccv_nnc_stream_context_wait_signal(stream_context, signal);
602
42
      }
603
74
      streams[i] = stream_0;
604
74
    }
605
23
    if (!stream_context)
606
39
      for (i = 0; i < parallel_count; i++)
607
30
        if (streams[i])
608
30
          ccv_nnc_stream_context_wait(streams[i]);
609
15.3k
  } else {
610
30.9k
    for (i = 0; i < per_output_size; i++)
611
15.5k
      output_tensors[i] = outputs[i] ? ccv_nnc_tensor_from_variable(graph, outputs[i], stream_context) : 0;
612
15.3k
    PRINT(CCV_CLI_INFO, "%s: [%d] -> [%d]\n", ccv_nnc_cmd_name(cmd.cmd), per_input_size, per_output_size);
613
43.3k
    for (i = 0; i < per_input_size; i++)
614
28.0k
    {
615
28.0k
      PRINT(CCV_CLI_INFO, "|-> %d. %p (%p:%d)", i + 1, input_tensors[i], (input_tensors[i] ? input_tensors[i]->data.u8 : 0), (input_tensors[i] ? CCV_TENSOR_GET_DEVICE_ID(input_tensors[i]->info.type) : -1));
616
28.0k
      if (input_tensors[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
617
0
        ccv_nnc_print_tensor_info(input_tensors[i]);
618
28.0k
      PRINT(CCV_CLI_INFO, "\n");
619
28.0k
    }
620
15.3k
    ccv_nnc_cmd_exec(cmd, hint, flags, input_tensors, per_input_size, output_tensors, per_output_size, stream_context);
621
30.9k
    for (i = 0; i < per_output_size; i++)
622
15.5k
    {
623
15.5k
      PRINT(CCV_CLI_INFO, "|<- %d. %p (%p:%d)", i + 1, output_tensors[i], (output_tensors[i] ? output_tensors[i]->data.u8 : 0), (output_tensors[i] ? CCV_TENSOR_GET_DEVICE_ID(output_tensors[i]->info.type) : -1));
624
15.5k
      if (output_tensors[i] && CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_INFO))
625
0
        ccv_nnc_print_tensor_info(output_tensors[i]);
626
15.5k
      PRINT(CCV_CLI_INFO, "\n");
627
15.5k
    }
628
15.3k
  }
629
15.3k
  int inputs_are_constants = 1;
630
30.7k
  for (i = 0; inputs_are_constants && i < input_size; i++)
631
15.3k
    if (inputs[i] && inputs[i]->type != CCV_NNC_TENSOR_CONSTANT)
632
15.3k
      inputs_are_constants = 0;
633
15.3k
  if (input_size > 0 && !inputs_are_constants && !graph->no_grad) // No need to record the execution if there is no input or we disabled gradient computation.
634
15.3k
  {
635
15.3k
    ccv_nnc_tensor_symbol_t output_symbols[ccv_max(1, output_size)];
636
30.8k
    for (i = 0; i < output_size; i++)
637
15.5k
      if (outputs[i])
638
15.3k
      {
639
15.3k
        assert(outputs[i]->type != CCV_NNC_TENSOR_CONSTANT);
640
15.3k
        output_symbols[i] = _ccv_nnc_tensor_symbol_from_variable(graph, outputs[i]);
641
15.3k
      } else
642
207
        output_symbols[i] = NO_TENSOR_SYMBOL;
643
15.3k
    int t;
644
30.6k
    for (t = 0; t < parallel_count; t++)
645
15.3k
    {
646
15.3k
      ccv_nnc_graph_exec_symbol_t graph_exec = ccv_nnc_graph_exec_symbol_new(graph->tape, cmd, input_symbols + t * per_input_size, per_input_size, output_symbols + t * per_output_size, per_output_size, 0);
647
15.3k
      if (graph_execs)
648
2.40k
        graph_execs[t] = graph_exec;
649
      // This needs to be done before we set the new sources on the outputs.
650
43.4k
      for (i = 0; i < per_input_size; i++)
651
28.0k
      {
652
28.0k
        ccv_array_t* const input_source = input_sources[i + t * per_input_size];
653
28.0k
        if (input_source)
654
28.1k
          for (j = 0; j < input_source->rnum; j++)
655
14.0k
            ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){
656
14.0k
              .d = *(int*)ccv_array_get(input_source, j),
657
14.0k
              .graph = graph->tape
658
14.0k
            }, graph_exec);
659
28.0k
        ccv_array_t* const input_alias_source = input_alias_sources[i + t * per_input_size];
660
28.0k
        if (input_alias_source)
661
2.02k
          for (j = 0; j < input_alias_source->rnum; j++)
662
1.01k
            ccv_nnc_graph_exec_symbol_concat(graph->tape, (ccv_nnc_graph_exec_symbol_t){
663
1.01k
              .d = *(int*)ccv_array_get(input_alias_source, j),
664
1.01k
              .graph = graph->tape
665
1.01k
            }, graph_exec);
666
28.0k
      }
667
43.4k
      for (i = 0; i < per_input_size; i++)
668
28.0k
      {
669
28.0k
        ccv_nnc_tensor_variable_t const input = inputs[i + t * per_input_size];
670
28.0k
        if (!input || input->type == CCV_NNC_TENSOR_CONSTANT)
671
236
          continue;
672
27.8k
        ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, input_symbols[i + t * per_input_size].d);
673
27.8k
        if (!bind->destinations)
674
22.0k
          bind->destinations = ccv_array_new(sizeof(int), 1, 0);
675
27.8k
        ccv_array_add_unique_int(bind->destinations, graph_exec.d);
676
27.8k
        if (input->alias_index_ref)
677
1.01k
        {
678
1.01k
            const int alias_index = input->alias_index_ref - 1;
679
1.01k
            assert(alias_index >= 0);
680
1.01k
            ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
681
1.01k
            ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d);
682
1.01k
            if (!root_bind->destinations)
683
1.01k
              root_bind->destinations = ccv_array_new(sizeof(int), 1, 0);
684
1.01k
            ccv_array_add_unique_int(root_bind->destinations, graph_exec.d);
685
1.01k
        }
686
27.8k
      }
687
30.9k
      for (i = 0; i < per_output_size; i++)
688
15.5k
      {
689
15.5k
        ccv_nnc_tensor_variable_t const output = outputs[i + t * per_output_size];
690
15.5k
        if (!output)
691
207
          continue;
692
15.3k
        ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, output_symbols[i + t * per_output_size].d);
693
15.3k
        assert(!bind->sources); // This is a new symbol, therefore, no binded sources associated yet.
694
15.3k
        bind->sources = ccv_array_new(sizeof(int), 1, 0);
695
15.3k
        ccv_array_add_unique_int(bind->sources, graph_exec.d);
696
15.3k
        if (output->alias_index_ref)
697
8
        {
698
8
          const int alias_index = output->alias_index_ref - 1;
699
8
          assert(alias_index >= 0);
700
8
          ccv_nnc_tensor_variable_t variable_to = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, alias_index);
701
8
          ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, variable_to->symbol.d);
702
8
          if (!root_bind->sources)
703
4
            root_bind->sources = ccv_array_new(sizeof(int), 1, 0);
704
8
          ccv_array_add_unique_int(root_bind->sources, graph_exec.d);
705
8
        }
706
15.3k
      }
707
15.3k
    }
708
15.3k
  }
709
  // Now, able to free some of the reused outputs.
710
15.7k
  for (i = 0; i < freeable_size; i++)
711
415
    ccv_nnc_tensor_variable_free(graph, freeables[i]);
712
15.3k
}
713
714
int ccv_nnc_dynamic_graph_exec(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context)
715
12.9k
{
716
12.9k
  ccv_nnc_dynamic_graph_exec_ret(graph, cmd, hint, flags, inputs, input_size, outputs, output_size, parallel, stream_context, 0);
717
12.9k
  return CCV_NNC_EXEC_SUCCESS;
718
12.9k
}
719
720
static int _ccv_nnc_tensor_variable_is_only_output(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_graph_bind_t* bind, const int symbol_d)
721
17.5k
{
722
17.5k
  if (bind->alias_ref)
723
1.01k
    bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, bind->alias_ref - 1);
724
17.5k
  if (!bind->sources || bind->sources->rnum == 0)
725
0
    return 1;
726
17.5k
  int i;
727
33.9k
  for (i = 0; i < bind->sources->rnum; i++)
728
17.5k
  {
729
17.5k
    const int exec_symbol_d = *(int*)ccv_array_get(bind->sources, i);
730
17.5k
    const ccv_nnc_graph_exec_symbol_t exec_symbol = {
731
17.5k
      .d = exec_symbol_d,
732
17.5k
      .graph = graph->tape
733
17.5k
    };
734
17.5k
    const int* outputs; int output_size;
735
17.5k
    ccv_nnc_graph_exec_symbol_io(graph->tape, exec_symbol, 0, 0, &outputs, &output_size);
736
17.5k
    int j;
737
34.0k
    for (j = 0; j < output_size; j++)
738
17.5k
      if (outputs[j] >= 0 && outputs[j] != symbol_d) // If output is me, it is the only output.
739
1.04k
      {
740
1.04k
        assert(outputs[j] < graph->binds->rnum);
741
1.04k
        const ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j]);
742
        // This is in use and is it not a constant symbol.
743
1.04k
        if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT)
744
25
          return 0;
745
1.01k
        if (other_bind->alias_ref) // If this is alias, use its original's destinations.
746
1
          other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
747
1.01k
        if (other_bind->destinations && other_bind->destinations->rnum > 0)
748
1.00k
          return 0;
749
1.01k
      }
750
17.5k
  }
751
16.4k
  return 1;
752
17.5k
}
753
754
static void _ccv_nnc_update_bind_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws)
755
24.9k
{
756
24.9k
  int i;
757
24.9k
  if (bind->destinations)
758
24.7k
  {
759
24.7k
    int flag = 0;
760
49.3k
    for (i = 0; !flag && i < bind->destinations->rnum; i++)
761
24.5k
    {
762
24.5k
      const int exec_symbol_d = *(int*)ccv_array_get(bind->destinations, i);
763
24.5k
      if (exec_symbol_d == freed_exec_symbol_d)
764
24.5k
      {
765
24.5k
        if (i < bind->destinations->rnum - 1)
766
17
          *(int*)ccv_array_get(bind->destinations, i) = *(int*)ccv_array_get(bind->destinations, bind->destinations->rnum - 1);
767
24.5k
        --bind->destinations->rnum;
768
24.5k
        flag = 1;
769
24.5k
      }
770
24.5k
    }
771
    // This symbol can be freed.
772
24.7k
    if (flag && bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED)
773
17.6k
    {
774
17.6k
      ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind;
775
17.6k
      if (bind->alias_ref)
776
1.01k
      {
777
1.01k
        root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1);
778
1.01k
        if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
779
1.01k
          root_bind = bind;
780
1.01k
      }
781
      // If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more.
782
      // It is possible because exec will be freed already, thus, it is safe to remove this alias out.
783
17.6k
      if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED &&
784
17.6k
        
(17.6k
(17.6k
!root_bind->sources17.6k
||
root_bind->sources->rnum == 08.64k
) ||
_ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)6.01k
) &&
785
17.6k
        
root_bind->destinations->rnum == 017.6k
)
786
17.6k
      {
787
17.6k
        if (root_bind->sources)
788
14.6k
          for (i = 0; i < root_bind->sources->rnum; i++)
789
6.00k
            ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i));
790
17.6k
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
791
17.6k
        ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
792
17.6k
          .d = tensor_index,
793
17.6k
          .graph = graph->tape
794
17.6k
        });
795
17.6k
      } else if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && // Handle the case the bind is already freed, and it doesn't have any sources or destinations.
796
8
        bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) {
797
2
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
798
2
        ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
799
2
          .d = tensor_index,
800
2
          .graph = graph->tape
801
2
        });
802
2
      }
803
17.6k
    }
804
24.7k
  }
805
24.9k
}
806
807
static void _ccv_nnc_update_bind_sources_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, ccv_nnc_tensor_variable_graph_bind_t* const bind, const int tensor_index, ccv_array_t* const ws)
808
7.29k
{
809
7.29k
  int i;
810
7.29k
  if (bind->sources)
811
7.29k
  {
812
7.29k
    int flag = 0;
813
14.5k
    for (i = 0; !flag && i < bind->sources->rnum; i++)
814
7.29k
    {
815
7.29k
      const int exec_symbol_d = *(int*)ccv_array_get(bind->sources, i);
816
7.29k
      if (exec_symbol_d == freed_exec_symbol_d)
817
7.29k
      {
818
7.29k
        if (i < bind->sources->rnum - 1)
819
2
          *(int*)ccv_array_get(bind->sources, i) = *(int*)ccv_array_get(bind->sources, bind->sources->rnum - 1);
820
7.29k
        --bind->sources->rnum;
821
7.29k
        flag = 1;
822
7.29k
      }
823
7.29k
    }
824
7.29k
    if (flag && !bind->alias_ref && bind->index >= 0 && bind->type == CCV_NNC_TENSOR_CONSTANT && // If it is detached (constant but previously has sources). Now can check again.
825
7.29k
      (bind->sources->rnum == 0 || _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)) &&
826
7.29k
      (!bind->destinations || bind->destinations->rnum == 0))
827
3
    {
828
      // If this is constant, set it to be no symbol again.
829
3
      ccv_nnc_tensor_variable_t tv = *(ccv_nnc_tensor_variable_t*)ccv_array_get(graph->vars, bind->index);
830
3
      tv->symbol = NO_TENSOR_SYMBOL;
831
3
      _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
832
3
      ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
833
3
        .d = tensor_index,
834
3
        .graph = graph->tape
835
3
      });
836
7.28k
    } else if (flag && bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED) {
837
      // This symbol can be freed.
838
2.44k
      ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind;
839
2.44k
      if (bind->alias_ref)
840
3
      {
841
3
        root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, bind->alias_ref - 1);
842
3
        if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
843
0
          root_bind = bind;
844
3
      }
845
      // If the alias_ref is not freed, we cannot free this, unless it is very clear there is no reference to this any more.
846
      // It is possible because exec will be freed already, thus, it is safe to remove this alias out.
847
2.44k
      if (root_bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED &&
848
2.44k
        (root_bind->sources->rnum == 0 || _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_index)) &&
849
2.44k
        (!root_bind->destinations || root_bind->destinations->rnum == 0))
850
6
      {
851
6
        for (i = 0; i < root_bind->sources->rnum; i++)
852
0
          ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i));
853
6
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
854
6
        ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
855
6
          .d = tensor_index,
856
6
          .graph = graph->tape
857
6
        });
858
2.43k
      } else if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED && // Handle the case the bind is already freed, and it doesn't have any sources or destinations.
859
2.43k
        bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0)) {
860
3
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
861
3
        ccv_nnc_tensor_symbol_free(graph->tape, (ccv_nnc_tensor_symbol_t){
862
3
          .d = tensor_index,
863
3
          .graph = graph->tape
864
3
        });
865
3
      }
866
2.44k
    }
867
7.29k
  }
868
7.29k
}
869
870
static void _ccv_nnc_update_bind_sources_destinations_when_free(ccv_nnc_dynamic_graph_t* const graph, const int freed_exec_symbol_d, ccv_array_t* const binds, const int* const inputs, const int input_size, const int* const outputs, const int output_size, ccv_array_t* const ws)
871
15.3k
{
872
15.3k
  int i;
873
43.2k
  for (i = 0; i < input_size; i++)
874
27.9k
    if (inputs[i] >= 0 && inputs[i] < binds->rnum)
875
27.9k
    {
876
27.9k
      ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, inputs[i]);
877
27.9k
      if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
878
4.01k
        continue;
879
23.9k
      if (bind->alias_ref)
880
1.01k
      {
881
1.01k
        const int alias_to = bind->alias_ref - 1;
882
1.01k
        ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to);
883
1.01k
        if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE)
884
1.01k
          _ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws);
885
1.01k
      }
886
23.9k
      _ccv_nnc_update_bind_destinations_when_free(graph, freed_exec_symbol_d, binds, bind, inputs[i], ws);
887
23.9k
    }
888
  // Note that this works because there is no overlap of inputs / outputs. (What about alias?).
889
30.8k
  for (i = 0; i < output_size; i++)
890
15.5k
    if (outputs[i] >= 0 && outputs[i] < binds->rnum)
891
15.3k
    {
892
15.3k
      ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, outputs[i]);
893
15.3k
      if (bind->index == CCV_NNC_TENSOR_NO_VARIABLE)
894
8.02k
        continue;
895
7.28k
      if (bind->alias_ref)
896
5
      {
897
5
        const int alias_to = bind->alias_ref - 1;
898
5
        ccv_nnc_tensor_variable_graph_bind_t* const root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(binds, alias_to);
899
5
        if (root_bind && root_bind->index != CCV_NNC_TENSOR_NO_VARIABLE)
900
5
          _ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, root_bind, alias_to, ws);
901
5
      }
902
7.28k
      _ccv_nnc_update_bind_sources_when_free(graph, freed_exec_symbol_d, binds, bind, outputs[i], ws);
903
7.28k
    }
904
15.3k
}
905
906
static void _ccv_nnc_stateful_exec_free_if_possible(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol)
907
15.3k
{
908
15.3k
  if (!graph->stateful_execs)
909
6.06k
    return;
910
9.23k
  assert(symbol.d >= 0);
911
9.23k
  ccv_array_t* const stateful_execs = graph->stateful_execs;
912
9.23k
  ccv_nnc_cmd_t cmd = ccv_nnc_graph_exec_symbol_cmd(graph->tape, symbol);
913
9.23k
  ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data;
914
9.23k
  if (!stateful_exec)
915
6.83k
    return;
916
  // If there is no backward, no need to apply gradients.
917
  // Otherwise, if we applied gradients, we can free it as well.
918
  // We don't free this stateful exec because apply gradients doesn't require any variables alive.
919
2.40k
  if (!stateful_exec->did_backward_but_not_apply_gradients)
920
300
  {
921
300
    const int index = stateful_exec->index;
922
300
    ccfree(stateful_exec);
923
300
    if (index < graph->reuse_stateful_exec || graph->reuse_stateful_exec < 0)
924
300
      graph->reuse_stateful_exec = index;
925
300
    *(ccv_nnc_stateful_exec_t**)ccv_array_get(stateful_execs, index) = 0;
926
300
  } else
927
2.10k
    stateful_exec->should_free = 1;
928
2.40k
}
929
930
static int _ccv_nnc_tensor_bind_trace_forward_to_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_variable_graph_bind_t* const bind, ccv_nnc_tensor_variable_graph_bind_t* const root_bind, int* const ws_start, const int assuming_no_source) // assuming_no_source means we are going to remove sources if possible, thus, it is irrelevant.
931
27.4k
{
932
27.4k
  int can_free_symbol = 0;
933
27.4k
  const int sources_and_is_only_output = (root_bind->sources && root_bind->sources->rnum > 0) && _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_variable->symbol.d);
934
27.4k
  if (!root_bind->sources || root_bind->sources->rnum == 0 || sources_and_is_only_output || assuming_no_source)
935
26.4k
  {
936
26.4k
    int i, j;
937
26.4k
    can_free_symbol = 1; // Assume we can free this symbol.
938
26.4k
    if (!graph->ws)
939
18
      graph->ws = ccv_array_new(sizeof(int), root_bind->destinations ? root_bind->destinations->rnum : 0, 0);
940
26.4k
    ccv_array_t* const ws = graph->ws;
941
26.4k
    ccv_array_clear(ws);
942
26.4k
    if (root_bind->destinations)
943
43.4k
      for (i = 0; i < root_bind->destinations->rnum; i++)
944
21.5k
        ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->destinations, i));
945
26.4k
    const int ws_init_size = ws->rnum;
946
26.4k
    *ws_start = ws_init_size;
947
    // Add all sources from root_bind, in case it has been freed (during update bind sources / destinations when free).
948
26.4k
    if (root_bind->sources)
949
25.7k
      for (i = 0; i < root_bind->sources->rnum; i++)
950
10.4k
        ccv_array_add_unique_int(ws, *(int*)ccv_array_get(root_bind->sources, i));
951
    // If we cannot loop over any exec symbols (this is not in use). It is simple to determine whether we want
952
    // to free it or not: if this is an alias and the origin is not freed, we cannot free this symbol.
953
26.4k
    if (ws_init_size == 0)
954
4.91k
      can_free_symbol = (!bind->alias_ref || root_bind->index < 0);
955
    // Go through all the exec symbols use this tensor, to see whether they have inputs that has other sources.
956
47.9k
    for (i = 0; i < ws_init_size; i++)
957
21.5k
    {
958
21.5k
      const int exec_symbol_d = *(int*)ccv_array_get(ws, i);
959
21.5k
      const ccv_nnc_graph_exec_symbol_t symbol = {
960
21.5k
        .d = exec_symbol_d,
961
21.5k
        .graph = graph->tape
962
21.5k
      };
963
21.5k
      const int* inputs; int input_size;
964
21.5k
      const int* outputs; int output_size;
965
21.5k
      ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size);
966
21.5k
      int flag = 0; // flag denotes whether there are cases to keep this exec symbol.
967
21.5k
      if (!root_bind->sources || root_bind->sources->rnum == 0 || assuming_no_source)
968
13.1k
      {
969
        // If there is no sources, check if other sources can depend on this exec, if they do, we cannot free this.
970
36.8k
        for (j = 0; !flag && j < input_size; j++)
971
23.7k
          if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum && inputs[j] != tensor_variable->symbol.d)
972
14.4k
          {
973
14.4k
            ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j]);
974
14.4k
            if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT)
975
6.22k
              flag = 1;
976
8.26k
            else {
977
8.26k
              if (other_bind->alias_ref) // If this is an alias, use its original's sources.
978
4
                other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
979
8.26k
              flag = (other_bind->type != CCV_NNC_TENSOR_CONSTANT) && (other_bind->sources && other_bind->sources->rnum > 0); // Constant should have no source, or it is detached.
980
8.26k
            }
981
14.4k
          }
982
13.1k
      } else {
983
        // If there are sources, check whether we have outputs or not. If we do, we cannot free this.
984
16.9k
        for (j = 0; !flag && j < output_size; j++)
985
8.45k
          if (outputs[j] >= 0 && outputs[j] < graph->binds->rnum)
986
8.45k
          {
987
8.45k
            ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j]);
988
8.45k
            if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT)
989
4.43k
              flag = 1;
990
4.02k
            else {
991
4.02k
              if (other_bind->alias_ref) // If this is an alias, use its original's destinations.
992
0
                other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
993
4.02k
              flag = (other_bind->destinations && other_bind->destinations->rnum > 0);
994
4.02k
            }
995
8.45k
          }
996
8.44k
      }
997
      // This exec can be freed if there is no input required or there is no output required.
998
21.5k
      can_free_symbol = (can_free_symbol && !flag);
999
21.5k
      if (!flag)
1000
4.87k
      {
1001
        // Go over the inputs and remove all references from the bound destinations,
1002
        // and go over the outputs and remove all references from the bound sources.
1003
4.87k
        _ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws);
1004
4.87k
        const int* outgoings; int outgoing_size;
1005
4.87k
        ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size);
1006
7.30k
        for (j = 0; j < outgoing_size; j++)
1007
2.43k
          ccv_array_add_unique_int(ws, outgoings[j]);
1008
4.87k
        _ccv_nnc_stateful_exec_free_if_possible(graph, symbol);
1009
4.87k
        ccv_nnc_graph_exec_symbol_free(graph->tape, symbol);
1010
4.87k
      }
1011
21.5k
    }
1012
26.4k
  }
1013
27.4k
  return can_free_symbol;
1014
27.4k
}
1015
1016
static void _ccv_nnc_tensor_bind_trace_backward_to_free(ccv_nnc_dynamic_graph_t* const graph, ccv_array_t* const ws, const int ws_start)
1017
9.75k
{
1018
9.75k
  int i, j;
1019
  // Now, go over the outgoings; whenever one is removed, add its outgoings for further inspection. Note that the ws array can grow while we iterate over it.
1020
22.6k
  for (i = ws_start; i < ws->rnum; i++)
1021
12.8k
  {
1022
12.8k
    const int exec_symbol_d = *(int*)ccv_array_get(ws, i);
1023
12.8k
    const ccv_nnc_graph_exec_symbol_t symbol = {
1024
12.8k
      .d = exec_symbol_d,
1025
12.8k
      .graph = graph->tape
1026
12.8k
    };
1027
12.8k
    const int* inputs; int input_size;
1028
12.8k
    const int* outputs; int output_size;
1029
12.8k
    ccv_nnc_graph_exec_symbol_io(graph->tape, symbol, &inputs, &input_size, &outputs, &output_size);
1030
12.8k
    int flag = 0;
1031
29.9k
    for (j = 0; !flag && j < input_size; j++)
1032
17.1k
      if (inputs[j] >= 0 && inputs[j] < graph->binds->rnum)
1033
17.1k
      {
1034
17.1k
        ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, inputs[j]);
1035
17.1k
        if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT)
1036
4.44k
          flag = 1;
1037
12.6k
        else {
1038
12.6k
          if (other_bind->alias_ref) // If this is an alias, use its original's sources.
1039
1.02k
            other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
1040
12.6k
          flag = (other_bind->type != CCV_NNC_TENSOR_CONSTANT) && (other_bind->sources && other_bind->sources->rnum > 0);
1041
12.6k
        }
1042
17.1k
      }
1043
12.8k
    if (flag) // If any input makes freeing this destination impossible, check whether all its outputs are done.
1044
10.4k
    {
1045
10.4k
      int output_flag = 0;
1046
21.1k
      for (j = 0; !output_flag && j < output_size; j++)
1047
10.6k
        if (outputs[j] >= 0 && outputs[j] < graph->binds->rnum)
1048
10.4k
        {
1049
10.4k
          ccv_nnc_tensor_variable_graph_bind_t* other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, outputs[j]);
1050
10.4k
          if (other_bind->index >= 0 && other_bind->type != CCV_NNC_TENSOR_CONSTANT)
1051
2.41k
            output_flag = 1;
1052
8.04k
          else {
1053
8.04k
            if (other_bind->alias_ref) // If this is an alias, use its original's destinations.
1054
0
              other_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, other_bind->alias_ref - 1);
1055
8.04k
            output_flag = (other_bind->destinations && other_bind->destinations->rnum > 0);
1056
8.04k
          }
1057
10.4k
        }
1058
10.4k
      if (!output_flag) // If no output is used (used means it has a tensor variable, or it has a destination).
1059
8.02k
        flag = 0;
1060
10.4k
    }
1061
    // We went over all the inputs; no more inputs have other references, so it is safe to remove.
1062
12.8k
    if (!flag)
1063
10.4k
    {
1064
10.4k
      _ccv_nnc_update_bind_sources_destinations_when_free(graph, exec_symbol_d, graph->binds, inputs, input_size, outputs, output_size, ws);
1065
10.4k
      const int* outgoings; int outgoing_size;
1066
10.4k
      ccv_nnc_graph_exec_symbol_to(graph->tape, symbol, &outgoings, &outgoing_size);
1067
      // If it has outgoings, add them for further inspection.
1068
12.8k
      for (j = 0; j < outgoing_size; j++)
1069
2.40k
        ccv_array_add_unique_int(ws, outgoings[j]);
1070
10.4k
      _ccv_nnc_stateful_exec_free_if_possible(graph, symbol);
1071
10.4k
      ccv_nnc_graph_exec_symbol_free(graph->tape, symbol);
1072
10.4k
    }
1073
12.8k
  }
1074
9.75k
}
1075
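Both trace helpers above drive a workspace array that is deduplicated with ccv_array_add_unique_int and keeps growing while it is being scanned, so exec symbols discovered mid-pass are inspected in the same loop. A stripped-down sketch of that pattern with a plain int buffer (hypothetical helper names, not the library code):

#include <stdlib.h>

typedef struct {
  int* data;
  int rnum; // Number of valid entries, analogous to ccv_array_t's rnum.
  int size; // Allocated capacity.
} int_worklist_t;

// Append v only if it is not already present, mirroring ccv_array_add_unique_int.
static void worklist_add_unique(int_worklist_t* const ws, const int v)
{
  int i;
  for (i = 0; i < ws->rnum; i++)
    if (ws->data[i] == v)
      return;
  if (ws->rnum == ws->size) {
    ws->size = ws->size ? ws->size * 2 : 4;
    ws->data = (int*)realloc(ws->data, sizeof(int) * ws->size);
  }
  ws->data[ws->rnum++] = v;
}

// Scan from start onward. process() may call worklist_add_unique to append new
// items; because the bound ws->rnum is re-read every iteration, items appended
// mid-scan are visited in the same pass, exactly like the loops over ws above.
static void worklist_drain(int_worklist_t* const ws, const int start, void (*process)(int item, int_worklist_t* ws))
{
  int i;
  for (i = start; i < ws->rnum; i++)
    process(ws->data[i], ws);
}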
1076
void ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
1077
32.1k
{
1078
  // If it contains a symbol, this tensor variable is not a free variable. It is either used as input or output.
1079
32.1k
  if (tensor_variable->symbol.d != CCV_NNC_NO_TENSOR_SYMBOL)
1080
27.4k
  {
1081
    // If it is not a free variable, when can we free the symbol and the underlying variable?
1082
    // 1. There should be no sources (the command that generates this tensor should be freed), or the only output of these sources is the current one;
1083
    // 2. The destinations (the commands that use this tensor) should have no other inputs, or the other inputs have no bound sources either.
1084
27.4k
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, tensor_variable->symbol.d);
1085
    // There should be no source associated with it anymore.
1086
    // I am free if no exec symbol is producing me, or the symbol producing me can only produce me (thus, it is not required to
1087
    // compute gradient because I am the only variable it can compute gradient for).
1088
27.4k
    ccv_nnc_tensor_variable_graph_bind_t* root_bind = bind;
1089
27.4k
    if (bind->alias_ref)
1090
1.03k
    {
1091
1.03k
      const int alias_to = bind->alias_ref - 1;
1092
1.03k
      root_bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, alias_to);
1093
1.03k
    }
1094
27.4k
    int ws_start;
1095
27.4k
    const int can_free_symbol = _ccv_nnc_tensor_bind_trace_forward_to_free(graph, tensor_variable, bind, root_bind, &ws_start, 0);
1096
27.4k
    if (can_free_symbol)
1097
9.74k
    {
1098
9.74k
      _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
1099
9.74k
      ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol);
1100
9.74k
      _ccv_nnc_tensor_bind_trace_backward_to_free(graph, graph->ws, ws_start);
1101
17.7k
    } else { // If this symbol is not freed, move the tensor view to the bind.
1102
      // If the current bind is an alias and it doesn't have any sources or destinations, we cannot find this alias
1103
      // through any exec. It is not only safe to delete, it has to be deleted. We don't need to handle this
1104
      // when can_free_symbol is true, because in that case root_bind will be deleted, and we will clean up the
1105
      // alias in that process.
1106
17.7k
      if (bind->alias_ref && (!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0))
1107
20
      {
1108
20
        _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
1109
20
        ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol);
1110
17.6k
      } else {
1111
17.6k
        bind->index = CCV_NNC_TENSOR_NO_VARIABLE_BUT_USED; // This tensor variable will be freed, but this symbol's extra info will continue to exist.
1112
17.6k
        bind->destructor_hook.func = tensor_variable->destructor_hook.func; // Transfer the destructor callback.
1113
17.6k
        bind->destructor_hook.context = tensor_variable->destructor_hook.context; // Transfer the destructor callback context.
1114
17.6k
        bind->tensor_view = tensor_variable->tensor_view; // Transfer the ownership to the bind.
1115
17.6k
        tensor_variable->tensor_view = 0;
1116
17.6k
      }
1117
17.7k
    }
1118
27.4k
  }
1119
32.1k
  _ccv_nnc_tensor_variable_free(graph, tensor_variable, 1);
1120
32.1k
}
1121
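A minimal usage sketch for the free path above. ccv_nnc_dynamic_graph_new and ccv_nnc_tensor_variable_free appear in this file; ccv_nnc_tensor_variable_new, CPU_TENSOR_NHWC, and ccv_nnc_dynamic_graph_free are assumed from the public ccv_nnc headers and are not verified here:

#include "ccv_nnc.h"
#include "ccv_nnc_easy.h"

int main(void)
{
  ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new();
  // Assumed constructor from the public API: a 2-element float variable on CPU.
  ccv_nnc_tensor_variable_t a = ccv_nnc_tensor_variable_new(graph, CPU_TENSOR_NHWC(32F, 2));
  // The variable never took part in an exec, so its symbol is typically still
  // CCV_NNC_NO_TENSOR_SYMBOL and the free call only releases the variable itself.
  ccv_nnc_tensor_variable_free(graph, a);
  ccv_nnc_dynamic_graph_free(graph); // Assumed teardown from the public API.
  return 0;
}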
1122
void ccv_nnc_tensor_variable_detach(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable)
1123
6
{
1124
  // This cannot be an alias.
1125
6
  assert(!tensor_variable->alias_index_ref);
1126
  // If no computation has been done yet, mark this as constant.
1127
6
  if (tensor_variable->symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1128
0
  {
1129
0
    tensor_variable->type = CCV_NNC_TENSOR_CONSTANT;
1130
0
    return;
1131
0
  }
1132
  // Otherwise, we need to do some bookkeeping updates to make sure it doesn't participate in gradient computation anymore.
1133
6
  ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, tensor_variable->symbol.d);
1134
  // Because the tensor variable cannot be an alias, its bind cannot have an alias pointer.
1135
6
  assert(!bind->alias_ref);
1136
  // Go through to break ties between sources and destinations.
1137
6
  int i, j;
1138
6
  if (bind->sources && bind->destinations)
1139
6
  {
1140
11
    for (i = 0; i < bind->sources->rnum; i++)
1141
5
    {
1142
5
      const int s = *(int*)ccv_array_get(bind->sources, i);
1143
5
      const int* outputs; int output_size;
1144
5
      const ccv_nnc_graph_exec_symbol_t s_symbol = {
1145
5
        .d = s,
1146
5
        .graph = graph->tape
1147
5
      };
1148
5
      ccv_nnc_graph_exec_symbol_io(graph->tape, s_symbol, 0, 0, &outputs, &output_size);
1149
10
      for (j = 0; j < bind->destinations->rnum; j++)
1150
5
      {
1151
5
        const int d = *(int*)ccv_array_get(bind->destinations, j);
1152
5
        const ccv_nnc_graph_exec_symbol_t d_symbol = {
1153
5
          .d = d,
1154
5
          .graph = graph->tape
1155
5
        };
1156
5
        const int* inputs; int input_size;
1157
5
        ccv_nnc_graph_exec_symbol_io(graph->tape, d_symbol, &inputs, &input_size, 0, 0);
1158
5
        int x, y;
1159
5
        int flag = 0; // Whether we find a symbol other than the one we are detaching that connects the source and the destination. If found, we cannot break the tie between s_symbol and d_symbol.
1160
10
        for (x = 0; !flag && x < output_size; x++)
1161
5
        {
1162
5
          ccv_nnc_tensor_symbol_t x_symbol = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){
1163
5
            .d = outputs[x],
1164
5
            .graph = graph->tape
1165
5
          });
1166
5
          if (x_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1167
5
          {
1168
5
            x_symbol.d = outputs[x];
1169
5
            x_symbol.graph = graph->tape;
1170
5
          }
1171
5
          if (x_symbol.d == tensor_variable->symbol.d || x_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1172
5
            continue;
1173
0
          for (y = 0; !flag && y < input_size; y++)
1174
0
          {
1175
0
            ccv_nnc_tensor_symbol_t y_symbol = ccv_nnc_tensor_symbol_alias_to(graph->tape, (ccv_nnc_tensor_symbol_t){
1176
0
              .d = inputs[y],
1177
0
              .graph = graph->tape
1178
0
            });
1179
0
            if (y_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1180
0
            {
1181
0
              y_symbol.d = inputs[y];
1182
0
              y_symbol.graph = graph->tape;
1183
0
            }
1184
0
            if (y_symbol.d == tensor_variable->symbol.d || y_symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
1185
0
              continue;
1186
0
            flag = (x_symbol.d == y_symbol.d);
1187
0
          }
1188
0
        }
1189
5
        if (!flag)
1190
5
          ccv_nnc_graph_exec_symbol_disjoin(graph->tape, s_symbol, d_symbol);
1191
5
      }
1192
5
    }
1193
6
  }
1194
6
  const int sources_and_is_only_output = (bind->sources && bind->sources->rnum > 0) && _ccv_nnc_tensor_variable_is_only_output(graph, bind, tensor_variable->symbol.d);
1195
6
  if (!bind->sources || bind->sources->rnum == 0 || sources_and_is_only_output)
1196
6
  {
1197
6
    int ws_start = -1;
1198
6
    _ccv_nnc_tensor_bind_trace_forward_to_free(graph, tensor_variable, bind, bind, &ws_start, 1);
1199
    // Because we are detaching from the graph, there is no need to forward trace to see if it is not used and
1200
    // then to remove the source execs. We can remove them right now, breaking the graph in two. That is why
1201
    // we call trace-backward-to-free regardless of the outcome of the forward-to-free.
1202
6
    if (ws_start == -1)
1203
0
    {
1204
0
      if (!graph->ws)
1205
0
        graph->ws = ccv_array_new(sizeof(int), bind->destinations ? bind->destinations->rnum : 0, 0);
1206
0
      ccv_array_t* const ws = graph->ws;
1207
0
      ccv_array_clear(ws);
1208
0
      if (bind->sources)
1209
0
        for (i = 0; i < bind->sources->rnum; i++)
1210
0
          ccv_array_add_unique_int(ws, *(int*)ccv_array_get(bind->sources, i));
1211
0
      ws_start = 0;
1212
0
    }
1213
6
    _ccv_nnc_tensor_bind_trace_backward_to_free(graph, graph->ws, ws_start);
1214
6
  }
1215
  // If the bind now has no relevant sources or destinations, we can safely free the underlying tensor symbol.
1216
6
  if ((!bind->sources || bind->sources->rnum == 0) && (!bind->destinations || bind->destinations->rnum == 0))
1217
1
  {
1218
1
    _ccv_nnc_tensor_variable_graph_bind_free(graph, bind, 1);
1219
1
    ccv_nnc_tensor_symbol_free(graph->tape, tensor_variable->symbol);
1220
1
    tensor_variable->type = CCV_NNC_TENSOR_CONSTANT;
1221
1
    tensor_variable->symbol = NO_TENSOR_SYMBOL;
1222
1
    return;
1223
1
  }
1224
  // Mark both as constant, such that even if it cannot be freed now, it can be freed as soon as possible later.
1225
5
  bind->type = CCV_NNC_TENSOR_CONSTANT;
1226
5
  tensor_variable->type = CCV_NNC_TENSOR_CONSTANT;
1227
5
}
1228
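A small sketch of calling the detach path above on a batch of variables, for example to stop tracking frozen weights; only functions that appear in this file are used, everything else is assumed to be set up by the caller:

#include "ccv_nnc.h"

// Detach a batch of variables from gradient bookkeeping. graph and the variables
// are assumed to exist; ccv_nnc_tensor_variable_detach is the function above.
static void stop_tracking(ccv_nnc_dynamic_graph_t* const graph, ccv_nnc_tensor_variable_t* const vars, const int var_size)
{
  int i;
  for (i = 0; i < var_size; i++)
    // Marks the variable constant; if it already took part in computation, the
    // producing and consuming execs that are only tied together through it are
    // disjoined first, so it no longer participates in gradient computation.
    ccv_nnc_tensor_variable_detach(graph, vars[i]);
}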
1229
void ccv_nnc_dynamic_graph_has_effect_to_tensor_variables(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t* const source_variables, const int source_variable_size, const ccv_nnc_tensor_variable_t* const destination_variables, const int destination_variable_size, uint64_t* const bitmask)
1230
12
{
1231
12
  int i, j;
1232
12
  ccv_array_t* const sources_destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), source_variable_size + destination_variable_size, 0);
1233
31
  for (i = 0; i < source_variable_size; i++)
1234
19
  {
1235
19
    if (source_variables[i]->symbol.d < 0)
1236
0
      continue;
1237
19
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d);
1238
19
    if (bind->destinations && bind->destinations->rnum > 0)
1239
42
      for (j = 0; j < bind->destinations->rnum; j++)
1240
25
      {
1241
        // It is ok to have duplicate symbols.
1242
25
        const int d = *(int*)ccv_array_get(bind->destinations, j);
1243
25
        ccv_nnc_graph_exec_symbol_t symbol = {
1244
25
          .d = d,
1245
25
          .graph = graph->tape
1246
25
        };
1247
25
        ccv_array_push(sources_destinations, &symbol);
1248
25
      }
1249
19
  }
1250
12
  const int source_size = sources_destinations->rnum;
1251
24
  for (i = 0; i < destination_variable_size; i++)
1252
12
  {
1253
12
    if (destination_variables[i]->symbol.d < 0)
1254
0
      continue;
1255
12
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, destination_variables[i]->symbol.d);
1256
12
    if (bind->sources && bind->sources->rnum > 0)
1257
20
      for (j = 0; j < bind->sources->rnum; j++)
1258
10
      {
1259
        // It is ok to have duplicate symbols.
1260
10
        const int d = *(int*)ccv_array_get(bind->sources, j);
1261
10
        ccv_nnc_graph_exec_symbol_t symbol = {
1262
10
          .d = d,
1263
10
          .graph = graph->tape
1264
10
        };
1265
10
        ccv_array_push(sources_destinations, &symbol);
1266
10
      }
1267
12
  }
1268
12
  const int destination_size = sources_destinations->rnum - source_size;
1269
12
  if (source_size == 0 || destination_size == 0)
1270
2
  {
1271
2
    ccv_array_free(sources_destinations);
1272
2
    return;
1273
2
  }
1274
10
  const int bitmask_size = ((source_size + 63) >> 6);
1275
10
  assert(bitmask_size < 256);
1276
10
  uint64_t exec_bitmask[bitmask_size];
1277
10
  ccv_nnc_symbolic_graph_sources_to_destinations(graph->tape, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, 0), source_size, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(sources_destinations, source_size), destination_size, exec_bitmask);
1278
10
  int k = 0;
1279
27
  for (i = 0; i < source_variable_size; i++)
1280
17
  {
1281
17
    if (source_variables[i]->symbol.d < 0)
1282
0
    {
1283
0
      bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63));
1284
0
      continue;
1285
0
    }
1286
17
    ccv_nnc_tensor_variable_graph_bind_t* const bind = (ccv_nnc_tensor_variable_graph_bind_t*)ccv_array_get(graph->binds, source_variables[i]->symbol.d);
1287
17
    int flag = 0;
1288
17
    if (bind->destinations && bind->destinations->rnum > 0)
1289
15
    {
1290
15
      assert(k <= source_size - bind->destinations->rnum);
1291
32
      for (j = 0; !flag && j < bind->destinations->rnum; j++)
1292
17
        flag = (((uint64_t)1 << ((k + j) & 63)) & exec_bitmask[(k + j) >> 6]);
1293
15
      k += bind->destinations->rnum;
1294
15
    }
1295
17
    if (flag)
1296
12
      bitmask[i >> 6] |= ((uint64_t)1 << (i & 63));
1297
5
    else
1298
5
      bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63));
1299
17
  }
1300
10
  ccv_array_free(sources_destinations);
1301
10
}
1302
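The bitmask written by the function above packs one bit per source variable, indexed the same way as in its final loop (word i >> 6, bit i & 63). A small sketch of querying it; the graph and the variable arrays are assumed to be set up elsewhere:

#include "ccv_nnc.h"
#include <stdint.h>
#include <string.h>

// Returns 1 if sources[i] can reach any of the destinations, 0 otherwise.
// i must be in [0, source_size). graph and the variable arrays are assumed
// to be live; only the query function from this file is called.
static int has_effect(const ccv_nnc_dynamic_graph_t* const graph,
  const ccv_nnc_tensor_variable_t* const sources, const int source_size,
  const ccv_nnc_tensor_variable_t* const destinations, const int destination_size,
  const int i)
{
  const int words = (source_size + 63) >> 6;
  uint64_t bitmask[words];
  // Zero first: the query returns early without touching the bitmask when
  // either side contributes no exec symbols.
  memset(bitmask, 0, sizeof(uint64_t) * words);
  ccv_nnc_dynamic_graph_has_effect_to_tensor_variables(graph, sources, source_size, destinations, destination_size, bitmask);
  return !!(bitmask[i >> 6] & ((uint64_t)1 << (i & 63)));
}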
1303
int ccv_nnc_dynamic_graph_bookkeeping_count(const ccv_nnc_dynamic_graph_t* const graph, const int type)
1304
451
{
1305
451
  return ccv_nnc_symbolic_graph_active_symbol_count(graph->tape, type);
1306
451
}
1307
1308
void ccv_nnc_dynamic_graph_dot(const ccv_nnc_dynamic_graph_t* const graph, const int flags, FILE* out)
1309
416
{
1310
416
  ccv_nnc_symbolic_graph_dot(graph->tape, flags, out);
1311
416
}
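Both entry points above are thin wrappers over the tape, which makes them handy for spotting leaked symbols. A short sketch of using them together; CCV_NNC_SYMBOL_TENSOR and CCV_NNC_LONG_DOT_GRAPH are assumed from the public headers and are not defined in this file:

#include "ccv_nnc.h"
#include <stdio.h>

// Print how many tensor symbols are still tracked on the tape, then dump the
// symbolic graph in Graphviz dot format to stdout. graph is assumed to be live;
// the enum value and the flag are assumed from the public headers.
static void inspect(const ccv_nnc_dynamic_graph_t* const graph)
{
  const int tensor_symbols = ccv_nnc_dynamic_graph_bookkeeping_count(graph, CCV_NNC_SYMBOL_TENSOR);
  printf("active tensor symbols: %d\n", tensor_symbols);
  ccv_nnc_dynamic_graph_dot(graph, CCV_NNC_LONG_DOT_GRAPH, stdout);
}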