Coverage Report

Created: 2025-05-09 19:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_symbolic_graph.c
Line
Count
Source
1
#include "ccv_nnc.h"
2
#include "ccv_nnc_easy.h"
3
#include "ccv_nnc_internal.h"
4
#include "ccv_internal.h"
5
#include "_ccv_nnc_symbolic_graph.h"
6
7
// MARK - Level-3 API
8
9
const ccv_nnc_tensor_param_t ccv_nnc_tensor_auto = {};
10
11
int ccv_nnc_is_tensor_auto(const ccv_nnc_tensor_param_t params)
12
715k
{
13
715k
  return (memcmp(&params, &ccv_nnc_tensor_auto, sizeof(ccv_nnc_tensor_param_t)) == 0);
14
715k
}
15
16
ccv_nnc_symbolic_graph_t* ccv_nnc_symbolic_graph_new(void)
17
2.64k
{
18
2.64k
  ccv_nnc_symbolic_graph_t* graph = cccalloc(1, sizeof(ccv_nnc_symbolic_graph_t));
19
2.64k
  graph->tensor_symbol_info = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_info_t), 5, 0);
20
2.64k
  graph->exec_symbol_info = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_info_t), 5, 0);
21
2.64k
  graph->reuse.exec = -1;
22
2.64k
  graph->reuse.tensor = -1;
23
2.64k
  return graph;
24
2.64k
}
25
26
ccv_nnc_symbolic_graph_t* ccv_nnc_symbolic_graph_dup(const ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_symbolic_graph_subst_f subst)
27
13
{
28
13
  ccv_nnc_symbolic_graph_t* new_graph = ccmalloc(sizeof(ccv_nnc_symbolic_graph_t));
29
13
  memcpy(new_graph, graph, sizeof(ccv_nnc_symbolic_graph_t));
30
13
  new_graph->tensor_symbol_info = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_info_t), graph->tensor_symbol_info->rnum, 0);
31
13
  new_graph->tensor_symbol_info->rnum = graph->tensor_symbol_info->rnum;
32
13
  memcpy(ccv_array_get(new_graph->tensor_symbol_info, 0), ccv_array_get(graph->tensor_symbol_info, 0), sizeof(ccv_nnc_tensor_symbol_info_t) * graph->tensor_symbol_info->rnum);
33
13
  int i;
34
91
  for (i = 0; i < new_graph->tensor_symbol_info->rnum; 
i++78
)
35
78
  {
36
78
    ccv_nnc_tensor_symbol_info_t* symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(new_graph->tensor_symbol_info, i);
37
78
    if (symbol_info->name)
38
70
    {
39
70
      char* const name = symbol_info->name;
40
70
      const size_t len = strnlen(name, 63);
41
70
      const size_t n = len + 1;
42
70
      symbol_info->name = (char*)ccmalloc(n);
43
      // Don't use strndup because this way I can have custom allocator (for ccmalloc).
44
70
      memcpy(symbol_info->name, name, n);
45
70
      symbol_info->name[len] = 0;
46
70
    }
47
78
    if (symbol_info->s_ref)
48
6
    {
49
6
      ccv_array_t* const s_ref = symbol_info->s_ref;
50
6
      symbol_info->s_ref = ccv_array_new(sizeof(int), s_ref->rnum, 0);
51
6
      symbol_info->s_ref->rnum = s_ref->rnum;
52
6
      memcpy(ccv_array_get(symbol_info->s_ref, 0), ccv_array_get(s_ref, 0), sizeof(int) * s_ref->rnum);
53
6
    }
54
78
  }
55
13
  new_graph->exec_symbol_info = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_info_t), graph->exec_symbol_info->rnum, 0);
56
13
  new_graph->exec_symbol_info->rnum = graph->exec_symbol_info->rnum;
57
13
  memcpy(ccv_array_get(new_graph->exec_symbol_info, 0), ccv_array_get(graph->exec_symbol_info, 0), sizeof(ccv_nnc_graph_exec_symbol_info_t) * graph->exec_symbol_info->rnum);
58
48
  for (i = 0; i < new_graph->exec_symbol_info->rnum; 
i++35
)
59
35
  {
60
35
    ccv_nnc_graph_exec_symbol_info_t* symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(new_graph->exec_symbol_info, i);
61
35
    if (symbol_info->name)
62
25
    {
63
25
      char* const name = symbol_info->name;
64
25
      const size_t len = strnlen(name, 63);
65
25
      const size_t n = len + 1;
66
25
      symbol_info->name = (char*)ccmalloc(n);
67
      // Don't use strndup because this way I can have custom allocator (for ccmalloc).
68
25
      memcpy(symbol_info->name, name, n);
69
25
      symbol_info->name[len] = 0;
70
25
    }
71
35
    if (symbol_info->outgoings)
72
20
    {
73
20
      ccv_array_t* const outgoings = symbol_info->outgoings;
74
20
      symbol_info->outgoings = ccv_array_new(sizeof(int), outgoings->rnum, 0);
75
20
      symbol_info->outgoings->rnum = outgoings->rnum;
76
20
      memcpy(ccv_array_get(symbol_info->outgoings, 0), ccv_array_get(outgoings, 0), sizeof(int) * outgoings->rnum);
77
20
    }
78
35
    if (symbol_info->inputs)
79
22
    {
80
22
      int* const inputs = symbol_info->inputs;
81
22
      symbol_info->inputs = (int*)ccmalloc(sizeof(int) * (symbol_info->input_size + symbol_info->output_size));
82
22
      symbol_info->outputs = symbol_info->inputs + symbol_info->input_size;
83
22
      memcpy(symbol_info->inputs, inputs, sizeof(int) * (symbol_info->input_size + symbol_info->output_size));
84
22
    }
85
35
    if (symbol_info->_heap_graph_ref)
86
2
    {
87
2
      int* const heap_graph_ref = symbol_info->_heap_graph_ref;
88
2
      symbol_info->_heap_graph_ref = (int*)ccmalloc(sizeof(int) * symbol_info->graph_ref_size);
89
2
      memcpy(symbol_info->_heap_graph_ref, heap_graph_ref, sizeof(int) * symbol_info->graph_ref_size);
90
2
    }
91
35
    if ((symbol_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE) && 
symbol_info->input_size > 01
)
92
1
    {
93
1
      int* const inputs = symbol_info->p_while.inputs;
94
1
      symbol_info->p_while.inputs = (int*)ccmalloc(sizeof(int) * symbol_info->p_while.input_size);
95
1
      memcpy(symbol_info->p_while.inputs, inputs, sizeof(int) * symbol_info->p_while.input_size);
96
1
    }
97
35
  }
98
13
  if (graph->sources)
99
13
  {
100
13
    new_graph->sources = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), graph->sources->rnum, 0);
101
13
    new_graph->sources->rnum = graph->sources->rnum;
102
13
    memcpy(ccv_array_get(new_graph->sources, 0), ccv_array_get(graph->sources, 0), sizeof(ccv_nnc_graph_exec_symbol_t) * graph->sources->rnum);
103
26
    for (i = 0; i < new_graph->sources->rnum; 
i++13
)
104
13
      ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(new_graph->sources, i))->graph = new_graph;
105
13
  }
106
13
  if (graph->destinations)
107
13
  {
108
13
    new_graph->destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), graph->destinations->rnum, 0);
109
13
    new_graph->destinations->rnum = graph->destinations->rnum;
110
13
    memcpy(ccv_array_get(new_graph->destinations, 0), ccv_array_get(graph->destinations, 0), sizeof(ccv_nnc_graph_exec_symbol_t) * graph->destinations->rnum);
111
26
    for (i = 0; i < new_graph->destinations->rnum; 
i++13
)
112
13
      ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(new_graph->destinations, i))->graph = new_graph;
113
13
  }
114
13
  if (graph->breakpoints)
115
13
  {
116
13
    new_graph->breakpoints = (ccv_nnc_graph_exec_symbol_t*)ccmalloc(sizeof(ccv_nnc_graph_exec_symbol_t) * graph->breakpoint_size);
117
13
    memcpy(new_graph->breakpoints, graph->breakpoints, sizeof(ccv_nnc_graph_exec_symbol_t) * graph->breakpoint_size);
118
26
    for (i = 0; i < graph->breakpoint_size; 
i++13
)
119
13
      new_graph->breakpoints[i].graph = new_graph;
120
13
  }
121
13
  if (graph->backward.tensor_symbol_idx)
122
1
  {
123
1
    new_graph->backward.tensor_symbol_idx = (int*)ccmalloc(sizeof(int) * (new_graph->backward.tensor_symbol_size + new_graph->backward.exec_symbol_size));
124
1
    if (new_graph->backward.tensor_symbol_size > 0)
125
1
      memcpy(new_graph->backward.tensor_symbol_idx, graph->backward.tensor_symbol_idx, sizeof(int) * new_graph->backward.tensor_symbol_size);
126
1
    new_graph->backward.exec_symbol_idx = new_graph->backward.tensor_symbol_idx + new_graph->backward.tensor_symbol_size;
127
1
    if (new_graph->backward.exec_symbol_size > 0)
128
1
      memcpy(new_graph->backward.exec_symbol_idx, graph->backward.exec_symbol_idx, sizeof(int) * new_graph->backward.exec_symbol_size);
129
1
  }
130
13
  if (subst)
131
13
  {
132
48
    for (i = 0; i < new_graph->exec_symbol_info->rnum; 
i++35
)
133
35
    {
134
35
      ccv_nnc_graph_exec_symbol_info_t* const symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(new_graph->exec_symbol_info, i);
135
35
      if (!CCV_NNC_GRAPH_EXEC_IS_DEAD(symbol_info->flags))
136
33
      {
137
33
        symbol_info->cmd = subst((ccv_nnc_graph_exec_symbol_t){
138
33
          .d = i,
139
33
          .graph = graph,
140
33
        }, symbol_info->cmd);
141
33
        if (symbol_info->cmd.cmd != CCV_NNC_GRAPH_FORWARD && symbol_info->cmd.cmd != CCV_NNC_GRAPH_BACKWARD)
142
33
        {
143
33
          symbol_info->graph_ref_size = 0;
144
33
          if (symbol_info->_heap_graph_ref)
145
2
          {
146
2
            ccfree(symbol_info->_heap_graph_ref);
147
2
            symbol_info->_heap_graph_ref = 0;
148
2
          }
149
33
        }
150
33
      }
151
35
    }
152
13
  }
153
  // TODO: See how and if I need to dup sub-graphs. I also need to figure out what's the relationship between this graph
154
  // and its parent graph (or how can we use the symbol from the graph properly).
155
13
  new_graph->sub_graphs = 0;
156
13
  return new_graph;
157
13
}
158
159
ccv_nnc_tensor_symbol_t ccv_nnc_tensor_symbol_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_param_t info, const char* const name)
160
100k
{
161
100k
  ccv_nnc_tensor_symbol_t symbol = {
162
100k
    .d = graph->tensor_symbol_info->rnum,
163
100k
    .graph = graph
164
100k
  };
165
100k
  ccv_nnc_tensor_symbol_info_t symbol_info = {
166
100k
    .info = info,
167
100k
  };
168
100k
  if (name)
169
4.79k
  {
170
4.79k
    const size_t len = strnlen(name, 63);
171
4.79k
    const size_t n = len + 1;
172
4.79k
    symbol_info.name = (char*)ccmalloc(n);
173
    // Don't use strndup because this way I can have custom allocator (for ccmalloc).
174
4.79k
    memcpy(symbol_info.name, name, n);
175
4.79k
    symbol_info.name[len] = 0;
176
4.79k
  }
177
100k
  if (graph->reuse.tensor >= 0)
178
16.2k
  {
179
16.2k
    const int reuse_tensor_d = graph->reuse.tensor;
180
16.2k
    assert(reuse_tensor_d < graph->tensor_symbol_info->rnum);
181
16.2k
    *(ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, reuse_tensor_d) = symbol_info;
182
16.2k
    int i;
183
16.2k
    graph->reuse.tensor = -1;
184
29.9k
    for (i = reuse_tensor_d + 1; i < graph->tensor_symbol_info->rnum && 
graph->reuse.tensor < 022.5k
;
i++13.6k
)
185
13.6k
      if (CCV_NNC_TENSOR_SYMBOL_IS_DEAD(((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, i))->flags))
186
11.4k
        graph->reuse.tensor = i;
187
16.2k
    symbol.d = reuse_tensor_d;
188
16.2k
  } else
189
84.5k
    ccv_array_push(graph->tensor_symbol_info, &symbol_info);
190
100k
  if (graph->hooks.tensor_symbol_new.func)
191
47.0k
    graph->hooks.tensor_symbol_new.func(graph->hooks.tensor_symbol_new.context, symbol, info, name);
192
100k
  return symbol;
193
100k
}
194
195
void* ccv_nnc_tensor_symbol_new_hook(ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_tensor_symbol_new_hook_f hook, void* context, ccv_nnc_tensor_symbol_new_hook_f* previous_hook)
196
11.7k
{
197
11.7k
  if (previous_hook)
198
4
    *previous_hook = graph->hooks.tensor_symbol_new.func;
199
11.7k
  void* const prev = graph->hooks.tensor_symbol_new.context;
200
11.7k
  graph->hooks.tensor_symbol_new.func = hook;
201
11.7k
  graph->hooks.tensor_symbol_new.context = context;
202
11.7k
  return prev;
203
11.7k
}
204
205
ccv_nnc_tensor_symbol_t ccv_nnc_tensor_symbol_alias_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info, const char* const name)
206
4.77k
{
207
4.77k
  assert(tensor_symbol.graph == graph);
208
4.77k
  int d = tensor_symbol.d;
209
4.77k
  assert(d >= 0 && d < graph->tensor_symbol_info->rnum);
210
4.77k
  ccv_nnc_tensor_symbol_info_t* info_d = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, d);
211
  // Find the root tensor that is not an alias.
212
4.77k
  while (info_d->alias_ref)
213
1
  {
214
1
    d = info_d->alias_ref - 1;
215
1
    assert(d >= 0 && d < graph->tensor_symbol_info->rnum);
216
1
    info_d = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, d);
217
1
  }
218
4.77k
  ccv_nnc_tensor_symbol_t alias = {
219
4.77k
    .d = graph->tensor_symbol_info->rnum,
220
4.77k
    .graph = graph
221
4.77k
  };
222
  // Alias comes in two shapes: the total tensor count is strictly smaller or equal to.
223
  // If it is not auto, check dimensions.
224
4.77k
  if (!ccv_nnc_is_tensor_auto(info_d->info))
225
4.77k
    { assert((size_t)stride[0] * info.dim[0] <= ccv_nnc_tensor_count(info_d->info)); }
226
4.77k
  ccv_nnc_tensor_symbol_info_t alias_info = {
227
4.77k
    .alias_ref = d + 1,
228
4.77k
    .info = info,
229
4.77k
  };
230
4.77k
  if (name)
231
77
  {
232
77
    const size_t len = strnlen(name, 63);
233
77
    const size_t n = len + 1;
234
77
    alias_info.name = (char*)ccmalloc(n);
235
    // Don't use strndup because this way I can have custom allocator (for ccmalloc).
236
77
    memcpy(alias_info.name, name, n);
237
77
    alias_info.name[len] = 0;
238
77
  }
239
4.77k
  memcpy(alias_info.ofs, ofs, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
240
4.77k
  memcpy(alias_info.stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
241
4.77k
  if (graph->reuse.tensor >= 0)
242
6
  {
243
6
    const int reuse_tensor_d = graph->reuse.tensor;
244
6
    *(ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, reuse_tensor_d) = alias_info;
245
6
    int i;
246
6
    graph->reuse.tensor = -1;
247
12
    for (i = reuse_tensor_d + 1; i < graph->tensor_symbol_info->rnum && graph->reuse.tensor < 0; 
i++6
)
248
6
      if (CCV_NNC_TENSOR_SYMBOL_IS_DEAD(((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, i))->flags))
249
6
        graph->reuse.tensor = i;
250
6
    alias.d = reuse_tensor_d;
251
6
  } else
252
4.77k
    ccv_array_push(graph->tensor_symbol_info, &alias_info);
253
4.77k
  if (graph->hooks.tensor_symbol_alias_new.func)
254
1.49k
    graph->hooks.tensor_symbol_alias_new.func(graph->hooks.tensor_symbol_alias_new.context, alias, tensor_symbol, ofs, stride, info, name);
255
4.77k
  return alias;
256
4.77k
}
257
258
void* ccv_nnc_tensor_symbol_alias_new_hook(ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_tensor_symbol_alias_new_hook_f hook, void* context, ccv_nnc_tensor_symbol_alias_new_hook_f* previous_hook)
259
11.7k
{
260
11.7k
  if (previous_hook)
261
4
    *previous_hook = graph->hooks.tensor_symbol_alias_new.func;
262
11.7k
  void* const prev = graph->hooks.tensor_symbol_alias_new.context;
263
11.7k
  graph->hooks.tensor_symbol_alias_new.func = hook;
264
11.7k
  graph->hooks.tensor_symbol_alias_new.context = context;
265
11.7k
  return prev;
266
11.7k
}
267
268
ccv_nnc_tensor_symbol_t ccv_nnc_tensor_symbol_alias_to(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor_symbol)
269
103k
{
270
103k
  assert(tensor_symbol.graph == graph);
271
103k
  int d = tensor_symbol.d;
272
103k
  assert(d >= 0 && d < graph->tensor_symbol_info->rnum);
273
103k
  ccv_nnc_tensor_symbol_info_t* info_d = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, d);
274
  // Find the root tensor that is not an alias.
275
105k
  while (info_d->alias_ref)
276
2.05k
  {
277
2.05k
    d = info_d->alias_ref - 1;
278
2.05k
    assert(d >= 0 && d < graph->tensor_symbol_info->rnum);
279
2.05k
    info_d = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, d);
280
2.05k
  }
281
103k
  if (d != tensor_symbol.d)
282
2.05k
    return (ccv_nnc_tensor_symbol_t){
283
2.05k
      .d = d,
284
2.05k
      .graph = graph
285
2.05k
    };
286
101k
  return (ccv_nnc_tensor_symbol_t){
287
101k
    .d = CCV_NNC_NO_TENSOR_SYMBOL,
288
101k
    .graph = 0
289
101k
  };
290
103k
}
291
292
// Resolve this tensor symbol to the current graph. If cannot find, return no symbol.
293
ccv_nnc_tensor_symbol_t ccv_nnc_tensor_symbol_resolve(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor_symbol)
294
190k
{
295
190k
  if (graph == tensor_symbol.graph)
296
190k
    return tensor_symbol;
297
38
  ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(tensor_symbol.graph->tensor_symbol_info, tensor_symbol.d);
298
38
  assert(!symbol_info->alias_ref);
299
  // Find if the symbol is in the sub-graph.
300
38
  const ccv_nnc_symbolic_graph_t* curr_graph = tensor_symbol.graph;
301
38
  assert(tensor_symbol.d >= 0 && tensor_symbol.d < curr_graph->tensor_symbol_info->rnum);
302
78
  
while (38
curr_graph &&
curr_graph != graph57
)
303
40
    curr_graph = curr_graph->p;
304
38
  if (curr_graph)
305
17
  {
306
    // The graph is a parent of the symbol passed in.
307
17
    curr_graph = tensor_symbol.graph;
308
17
    ccv_nnc_tensor_symbol_info_t* curr_symbol_info = symbol_info;
309
17
    ccv_nnc_tensor_symbol_t curr_symbol = tensor_symbol;
310
22
    while (curr_graph != graph)
311
17
    {
312
17
      ccv_nnc_symbolic_graph_t* const p = curr_graph->p;
313
      // Cannot find the relevant one in the parent graph, return no symbol.
314
17
      if (!curr_symbol_info->p_ref)
315
12
        return (ccv_nnc_tensor_symbol_t){
316
12
          .d = CCV_NNC_NO_TENSOR_SYMBOL,
317
12
          .graph = graph,
318
12
        };
319
5
      curr_symbol.d = curr_symbol_info->p_ref - 1;
320
5
      curr_symbol.graph = p;
321
5
      assert(curr_symbol.d >= 0 && curr_symbol.d < p->tensor_symbol_info->rnum);
322
5
      curr_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(p->tensor_symbol_info, curr_symbol.d);
323
      // Move on.
324
5
      curr_graph = p;
325
5
    }
326
5
    return curr_symbol;
327
17
  }
328
  // Otherwise, if the symbol is in the parent graph, this is a bit more expensive because I need to keep a trace stack.
329
21
  curr_graph = graph;
330
21
  int d;
331
42
  for (d = 0; curr_graph && curr_graph != tensor_symbol.graph; 
d++21
)
332
21
    curr_graph = curr_graph->p;
333
21
  curr_graph = graph;
334
21
  assert(d > 0);
335
21
  int trace[d];
336
42
  for (d = 0; curr_graph && curr_graph != tensor_symbol.graph; 
d++21
)
337
21
  {
338
21
    const int p_idx = curr_graph->p_idx - 1;
339
21
    trace[d] = p_idx;
340
21
    curr_graph = curr_graph->p;
341
21
  }
342
  // If it is not in both the parent graph and the sub-graph, the input is invalid.
343
21
  assert(curr_graph);
344
21
  curr_graph = tensor_symbol.graph;
345
21
  ccv_nnc_tensor_symbol_info_t* curr_symbol_info = symbol_info;
346
21
  ccv_nnc_tensor_symbol_t curr_symbol = tensor_symbol;
347
  // The graph is a sub graph of the symbol passed in.
348
21
  int i;
349
42
  for (i = d - 1; i >= 0; 
i--21
)
350
21
  {
351
21
    const int p_idx = trace[i];
352
21
    assert(p_idx >= 0);
353
    // Cannot find the relevant one in the sub-graph, return no symbol.
354
21
    if (!curr_graph->sub_graphs || !curr_symbol_info->s_ref ||
355
21
      curr_symbol_info->s_ref->rnum != curr_graph->sub_graphs->rnum)
356
0
        return (ccv_nnc_tensor_symbol_t){
357
0
          .d = CCV_NNC_NO_TENSOR_SYMBOL,
358
0
          .graph = graph,
359
0
        };
360
21
    assert(p_idx >= 0 && p_idx < curr_symbol_info->s_ref->rnum);
361
21
    const int s_idx = *(int*)ccv_array_get(curr_symbol_info->s_ref, p_idx);
362
21
    ccv_nnc_symbolic_graph_t* const s = *(ccv_nnc_symbolic_graph_t**)ccv_array_get(curr_graph->sub_graphs, p_idx);
363
21
    curr_symbol.d = s_idx - 1;
364
21
    curr_symbol.graph = s;
365
21
    assert(curr_symbol.d >= 0 && curr_symbol.d < s->tensor_symbol_info->rnum);
366
21
    curr_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(s->tensor_symbol_info, curr_symbol.d);
367
    // Move on.
368
21
    curr_graph = s;
369
21
  }
370
21
  return curr_symbol;
371
21
}
372
373
// This method generate tensor symbols and their links along the way when traverse the graph.
374
enum {
375
  MAP_TENSOR_USE_AS_INPUT,
376
  MAP_TENSOR_USE_AS_OUTPUT,
377
};
378
379
static void _ccv_nnc_graph_exec_add_input_if_needed(ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info, const int d)
380
52
{
381
52
  int i;
382
83
  for (i = 0; i < exec_symbol_info->input_size; 
i++31
)
383
53
    if (exec_symbol_info->inputs[i] == d)
384
22
      return; // No need to continue, this symbol already exists as input.
385
  // Expand the array.
386
30
  if (!exec_symbol_info->input_size && 
!exec_symbol_info->output_size16
)
387
16
  {
388
16
    exec_symbol_info->inputs = (int*)ccmalloc(sizeof(int));
389
16
    exec_symbol_info->inputs[0] = d;
390
16
    exec_symbol_info->input_size = 1;
391
16
    exec_symbol_info->outputs = exec_symbol_info->inputs + 1;
392
16
    return;
393
16
  }
394
14
  exec_symbol_info->inputs = (int*)ccrealloc(exec_symbol_info->inputs, sizeof(int) * (exec_symbol_info->input_size + 1 + exec_symbol_info->output_size));
395
14
  exec_symbol_info->outputs = exec_symbol_info->inputs + exec_symbol_info->input_size;
396
14
  if (exec_symbol_info->output_size)
397
6
    memmove(exec_symbol_info->outputs + 1, exec_symbol_info->outputs, sizeof(int) * exec_symbol_info->output_size); 
398
14
  exec_symbol_info->inputs[exec_symbol_info->input_size] = d;
399
14
  ++exec_symbol_info->input_size;
400
14
  exec_symbol_info->outputs = exec_symbol_info->inputs + exec_symbol_info->input_size;
401
14
}
402
403
static void _ccv_nnc_graph_exec_add_output_if_needed(ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info, const int d)
404
52
{
405
52
  int i;
406
69
  for (i = 0; i < exec_symbol_info->output_size; 
i++17
)
407
45
    if (exec_symbol_info->outputs[i] == d)
408
28
      return; // No need to continue, this symbol already exists as output.
409
  // Expand the array.
410
24
  if (!exec_symbol_info->input_size && 
!exec_symbol_info->output_size7
)
411
3
  {
412
3
    exec_symbol_info->inputs = (int*)ccmalloc(sizeof(int));
413
3
    exec_symbol_info->outputs = exec_symbol_info->inputs;
414
3
    exec_symbol_info->outputs[0] = d;
415
3
    exec_symbol_info->output_size = 1;
416
3
    return;
417
3
  }
418
21
  exec_symbol_info->inputs = (int*)ccrealloc(exec_symbol_info->inputs, sizeof(int) * (exec_symbol_info->input_size + exec_symbol_info->output_size + 1));
419
21
  exec_symbol_info->outputs = exec_symbol_info->inputs + exec_symbol_info->input_size;
420
21
  exec_symbol_info->outputs[exec_symbol_info->output_size] = d;
421
21
  ++exec_symbol_info->output_size;
422
21
}
423
424
void ccv_nnc_tensor_symbol_pair_with(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor_symbol, const ccv_nnc_tensor_symbol_t pair_tensor_symbol)
425
11
{
426
11
  assert(tensor_symbol.graph == graph);
427
11
  assert(tensor_symbol.d >= 0);
428
11
  assert(tensor_symbol.d < graph->tensor_symbol_info->rnum);
429
11
  assert(pair_tensor_symbol.graph == graph->pair);
430
11
  assert(pair_tensor_symbol.d >= 0);
431
11
  assert(pair_tensor_symbol.d < graph->pair->tensor_symbol_info->rnum);
432
11
  ccv_nnc_tensor_symbol_info_t* const tensor_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, tensor_symbol.d);
433
11
  tensor_info->pair_ref = pair_tensor_symbol.d + 1;
434
11
}
435
436
static int _ccv_nnc_symbolic_graph_map_tensor_symbol_no_alias(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t symbol, const int map_use)
437
68
{
438
68
  assert(graph && symbol.graph);
439
68
  assert(symbol.graph != graph);
440
68
  ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(symbol.graph->tensor_symbol_info, symbol.d);
441
68
  assert(!symbol_info->alias_ref);
442
  // Find if the symbol is in the sub-graph.
443
68
  const ccv_nnc_symbolic_graph_t* curr_graph = symbol.graph;
444
68
  assert(symbol.d >= 0 && symbol.d < curr_graph->tensor_symbol_info->rnum);
445
139
  
while (68
curr_graph &&
curr_graph != graph90
)
446
71
    curr_graph = curr_graph->p;
447
68
  if (curr_graph)
448
19
  {
449
    // The graph is a parent of the symbol passed in. For this case, if we are connecting this symbol to an exec as input,
450
    // that means it must be an output in these sub-graphs. Otherwise, if we are connecting this symbol to an exec as output,
451
    // it must be an input in these sub-graphs.
452
19
    curr_graph = symbol.graph;
453
19
    ccv_nnc_tensor_symbol_info_t* curr_symbol_info = symbol_info;
454
19
    ccv_nnc_tensor_symbol_t curr_symbol = symbol;
455
38
    while (curr_graph != graph)
456
19
    {
457
19
      ccv_nnc_symbolic_graph_t* const p = curr_graph->p;
458
      // I need to find the symbol whether it exists or not before creating new one.
459
19
      ccv_nnc_tensor_symbol_t new_symbol;
460
19
      ccv_nnc_tensor_symbol_info_t* new_symbol_info;
461
19
      if (!curr_symbol_info->p_ref)
462
18
      {
463
18
        new_symbol = ccv_nnc_tensor_symbol_new(p, curr_symbol_info->info, curr_symbol_info->name);
464
18
        new_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(p->tensor_symbol_info, new_symbol.d);
465
18
        curr_symbol_info->p_ref = new_symbol.d + 1;
466
18
        new_symbol_info->s_ref = ccv_array_new(sizeof(int), p->sub_graphs->rnum, 0);
467
18
        new_symbol_info->s_ref->rnum = p->sub_graphs->rnum;
468
18
        ccv_array_zero(new_symbol_info->s_ref);
469
18
        *(int*)ccv_array_get(new_symbol_info->s_ref, curr_graph->p_idx - 1) = curr_symbol.d + 1;
470
18
      } else {
471
1
        new_symbol.d = curr_symbol_info->p_ref - 1;
472
1
        new_symbol.graph = p;
473
1
        assert(new_symbol.d >= 0 && new_symbol.d < p->tensor_symbol_info->rnum);
474
1
        new_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(p->tensor_symbol_info, new_symbol.d);
475
1
      }
476
19
      if (curr_graph->exec_idx)
477
19
      {
478
        // This is a sub-graph.
479
19
        assert(p);
480
19
        assert(curr_graph->exec_idx > 0 && curr_graph->exec_idx <= p->exec_symbol_info->rnum);
481
19
        ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(p->exec_symbol_info, curr_graph->exec_idx - 1);
482
19
        switch (map_use)
483
19
        {
484
19
          case MAP_TENSOR_USE_AS_INPUT:
485
19
            _ccv_nnc_graph_exec_add_output_if_needed(exec_symbol_info, new_symbol.d);
486
19
            break;
487
0
          case MAP_TENSOR_USE_AS_OUTPUT:
488
0
            _ccv_nnc_graph_exec_add_input_if_needed(exec_symbol_info, new_symbol.d);
489
0
            break;
490
19
        }
491
19
      }
492
      // Move on.
493
19
      curr_symbol = new_symbol;
494
19
      curr_symbol_info = new_symbol_info;
495
19
      curr_graph = p;
496
19
    }
497
19
    return curr_symbol.d;
498
19
  }
499
  // Otherwise, if the symbol is in the parent graph, this is a bit more expensive because I need to keep a trace stack.
500
49
  curr_graph = graph;
501
49
  int d;
502
99
  for (d = 0; curr_graph && curr_graph != symbol.graph; 
d++50
)
503
50
    curr_graph = curr_graph->p;
504
49
  curr_graph = graph;
505
49
  assert(d > 0);
506
49
  int trace[d];
507
99
  for (d = 0; curr_graph && curr_graph != symbol.graph; 
d++50
)
508
50
  {
509
50
    const int p_idx = curr_graph->p_idx - 1;
510
50
    trace[d] = p_idx;
511
50
    curr_graph = curr_graph->p;
512
50
  }
513
  // If it is not in both the parent graph and the sub-graph, the input is invalid.
514
49
  assert(curr_graph);
515
49
  curr_graph = symbol.graph;
516
49
  ccv_nnc_tensor_symbol_info_t* curr_symbol_info = symbol_info;
517
49
  ccv_nnc_tensor_symbol_t curr_symbol = symbol;
518
  // The graph is a sub graph of the symbol passed in. For this case, if we are connecting this symbol to an exec as input,
519
  // that means it must be an input in these parent graphs. Otherwise, if we are connecting this symbol to an exec as output,
520
  // it must be an output in these parent graphs.
521
49
  int i;
522
99
  for (i = d - 1; i >= 0; 
i--50
)
523
50
  {
524
50
    const int p_idx = trace[i];
525
50
    assert(p_idx >= 0);
526
50
    assert(curr_graph->sub_graphs);
527
50
    if (!curr_symbol_info->s_ref)
528
36
    {
529
36
      curr_symbol_info->s_ref = ccv_array_new(sizeof(int), curr_graph->sub_graphs->rnum, 0);
530
36
      curr_symbol_info->s_ref->rnum = curr_graph->sub_graphs->rnum;
531
36
      ccv_array_zero(curr_symbol_info->s_ref);
532
36
    } else 
if (14
curr_symbol_info->s_ref->rnum != curr_graph->sub_graphs->rnum14
)
533
8
      ccv_array_resize(curr_symbol_info->s_ref, curr_graph->sub_graphs->rnum);
534
50
    assert(p_idx >= 0 && p_idx < curr_symbol_info->s_ref->rnum);
535
50
    const int s_idx = *(int*)ccv_array_get(curr_symbol_info->s_ref, p_idx);
536
50
    ccv_nnc_symbolic_graph_t* const s = *(ccv_nnc_symbolic_graph_t**)ccv_array_get(curr_graph->sub_graphs, p_idx);
537
50
    ccv_nnc_tensor_symbol_t new_symbol;
538
50
    ccv_nnc_tensor_symbol_info_t* new_symbol_info;
539
    // I need to find the symbol whether it exists or not before creating new one.
540
50
    if (!s_idx)
541
44
    {
542
44
      new_symbol = ccv_nnc_tensor_symbol_new(s, symbol_info->info, symbol_info->name);
543
44
      new_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(s->tensor_symbol_info, new_symbol.d);
544
44
      new_symbol_info->p_ref = curr_symbol.d + 1;
545
44
      *(int*)ccv_array_get(curr_symbol_info->s_ref, p_idx) = new_symbol.d + 1;
546
44
    } else {
547
6
      new_symbol.d = s_idx - 1;
548
6
      new_symbol.graph = s;
549
6
      assert(new_symbol.d >= 0 && new_symbol.d < s->tensor_symbol_info->rnum);
550
6
      new_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(s->tensor_symbol_info, new_symbol.d);
551
6
    }
552
50
    if (s->exec_idx)
553
50
    {
554
50
      assert(s->p); // This is a sub-graph.
555
50
      assert(s->exec_idx > 0 && s->exec_idx <= curr_graph->exec_symbol_info->rnum);
556
50
      ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(curr_graph->exec_symbol_info, s->exec_idx - 1);
557
50
      switch (map_use)
558
50
      {
559
45
        case MAP_TENSOR_USE_AS_INPUT:
560
45
          _ccv_nnc_graph_exec_add_input_if_needed(exec_symbol_info, curr_symbol.d);
561
45
          break;
562
5
        case MAP_TENSOR_USE_AS_OUTPUT:
563
5
          _ccv_nnc_graph_exec_add_output_if_needed(exec_symbol_info, curr_symbol.d);
564
5
          break;
565
50
      }
566
50
    }
567
    // Move on.
568
50
    curr_symbol = new_symbol;
569
50
    curr_symbol_info = new_symbol_info;
570
50
    curr_graph = s;
571
50
  }
572
49
  return curr_symbol.d;
573
49
}
574
575
static int _ccv_nnc_symbolic_graph_map_tensor_symbol(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t symbol, const int map_use)
576
68
{
577
68
  assert(graph && symbol.graph);
578
68
  assert(symbol.graph != graph);
579
68
  ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(symbol.graph->tensor_symbol_info, symbol.d);
580
68
  if (!symbol_info->alias_ref)
581
62
    return _ccv_nnc_symbolic_graph_map_tensor_symbol_no_alias(graph, symbol, map_use);
582
6
  const int d = symbol_info->alias_ref - 1;
583
6
  assert(d >= 0 && d < symbol.graph->tensor_symbol_info->rnum);
584
6
  const int map_d = _ccv_nnc_symbolic_graph_map_tensor_symbol_no_alias(graph, (ccv_nnc_tensor_symbol_t){
585
6
    .graph = symbol.graph,
586
6
    .d = d
587
6
  }, map_use);
588
6
  const ccv_nnc_tensor_symbol_t alias = ccv_nnc_tensor_symbol_alias_new(graph, (ccv_nnc_tensor_symbol_t){
589
6
    .graph = graph,
590
6
    .d = map_d
591
6
  }, symbol_info->ofs, symbol_info->stride, symbol_info->info, symbol_info->name);
592
6
  return alias.d;
593
6
}
594
595
// Translate `symbol` into the raw integer index to be used inside `graph`.
// - Non-negative d: returned as-is when the symbol already belongs to `graph`, otherwise the symbol is
//   mapped into `graph` as an input and the mapped index is returned.
// - Negative d: returned as-is when the symbol belongs to `graph` or is CCV_NNC_NO_TENSOR_SYMBOL.
//   Otherwise the number of ->p hops from `graph` up to symbol.graph is counted and returned through
//   CCV_NNC_ENCODE_WHILE_COUNT_SYMBOL (symbol.graph must be reachable through the parent chain).
int ccv_nnc_tensor_symbol_map_raw(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t symbol)
{
  if (symbol.d >= 0)
  {
    if (symbol.graph == graph)
      return symbol.d;
    return _ccv_nnc_symbolic_graph_map_tensor_symbol(graph, symbol, MAP_TENSOR_USE_AS_INPUT);
  }
  if (symbol.graph == graph || symbol.d == CCV_NNC_NO_TENSOR_SYMBOL)
    return symbol.d;
  // Walk up the parent chain until we reach the graph that owns the symbol.
  int depth = 0;
  ccv_nnc_symbolic_graph_t* ancestor = graph;
  while (ancestor && ancestor != symbol.graph)
  {
    ancestor = ancestor->p;
    ++depth;
  }
  assert(ancestor == symbol.graph);
  return CCV_NNC_ENCODE_WHILE_COUNT_SYMBOL(depth);
}
608
609
// Hook up a tensor symbol in `src_graph` with a tensor symbol in `dest_graph`, where one graph must be
// the direct parent of the other (asserted). After the call:
// - the sub-graph tensor's p_ref holds the parent tensor index + 1;
// - the parent tensor's s_ref array (one int per sub-graph of the parent) holds the sub-graph tensor
//   index + 1 at slot sub_graph->p_idx - 1 (that slot must not have been assigned before);
// - the exec symbol that represents the sub-graph in the parent gets the parent tensor added as an
//   input (when src_graph is the parent) or as an output (when src_graph is the sub-graph).
// Symbols that do not yet belong to their respective graph are mapped into it first.
void ccv_nnc_tensor_symbol_hookup(ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const ccv_nnc_tensor_symbol_t src_tensor_symbol, const ccv_nnc_tensor_symbol_t dest_tensor_symbol)
{
  assert(src_graph != dest_graph);
  assert(src_graph->p == dest_graph || dest_graph->p == src_graph);
  assert(src_tensor_symbol.d >= 0);
  assert(dest_tensor_symbol.d >= 0);
  ccv_nnc_tensor_symbol_t tensor_symbol = src_tensor_symbol;
  if (tensor_symbol.graph != src_graph)
    tensor_symbol = (ccv_nnc_tensor_symbol_t){
      .graph = src_graph,
      .d = _ccv_nnc_symbolic_graph_map_tensor_symbol(src_graph, tensor_symbol, MAP_TENSOR_USE_AS_INPUT),
    };
  ccv_nnc_tensor_symbol_t sub_tensor_symbol = dest_tensor_symbol;
  if (sub_tensor_symbol.graph != dest_graph)
    sub_tensor_symbol = (ccv_nnc_tensor_symbol_t){
      .graph = dest_graph,
      .d = _ccv_nnc_symbolic_graph_map_tensor_symbol(dest_graph, sub_tensor_symbol, MAP_TENSOR_USE_AS_OUTPUT),
    };
  // Find out whether dest_graph is an ancestor of src_graph.
  ccv_nnc_symbolic_graph_t* curr_graph = src_graph;
  while (curr_graph && curr_graph != dest_graph)
    curr_graph = curr_graph->p;
  ccv_nnc_symbolic_graph_t* graph;
  ccv_nnc_symbolic_graph_t* sub_graph;
  int map_use;
  if (curr_graph)
  {
    // src_graph is the sub graph, dest_graph is the parent graph.
    graph = dest_graph;
    sub_graph = src_graph;
    // Swap tensor_symbol and sub_tensor_symbol
    ccv_nnc_tensor_symbol_t x;
    CCV_SWAP(tensor_symbol, sub_tensor_symbol, x);
    map_use = MAP_TENSOR_USE_AS_OUTPUT;
  } else {
    graph = src_graph;
    sub_graph = dest_graph;
    map_use = MAP_TENSOR_USE_AS_INPUT;
  }
  // Climb from sub_graph until we find the graph whose direct parent is `graph`.
  ccv_nnc_symbolic_graph_t* p_graph = sub_graph;
  while (p_graph && p_graph->p != graph)
    p_graph = p_graph->p;
  assert(p_graph);
  if (p_graph != sub_graph)
  {
    sub_tensor_symbol.d = _ccv_nnc_symbolic_graph_map_tensor_symbol(p_graph, sub_tensor_symbol, map_use);
    sub_tensor_symbol.graph = p_graph;
    sub_graph = p_graph;
  }
  // Validate every index we are about to use before touching any state.
  assert(tensor_symbol.d >= 0 && tensor_symbol.d < graph->tensor_symbol_info->rnum);
  assert(sub_tensor_symbol.d >= 0 && sub_tensor_symbol.d < sub_graph->tensor_symbol_info->rnum);
  assert(graph->sub_graphs);
  // exec_idx is 1-based; it is used below to index into the parent's exec symbols.
  assert(sub_graph->exec_idx > 0 && sub_graph->exec_idx <= graph->exec_symbol_info->rnum);
  ccv_nnc_tensor_symbol_info_t* const sub_tensor_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(sub_graph->tensor_symbol_info, sub_tensor_symbol.d);
  sub_tensor_info->p_ref = tensor_symbol.d + 1;
  ccv_nnc_tensor_symbol_info_t* const tensor_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, tensor_symbol.d);
  if (!tensor_info->s_ref)
  {
    // Lazily create s_ref with one zeroed slot per sub-graph.
    tensor_info->s_ref = ccv_array_new(sizeof(int), graph->sub_graphs->rnum, 0);
    tensor_info->s_ref->rnum = graph->sub_graphs->rnum;
    ccv_array_zero(tensor_info->s_ref);
  } else if (tensor_info->s_ref->rnum != graph->sub_graphs->rnum)
    ccv_array_resize(tensor_info->s_ref, graph->sub_graphs->rnum);
  const int p_idx = sub_graph->p_idx - 1;
  assert(p_idx >= 0 && p_idx < tensor_info->s_ref->rnum);
  const int s_idx = *(int*)ccv_array_get(tensor_info->s_ref, p_idx);
  assert(s_idx == 0); // Otherwise it is assigned before
  *(int*)ccv_array_get(tensor_info->s_ref, p_idx) = sub_tensor_symbol.d + 1;
  ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, sub_graph->exec_idx - 1);
  switch (map_use)
  {
    case MAP_TENSOR_USE_AS_INPUT:
      _ccv_nnc_graph_exec_add_input_if_needed(exec_symbol_info, tensor_symbol.d);
      break;
    case MAP_TENSOR_USE_AS_OUTPUT:
      _ccv_nnc_graph_exec_add_output_if_needed(exec_symbol_info, tensor_symbol.d);
      break;
  }
}
685
686
// For each (source, destination) pair in symbol_map, record a bypass between the two tensor symbols of
// `graph`: destination.bypass_ref = source index + 1 and source.r_bypass_ref = destination index + 1.
// Both symbols are resolved against `graph` first and must end up being non-alias symbols of `graph`.
void ccv_nnc_tensor_symbol_set_bypasses(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_map_t* const symbol_map, const int symbol_map_size)
{
  int i;
  for (i = 0; i < symbol_map_size; i++)
  {
    const ccv_nnc_tensor_symbol_t source = ccv_nnc_tensor_symbol_resolve(graph, symbol_map[i].source);
    const ccv_nnc_tensor_symbol_t destination = ccv_nnc_tensor_symbol_resolve(graph, symbol_map[i].destination);
    assert(source.graph == graph);
    assert(destination.graph == graph);
    // Negative indices are special symbols and must never be used to index the array.
    assert(source.d >= 0 && source.d < graph->tensor_symbol_info->rnum);
    assert(destination.d >= 0 && destination.d < graph->tensor_symbol_info->rnum);
    ccv_nnc_tensor_symbol_info_t* source_tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, source.d);
    ccv_nnc_tensor_symbol_info_t* destination_tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, destination.d);
    // Parameterizing with an alias is not supported. To support parameterized loops (for SSA), the same
    // memory region is simply allocated to both tensors, which is what enables parameter passing. That
    // cannot work for aliases, because an alias can point to tensors of different sizes, and such
    // tensors cannot share the same memory region. The best way to parameterize an alias is to create a
    // new tensor of the same size, transfer the value over, and parameterize on that tensor instead.
    assert(!destination_tensor_symbol_info->alias_ref);
    assert(!source_tensor_symbol_info->alias_ref);
    destination_tensor_symbol_info->bypass_ref = source.d + 1;
    source_tensor_symbol_info->r_bypass_ref = destination.d + 1;
  }
}
710
711
// Replace the tensor parameters of `tensor` (which must belong to `graph`) with `info`. If the symbol has
// an assign_ref (stored 1-based), the same parameters are written to that symbol as well.
// Always returns 0.
int ccv_nnc_tensor_symbol_set(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor, const ccv_nnc_tensor_param_t info)
{
  assert(graph == tensor.graph);
  // Negative indices denote special symbols and cannot be used to index the array.
  assert(tensor.d >= 0 && tensor.d < graph->tensor_symbol_info->rnum);
  ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, tensor.d);
  symbol_info->info = info;
  // It also needs to propagate to assign_ref if there is one.
  if (symbol_info->assign_ref)
  {
    ccv_nnc_tensor_symbol_info_t* const assign_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, symbol_info->assign_ref - 1);
    assign_info->info = info;
  }
  return 0;
}
725
726
ccv_nnc_tensor_param_t ccv_nnc_tensor_symbol_params(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor)
727
72.7k
{
728
72.7k
  assert(graph == tensor.graph);
729
72.7k
  assert(tensor.d < graph->tensor_symbol_info->rnum);
730
72.7k
  ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, tensor.d);
731
72.7k
  return symbol_info->info;
732
72.7k
}
733
734
const char* ccv_nnc_tensor_symbol_name(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor)
735
0
{
736
0
  assert(graph == tensor.graph);
737
0
  assert(tensor.d < graph->tensor_symbol_info->rnum);
738
0
  ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, tensor.d);
739
0
  return symbol_info->name;
740
0
}
741
742
// Overwrite the ofs and stride of an alias tensor symbol. Both arrays must hold CCV_NNC_MAX_DIM_ALLOC ints.
// Returns -1 (and changes nothing) when `tensor` is not an alias, 0 otherwise.
int ccv_nnc_tensor_symbol_alias_set(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
{
  assert(graph == tensor.graph);
  // Negative indices denote special symbols and cannot be used to index the array.
  assert(tensor.d >= 0 && tensor.d < graph->tensor_symbol_info->rnum);
  ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, tensor.d);
  if (!symbol_info->alias_ref)
    return -1;
  memcpy(symbol_info->ofs, ofs, sizeof(symbol_info->ofs));
  memcpy(symbol_info->stride, stride, sizeof(symbol_info->stride));
  // We don't need to propagate to assign_ref because an alias cannot be a loop carry-over.
  assert(!symbol_info->assign_ref);
  return 0;
}
755
756
int ccv_nnc_tensor_symbol_alias_params(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor, int ofs[CCV_NNC_MAX_DIM_ALLOC], int stride[CCV_NNC_MAX_DIM_ALLOC])
757
38.5k
{
758
38.5k
  assert(graph == tensor.graph);
759
38.5k
  assert(tensor.d < graph->tensor_symbol_info->rnum);
760
38.5k
  ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, tensor.d);
761
38.5k
  if (!symbol_info->alias_ref)
762
36.5k
    return -1;
763
2.00k
  if (ofs)
764
2.00k
    memcpy(ofs, symbol_info->ofs, sizeof(symbol_info->ofs));
765
2.00k
  if (stride)
766
2.00k
    memcpy(stride, symbol_info->stride, sizeof(symbol_info->stride));
767
2.00k
  return 0;
768
38.5k
}
769
770
// Overwrite the flags of `tensor` (which must belong to `graph`) with `flags`. If the symbol has an
// assign_ref (stored 1-based), the same flags are written to that symbol as well.
void ccv_nnc_tensor_symbol_set_flags(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor, const int flags)
{
  assert(graph == tensor.graph);
  // Negative indices denote special symbols and cannot be used to index the array.
  assert(tensor.d >= 0 && tensor.d < graph->tensor_symbol_info->rnum);
  ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, tensor.d);
  symbol_info->flags = flags;
  // It also needs to propagate to assign_ref if there is one.
  if (symbol_info->assign_ref)
  {
    ccv_nnc_tensor_symbol_info_t* const assign_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, symbol_info->assign_ref - 1);
    assign_info->flags = flags;
  }
}
783
784
// Return the flags stored for `tensor`, which must belong to `graph`.
int ccv_nnc_tensor_symbol_flags(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor)
{
  assert(graph == tensor.graph);
  // Negative indices denote special symbols and cannot be used to index the array.
  assert(tensor.d >= 0 && tensor.d < graph->tensor_symbol_info->rnum);
  ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, tensor.d);
  return symbol_info->flags;
}
791
792
// Free a tensor symbol of `graph`: release its s_ref array and name, mark it dead, trim trailing dead
// symbols off the end of the tensor symbol array, and update graph->reuse.tensor (the slot index that is
// remembered for reuse, -1 when there is none).
void ccv_nnc_tensor_symbol_free(ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_tensor_symbol_t tensor)
{
  assert(graph == tensor.graph);
  // A negative index would make every write below land outside of the array.
  assert(tensor.d >= 0 && tensor.d < graph->tensor_symbol_info->rnum);
  ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, tensor.d);
  if (symbol_info->s_ref)
  {
    ccv_array_free(symbol_info->s_ref);
    symbol_info->s_ref = 0;
  }
  if (symbol_info->name)
  {
    ccfree(symbol_info->name);
    symbol_info->name = 0;
  }
  symbol_info->flags |= CCV_NNC_TENSOR_SYMBOL_DEAD;
  // Shrink the array so it ends right after the last live symbol. When no live symbol is left, rnum is
  // intentionally left as it is.
  int i;
  for (i = graph->tensor_symbol_info->rnum - 1; i >= 0; i--)
    if (!CCV_NNC_TENSOR_SYMBOL_IS_DEAD(((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, i))->flags))
    {
      graph->tensor_symbol_info->rnum = i + 1;
      break;
    }
  // The freed slot becomes the reuse candidate if it is still inside the array and is lower than the
  // current candidate (or there is none). Otherwise, drop a candidate that was trimmed away.
  if (tensor.d < graph->tensor_symbol_info->rnum &&
    (tensor.d < graph->reuse.tensor || graph->reuse.tensor < 0))
    graph->reuse.tensor = tensor.d;
  else if (graph->reuse.tensor >= graph->tensor_symbol_info->rnum)
    graph->reuse.tensor = -1;
}
821
822
// Set the inputs and outputs of an exec symbol. Inputs and outputs share one allocation (inputs first,
// outputs right after), which is created or resized here when there is at least one of either.
// Every input is translated with ccv_nnc_tensor_symbol_map_raw; an output is mapped into `graph` only when
// it belongs to another graph and has a non-negative index, otherwise its index is stored as-is.
// While doing so, the memory type / format / datatype bits of all resolved tensors are OR-ed together,
// and unless one of those tensors is "auto", they are used to pick the backend for the command.
static void _ccv_nnc_graph_exec_symbol_set_io(ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_graph_exec_symbol_info_t* const exec_info, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  exec_info->input_size = input_size;
  exec_info->output_size = output_size;
  if (input_size > 0 || output_size > 0)
  {
    if (exec_info->inputs)
      exec_info->inputs = ccrealloc(exec_info->inputs, sizeof(int) * (input_size + output_size));
    else
      exec_info->inputs = ccmalloc(sizeof(int) * (input_size + output_size));
    exec_info->outputs = exec_info->inputs + input_size;
  }
  int memory_bits = 0, format_bits = 0, datatype_bits = 0, has_auto = 0;
  int k;
  for (k = 0; k < input_size; k++)
  {
    const int d = ccv_nnc_tensor_symbol_map_raw(graph, inputs[k]);
    exec_info->inputs[k] = d;
    if (d < 0)
      continue; // Special symbols carry no tensor info.
    const ccv_nnc_tensor_symbol_info_t* const tensor_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, d);
    if (!has_auto)
      has_auto = ccv_nnc_is_tensor_auto(tensor_info->info);
    memory_bits |= CCV_TENSOR_GET_MEMORY(tensor_info->info.type);
    format_bits |= tensor_info->info.format;
    datatype_bits |= CCV_GET_DATA_TYPE(tensor_info->info.datatype);
  }
  for (k = 0; k < output_size; k++)
  {
    int d = outputs[k].d;
    if (outputs[k].graph != graph && outputs[k].d >= 0)
      d = _ccv_nnc_symbolic_graph_map_tensor_symbol(graph, outputs[k], MAP_TENSOR_USE_AS_OUTPUT);
    exec_info->outputs[k] = d;
    if (d < 0)
      continue; // Special symbols carry no tensor info.
    const ccv_nnc_tensor_symbol_info_t* const tensor_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, d);
    if (!has_auto)
      has_auto = ccv_nnc_is_tensor_auto(tensor_info->info);
    memory_bits |= CCV_TENSOR_GET_MEMORY(tensor_info->info.type);
    format_bits |= tensor_info->info.format;
    datatype_bits |= CCV_GET_DATA_TYPE(tensor_info->info.datatype);
  }
  // With an auto tensor involved we cannot know which backend to use yet, so only look it up otherwise.
  if (!has_auto)
    exec_info->cmd.backend = ccv_nnc_cmd_find_backend(exec_info->cmd, memory_bits, format_bits, datatype_bits);
}
862
863
// Create a new exec symbol in `graph` for `cmd` with the given inputs / outputs and return it.
// `name` is optional; when given, at most its first 63 characters are copied into the symbol.
// The symbol takes over the slot recorded in graph->reuse.exec when there is one (the next dead slot
// after it, if any, then becomes the new reuse candidate); otherwise it is appended to the array.
// The graph_exec_symbol_new hook, when installed, is invoked with the original arguments at the end.
ccv_nnc_graph_exec_symbol_t ccv_nnc_graph_exec_symbol_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
{
  ccv_nnc_graph_exec_symbol_t symbol = {
    .d = graph->exec_symbol_info->rnum,
    .graph = graph
  };
  ccv_nnc_graph_exec_symbol_info_t symbol_info = {
    .cmd = cmd,
    .hint = ccv_nnc_no_hint,
  };
  if (name)
  {
    const size_t len = strnlen(name, 63);
    // Don't use strndup because this way I can have custom allocator (for ccmalloc).
    symbol_info.name = (char*)ccmalloc(len + 1);
    memcpy(symbol_info.name, name, len + 1);
    symbol_info.name[len] = 0; // Terminate explicitly, the name may have been truncated.
  }
  _ccv_nnc_graph_exec_symbol_set_io(graph, &symbol_info, inputs, input_size, outputs, output_size);
  const int reuse_exec_d = graph->reuse.exec;
  if (reuse_exec_d < 0)
    ccv_array_push(graph->exec_symbol_info, &symbol_info);
  else {
    assert(reuse_exec_d < graph->exec_symbol_info->rnum);
    *(ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, reuse_exec_d) = symbol_info;
    // Look for the next dead slot after the one we just took; -1 if there is none.
    graph->reuse.exec = -1;
    int next;
    for (next = reuse_exec_d + 1; next < graph->exec_symbol_info->rnum; next++)
      if (CCV_NNC_GRAPH_EXEC_IS_DEAD(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, next))->flags))
      {
        graph->reuse.exec = next;
        break;
      }
    symbol.d = reuse_exec_d;
  }
  if (graph->hooks.graph_exec_symbol_new.func)
    graph->hooks.graph_exec_symbol_new.func(graph->hooks.graph_exec_symbol_new.context, symbol, cmd, inputs, input_size, outputs, output_size, name);
  return symbol;
}
900
901
// Install `hook` / `context` as the graph_exec_symbol_new hook of `graph`.
// Returns the context that was installed before. When `previous_hook` is non-zero, the function that was
// installed before is written to it.
void* ccv_nnc_graph_exec_symbol_new_hook(ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_graph_exec_symbol_new_hook_f hook, void* context, ccv_nnc_graph_exec_symbol_new_hook_f* previous_hook)
{
  void* const old_context = graph->hooks.graph_exec_symbol_new.context;
  if (previous_hook)
    *previous_hook = graph->hooks.graph_exec_symbol_new.func;
  graph->hooks.graph_exec_symbol_new.context = context;
  graph->hooks.graph_exec_symbol_new.func = hook;
  return old_context;
}
910
911
// Replace the inputs and outputs of an existing exec symbol of `graph`. The actual work is done by
// _ccv_nnc_graph_exec_symbol_set_io on the symbol's info entry.
void ccv_nnc_graph_exec_symbol_set_io(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  assert(exec.graph == graph);
  assert(exec.d >= 0 && exec.d < graph->exec_symbol_info->rnum);
  _ccv_nnc_graph_exec_symbol_set_io(graph, (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, exec.d), inputs, input_size, outputs, output_size);
}
919
920
// Record `pair_exec_symbol` as the pair of `exec_symbol` by storing its index + 1 in pair_ref.
// `exec_symbol` must belong to `graph`; `pair_exec_symbol` must belong to either `graph` or graph->pair.
void ccv_nnc_graph_exec_symbol_pair_with(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_graph_exec_symbol_t pair_exec_symbol)
{
  assert(exec_symbol.graph == graph);
  assert(exec_symbol.d >= 0 && exec_symbol.d < graph->exec_symbol_info->rnum);
  assert(pair_exec_symbol.graph == graph || pair_exec_symbol.graph == graph->pair);
  assert(pair_exec_symbol.d >= 0);
  // The upper bound depends on which graph the pair symbol lives in.
  if (pair_exec_symbol.graph != graph)
    { assert(pair_exec_symbol.d < graph->pair->exec_symbol_info->rnum); }
  else
    { assert(pair_exec_symbol.d < graph->exec_symbol_info->rnum); }
  ((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, exec_symbol.d))->pair_ref = pair_exec_symbol.d + 1;
}
934
935
// Replace the command of exec symbol `exec` (which must belong to `graph`) with `cmd`.
void ccv_nnc_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec, const ccv_nnc_cmd_t cmd)
{
  assert(graph == exec.graph);
  assert(exec.d >= 0 && exec.d < graph->exec_symbol_info->rnum);
  ccv_nnc_graph_exec_symbol_info_t* symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, exec.d);
  symbol_info->cmd = cmd;
}
942
943
// Set the upper 16 bits of the flags of exec symbol `exec`. The lower 16 bits of the stored flags are
// preserved, and `flags` itself must not have any of the lower 16 bits set.
void ccv_nnc_graph_exec_symbol_set_flags(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec, const int flags)
{
  assert(graph == exec.graph);
  assert(exec.d >= 0 && exec.d < graph->exec_symbol_info->rnum);
  ccv_nnc_graph_exec_symbol_info_t* symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, exec.d);
  assert(!(flags & 0xffff)); // the pass-in flag shouldn't set the lower 16-bit.
  symbol_info->flags = flags | (symbol_info->flags & 0xffff);
}
951
952
// Return the upper 16 bits of the flags of exec symbol `exec`; the lower 16 bits are masked out.
int ccv_nnc_graph_exec_symbol_flags(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec)
{
  assert(graph == exec.graph);
  assert(exec.d >= 0 && exec.d < graph->exec_symbol_info->rnum);
  ccv_nnc_graph_exec_symbol_info_t* symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, exec.d);
  return (symbol_info->flags & 0xffff0000);
}
959
960
ccv_nnc_cmd_t ccv_nnc_graph_exec_symbol_cmd(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec)
961
51.6k
{
962
51.6k
  assert(graph == exec.graph);
963
51.6k
  assert(exec.d < graph->exec_symbol_info->rnum);
964
51.6k
  ccv_nnc_graph_exec_symbol_info_t* symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, exec.d);
965
51.6k
  return symbol_info->cmd;
966
51.6k
}
967
968
const char* ccv_nnc_graph_exec_symbol_name(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec)
969
0
{
970
0
  assert(graph == exec.graph);
971
0
  assert(exec.d < graph->exec_symbol_info->rnum);
972
0
  ccv_nnc_graph_exec_symbol_info_t* symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, exec.d);
973
0
  return symbol_info->name;
974
0
}
975
976
// Replace the hint of exec symbol `exec` (which must belong to `graph`) with `hint`.
void ccv_nnc_graph_exec_symbol_set_hint(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec, const ccv_nnc_hint_t hint)
{
  assert(graph == exec.graph);
  assert(exec.d >= 0 && exec.d < graph->exec_symbol_info->rnum);
  ccv_nnc_graph_exec_symbol_info_t* symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, exec.d);
  symbol_info->hint = hint;
}
983
984
// Add an edge from `source` to `destination` by appending destination's index to source's outgoings
// (the array is created on first use). Both symbols must belong to `graph`.
// Returns 0 when the edge was added, -1 when it was already there (nothing is changed in that case).
int ccv_nnc_graph_exec_symbol_concat(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t source, const ccv_nnc_graph_exec_symbol_t destination)
{
  assert(graph == source.graph);
  assert(graph == destination.graph);
  assert(source.d >= 0 && source.d < graph->exec_symbol_info->rnum);
  assert(destination.d >= 0 && destination.d < graph->exec_symbol_info->rnum);
  ccv_nnc_graph_exec_symbol_info_t* src_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, source.d);
  if (!src_symbol_info->outgoings)
    src_symbol_info->outgoings = ccv_array_new(sizeof(int32_t), 1, 0);
  else {
    int i;
    // Check if this is already connected, if so, skip.
    for (i = 0; i < src_symbol_info->outgoings->rnum; i++)
      if (*(int*)ccv_array_get(src_symbol_info->outgoings, i) == destination.d)
        return -1;
  }
  ccv_array_push(src_symbol_info->outgoings, &destination.d);
  return 0;
}
1003
1004
void ccv_nnc_graph_exec_symbol_io(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol, const int** const inputs, int* const input_size, const int** const outputs, int* const output_size)
1005
87.0k
{
1006
87.0k
  assert(graph == symbol.graph);
1007
87.0k
  assert(symbol.d < graph->exec_symbol_info->rnum);
1008
87.0k
  const ccv_nnc_graph_exec_symbol_info_t* const symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, symbol.d);
1009
87.0k
  if (inputs)
1010
61.6k
    *inputs = symbol_info->inputs;
1011
87.0k
  if (input_size)
1012
69.5k
    *input_size = symbol_info->input_size;
1013
87.0k
  if (outputs)
1014
74.4k
    *outputs = symbol_info->outputs;
1015
87.0k
  if (output_size)
1016
82.3k
    *output_size = symbol_info->output_size;
1017
87.0k
}
1018
1019
void ccv_nnc_graph_exec_symbol_replace_io(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_tensor_symbol_t old_symbol, const ccv_nnc_tensor_symbol_t new_symbol)
1020
4
{
1021
4
  assert(graph == symbol.graph);
1022
4
  assert(symbol.d < graph->exec_symbol_info->rnum);
1023
4
  assert(graph == old_symbol.graph);
1024
4
  assert(old_symbol.d < graph->tensor_symbol_info->rnum);
1025
4
  assert(graph == new_symbol.graph);
1026
4
  assert(new_symbol.d < graph->tensor_symbol_info->rnum);
1027
4
  const ccv_nnc_tensor_symbol_info_t* const old_tensor_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, old_symbol.d);
1028
4
  const ccv_nnc_tensor_symbol_info_t* const new_tensor_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, new_symbol.d);
1029
4
  if (old_tensor_info != new_tensor_info)
1030
4
  {
1031
    // These need to be the same, otherwise we need to find the backend again for this exec. See _ccv_nnc_graph_exec_symbol_set_io
1032
4
    assert(ccv_nnc_is_tensor_auto(old_tensor_info->info) == ccv_nnc_is_tensor_auto(new_tensor_info->info));
1033
4
    assert(old_tensor_info->info.type == new_tensor_info->info.type);
1034
4
    assert(old_tensor_info->info.format == new_tensor_info->info.format);
1035
4
    assert(old_tensor_info->info.datatype == new_tensor_info->info.datatype);
1036
4
  }
1037
4
  const ccv_nnc_graph_exec_symbol_info_t* const symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, symbol.d);
1038
4
  int i;
1039
12
  for (i = 0; i < symbol_info->input_size; 
i++8
)
1040
8
    if (symbol_info->inputs[i] == old_symbol.d)
1041
4
      symbol_info->inputs[i] = new_symbol.d;
1042
8
  for (i = 0; i < symbol_info->output_size; 
i++4
)
1043
4
    if (symbol_info->outputs[i] == old_symbol.d)
1044
0
      symbol_info->outputs[i] = new_symbol.d;
1045
4
}
1046
1047
void ccv_nnc_graph_exec_symbol_to(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol, const int** const tos, int* const to_size)
1048
37.6k
{
1049
37.6k
  assert(graph == symbol.graph);
1050
37.6k
  assert(symbol.d < graph->exec_symbol_info->rnum);
1051
37.6k
  assert(tos);
1052
37.6k
  assert(to_size);
1053
37.6k
  const ccv_nnc_graph_exec_symbol_info_t* const symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, symbol.d);
1054
37.6k
  if (!symbol_info->outgoings)
1055
4.51k
  {
1056
4.51k
    *tos = 0;
1057
4.51k
    *to_size = 0;
1058
4.51k
    return;
1059
4.51k
  }
1060
33.1k
  *to_size = symbol_info->outgoings->rnum;
1061
33.1k
  *tos = (int*)ccv_array_get(symbol_info->outgoings, 0);
1062
33.1k
}
1063
1064
int ccv_nnc_graph_exec_symbol_count(const ccv_nnc_symbolic_graph_t* const graph)
1065
22.0k
{
1066
22.0k
  return graph->exec_symbol_info->rnum;
1067
22.0k
}
1068
1069
int ccv_nnc_symbolic_graph_active_symbol_count(const ccv_nnc_symbolic_graph_t* const graph, const int type)
1070
451
{
1071
451
  assert(type == CCV_NNC_SYMBOL_TENSOR || type == CCV_NNC_SYMBOL_GRAPH_EXEC);
1072
451
  if (type == CCV_NNC_SYMBOL_GRAPH_EXEC)
1073
422
  {
1074
422
    int i, count = graph->exec_symbol_info->rnum;
1075
864
    for (i = 0; i < graph->exec_symbol_info->rnum; 
i++442
)
1076
442
      if (CCV_NNC_GRAPH_EXEC_IS_DEAD(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, i))->flags))
1077
416
        --count;
1078
422
    return count;
1079
422
  } else 
if (29
type == CCV_NNC_SYMBOL_TENSOR29
) {
1080
29
    int i, count = graph->tensor_symbol_info->rnum;
1081
134
    for (i = 0; i < graph->tensor_symbol_info->rnum; 
i++105
)
1082
105
      if (CCV_NNC_TENSOR_SYMBOL_IS_DEAD(((ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, i))->flags))
1083
54
        --count;
1084
29
    return count;
1085
29
  }
1086
0
  return 0;
1087
451
}
1088
1089
int ccv_nnc_tensor_symbol_count(const ccv_nnc_symbolic_graph_t* const graph)
1090
94
{
1091
94
  return graph->tensor_symbol_info->rnum;
1092
94
}
1093
1094
// Release all memory owned by an exec symbol info: its name, _heap_graph_ref, outgoings array, the
// combined inputs / outputs allocation and, for symbols flagged CCV_NNC_GRAPH_EXEC_P_WHILE, the
// p_while inputs. When `zeroing` is non-zero, every freed pointer (and the input / output sizes) is
// reset so the entry can safely be inspected or freed again. The flags are not modified here.
static inline void _ccv_nnc_graph_exec_symbol_free(ccv_nnc_graph_exec_symbol_info_t* const symbol_info, const int zeroing)
{
  if (symbol_info->name)
    ccfree(symbol_info->name);
  if (symbol_info->_heap_graph_ref)
    ccfree(symbol_info->_heap_graph_ref);
  ccv_array_t* outgoings = symbol_info->outgoings;
  if (outgoings)
    ccv_array_free(outgoings);
  // We allocate inputs & outputs in continuous fashion, therefore, only need to free the input array.
  if (symbol_info->inputs)
    ccfree(symbol_info->inputs);
  if (symbol_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)
    if (symbol_info->p_while.inputs)
      ccfree(symbol_info->p_while.inputs);
  if (zeroing)
  {
    symbol_info->name = 0;
    symbol_info->_heap_graph_ref = 0;
    symbol_info->outgoings = 0;
    symbol_info->inputs = 0;
    symbol_info->input_size = 0;
    symbol_info->outputs = 0;
    symbol_info->output_size = 0;
    // Don't leave the p_while inputs dangling either; only touch it under the same flag that guards
    // the free above.
    if (symbol_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)
      symbol_info->p_while.inputs = 0;
  }
}
1120
1121
// Free an exec symbol of `graph`:
// 1. every other exec symbol that points to it drops that edge and inherits the freed symbol's
//    outgoings instead (without duplicates), so the graph stays connected around the removed node;
// 2. its memory is released and the entry is marked dead;
// 3. trailing dead entries are trimmed off the exec symbol array;
// 4. it is removed from graph->sources / graph->destinations when present;
// 5. graph->reuse.exec (the slot index remembered for reuse, -1 for none) is updated.
void ccv_nnc_graph_exec_symbol_free(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol)
{
  assert(graph == symbol.graph);
  // A negative index would make every write below land outside of the array.
  assert(symbol.d >= 0 && symbol.d < graph->exec_symbol_info->rnum);
  // If any of the exec symbol have reference to it, has to remove that.
  int i, j, k;
  ccv_nnc_graph_exec_symbol_info_t* const free_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, symbol.d);
  for (i = 0; i < graph->exec_symbol_info->rnum; i++)
    if (i != symbol.d)
    {
      ccv_nnc_graph_exec_symbol_info_t* const symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, i);
      if (symbol_info->outgoings)
        for (j = 0; j < symbol_info->outgoings->rnum; j++)
          if (*(int*)ccv_array_get(symbol_info->outgoings, j) == symbol.d)
          {
            // Remove the edge by moving the last entry into its place (order is not preserved).
            if (j < symbol_info->outgoings->rnum - 1)
              *(int*)ccv_array_get(symbol_info->outgoings, j) = *(int*)ccv_array_get(symbol_info->outgoings, symbol_info->outgoings->rnum - 1);
            --symbol_info->outgoings->rnum;
            // Connect this symbol to whatever the freed symbol was pointing to.
            if (free_symbol_info->outgoings)
              for (k = 0; k < free_symbol_info->outgoings->rnum; k++)
                ccv_array_add_unique_int(symbol_info->outgoings, *(int*)ccv_array_get(free_symbol_info->outgoings, k));
            break;
          }
    }
  // Deallocate any memory for exec symbol.
  _ccv_nnc_graph_exec_symbol_free(free_symbol_info, 1);
  free_symbol_info->flags = CCV_NNC_GRAPH_EXEC_DEAD; // Mark this as dead.
  // If everything from symbol.d to the end of the graph is dead, we can reclaim this memory.
  for (i = graph->exec_symbol_info->rnum - 1; i >= 0; i--)
    if (!CCV_NNC_GRAPH_EXEC_IS_DEAD(((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, i))->flags))
    {
      graph->exec_symbol_info->rnum = i + 1;
      break;
    }
  // Loop over sources and destinations to remove this.
  if (graph->sources)
    for (i = 0; i < graph->sources->rnum; i++)
      if (*(int*)ccv_array_get(graph->sources, i) == symbol.d)
      {
        if (i < graph->sources->rnum - 1)
          *(int*)ccv_array_get(graph->sources, i) = *(int*)ccv_array_get(graph->sources, graph->sources->rnum - 1);
        --graph->sources->rnum;
        break;
      }
  if (graph->destinations)
    for (i = 0; i < graph->destinations->rnum; i++)
      if (*(int*)ccv_array_get(graph->destinations, i) == symbol.d)
      {
        if (i < graph->destinations->rnum - 1)
          *(int*)ccv_array_get(graph->destinations, i) = *(int*)ccv_array_get(graph->destinations, graph->destinations->rnum - 1);
        --graph->destinations->rnum;
        break;
      }
  // The freed slot becomes the reuse candidate if it is still inside the array and is lower than the
  // current candidate (or there is none). Otherwise, drop a candidate that was trimmed away.
  if (symbol.d < graph->exec_symbol_info->rnum &&
    (symbol.d < graph->reuse.exec || graph->reuse.exec < 0))
    graph->reuse.exec = symbol.d;
  else if (graph->reuse.exec >= graph->exec_symbol_info->rnum)
    graph->reuse.exec = -1;
}
1180
1181
// Remove the edge source -> destination from the graph.
// Returns 0 when the edge existed and was removed, -1 when the source has no
// outgoing list or the destination is not in it.
int ccv_nnc_graph_exec_symbol_disjoin(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t source, const ccv_nnc_graph_exec_symbol_t destination)
{
  assert(graph == source.graph);
  assert(graph == destination.graph);
  assert(source.d < graph->exec_symbol_info->rnum);
  assert(destination.d < graph->exec_symbol_info->rnum);
  ccv_nnc_graph_exec_symbol_info_t* const from = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, source.d);
  ccv_array_t* const edges = from->outgoings;
  if (!edges)
    return -1;
  int k;
  for (k = 0; k < edges->rnum; k++)
  {
    int* const slot = (int*)ccv_array_get(edges, k);
    if (*slot != destination.d)
      continue;
    // Order of outgoings is not preserved: fill the hole with the last entry, then shrink.
    const int last = edges->rnum - 1;
    if (k < last)
      *slot = *(int*)ccv_array_get(edges, last);
    edges->rnum = last;
    return 0;
  }
  // The two symbols were not connected to begin with.
  return -1;
}
1202
1203
467k
// Yields the bits of x that overlap CCV_NNC_AUTOGEN_ALL_EXECS; a non-zero result means the flag is present.
#define CCV_NNC_IS_AUTOGEN_ALL_EXECS(x) ((x) & CCV_NNC_AUTOGEN_ALL_EXECS)
1204
11.8k
// Yields the bits of x that overlap CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS; a non-zero result means the flag is present.
#define CCV_NNC_IS_AUTOGEN_SOURCES_AND_DESTINATIONS(x) ((x) & CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS)
1205
1206
int ccv_nnc_over_tensor_symbol_aliases(const ccv_nnc_tensor_symbol_info_t* const tensor_a, const ccv_nnc_tensor_symbol_info_t* const tensor_b)
1207
84
{
1208
84
  int i;
1209
84
  const int* stride = tensor_a->stride;
1210
  // Only can compare if the stride is the same, otherwise, we can only assume it overlaps.
1211
84
  if (memcmp(stride, tensor_b->stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC) != 0)
1212
0
    return 1;
1213
84
  const int* ofs = tensor_a->ofs;
1214
84
  const int* dim = tensor_a->info.dim;
1215
200
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && dim[i] && 
tensor_b->info.dim[i]147
;
i++116
)
1216
147
    if (ccv_min(ofs[i] + dim[i], tensor_b->ofs[i] + tensor_b->info.dim[i]) <= ccv_max(ofs[i], tensor_b->ofs[i]))
1217
31
      return 0; // Cannot overlap.
1218
53
  return 1;
1219
84
}
1220
1221
int ccv_nnc_graph_exec_symbol_autogen(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const execs, const int exec_size, const int flags)
1222
11.8k
{
1223
11.8k
  int i, j, x, y;
1224
11.9k
  for (i = 0; i < exec_size; 
i++83
)
1225
83
    if (execs[i].graph == graph)
1226
83
    {
1227
83
      assert(execs[i].d >= 0);
1228
83
      assert(execs[i].d < graph->exec_symbol_info->rnum);
1229
83
    }
1230
11.8k
  if (!CCV_NNC_IS_AUTOGEN_ALL_EXECS(flags) && 
exec_size4.65k
)
1231
16
    { assert(execs); }
1232
11.8k
  const int exec_total_size = CCV_NNC_IS_AUTOGEN_ALL_EXECS(flags) ? 
graph->exec_symbol_info->rnum7.21k
:
exec_size4.65k
;
1233
30.8k
  for (i = 0; i < exec_total_size; 
i++18.9k
)
1234
18.9k
  {
1235
18.9k
    if (!CCV_NNC_IS_AUTOGEN_ALL_EXECS(flags) && 
execs[i].graph != graph83
)
1236
0
      continue;
1237
18.9k
    int idx = CCV_NNC_IS_AUTOGEN_ALL_EXECS(flags) ? 
i18.8k
:
execs[i].d83
;
1238
18.9k
    ccv_nnc_graph_exec_symbol_info_t* symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, idx);
1239
18.9k
    if (CCV_NNC_GRAPH_EXEC_IS_DEAD(symbol_info->flags))
1240
5
      continue;
1241
    // Autogen for sub-graphs.
1242
18.9k
    if (CCV_NNC_GRAPH_REF(symbol_info)[0])
1243
26
      ccv_nnc_graph_exec_symbol_autogen(*(ccv_nnc_symbolic_graph_t**)ccv_array_get(graph->sub_graphs, CCV_NNC_GRAPH_REF(symbol_info)[0] - 1), execs, exec_size, flags);
1244
18.9k
  }
1245
30.8k
  for (i = 0; i < exec_total_size; 
i++18.9k
)
1246
18.9k
  {
1247
18.9k
    if (!CCV_NNC_IS_AUTOGEN_ALL_EXECS(flags) && 
execs[i].graph != graph83
)
1248
0
      continue;
1249
18.9k
    int a_idx = CCV_NNC_IS_AUTOGEN_ALL_EXECS(flags) ? 
i18.8k
:
execs[i].d83
;
1250
18.9k
    ccv_nnc_graph_exec_symbol_info_t* a_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, a_idx);
1251
18.9k
    if (CCV_NNC_GRAPH_EXEC_IS_DEAD(a_symbol_info->flags))
1252
5
      continue;
1253
124k
    
for (j = i + 1; 18.9k
j < exec_total_size;
j++105k
)
1254
105k
    {
1255
105k
      if (!CCV_NNC_IS_AUTOGEN_ALL_EXECS(flags) && 
execs[j].graph != graph269
)
1256
0
        continue;
1257
105k
      int b_idx = CCV_NNC_IS_AUTOGEN_ALL_EXECS(flags) ? 
j105k
:
execs[j].d269
;
1258
      // Skip if they are the same.
1259
105k
      if (a_idx == b_idx)
1260
0
        continue;
1261
105k
      ccv_nnc_graph_exec_symbol_info_t* b_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, b_idx);
1262
105k
      if (CCV_NNC_GRAPH_EXEC_IS_DEAD(b_symbol_info->flags))
1263
9
        continue;
1264
105k
      int b_to_a = 0;
1265
394k
      for (x = 0; x < a_symbol_info->input_size && 
!b_to_a289k
;
x++288k
)
1266
288k
      {
1267
288k
        int a = a_symbol_info->inputs[x];
1268
288k
        if (a < 0)
1269
37.5k
          continue;
1270
        // Handle alias as well.
1271
250k
        ccv_nnc_tensor_symbol_info_t* a_tensor_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, a);
1272
250k
        if (a_tensor_info->alias_ref)
1273
17.0k
          a = a_tensor_info->alias_ref - 1;
1274
701k
        for (y = 0; y < b_symbol_info->output_size && 
!b_to_a451k
;
y++451k
)
1275
451k
        {
1276
451k
          int b = b_symbol_info->outputs[y];
1277
451k
          if (b < 0)
1278
7.91k
            continue;
1279
443k
          ccv_nnc_tensor_symbol_info_t* b_tensor_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, b);
1280
443k
          if (b_tensor_info->alias_ref)
1281
14.1k
            b = b_tensor_info->alias_ref - 1;
1282
443k
          if (a == b && // This two have matching inputs and outputs.
1283
443k
            
(1.46k
!a_tensor_info->alias_ref1.46k
||
1284
1.46k
             
!b_tensor_info->alias_ref7
|| // If any of them are not alias, the must overlap, you can concatenate.
1285
1.46k
             
ccv_nnc_over_tensor_symbol_aliases(a_tensor_info, b_tensor_info)7
)) // Otherwise, we explicitly check whether it overlaps, if it does, concatenate.
1286
1.45k
            b_to_a = 1;
1287
443k
        }
1288
250k
      }
1289
105k
      if (b_to_a)
1290
1.45k
      {
1291
1.45k
        if (execs)
1292
0
          ccv_nnc_graph_exec_symbol_concat(graph, execs[j], execs[i]);
1293
1.45k
        else
1294
1.45k
          ccv_nnc_graph_exec_symbol_concat(graph,
1295
1.45k
            (ccv_nnc_graph_exec_symbol_t) {
1296
1.45k
              .d = j,
1297
1.45k
              .graph = graph
1298
1.45k
            }, (ccv_nnc_graph_exec_symbol_t) {
1299
1.45k
              .d = i,
1300
1.45k
              .graph = graph
1301
1.45k
            }
1302
1.45k
          );
1303
1.45k
      }
1304
105k
      int a_to_b = 0;
1305
290k
      for (x = 0; x < a_symbol_info->output_size && 
!a_to_b185k
;
x++184k
)
1306
184k
      {
1307
184k
        int a = a_symbol_info->outputs[x];
1308
184k
        if (a < 0)
1309
3.28k
          continue;
1310
        // Handle alias as well.
1311
181k
        ccv_nnc_tensor_symbol_info_t* a_tensor_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, a);
1312
181k
        if (a_tensor_info->alias_ref)
1313
5.27k
          a = a_tensor_info->alias_ref - 1;
1314
710k
        for (y = 0; y < b_symbol_info->input_size && 
!a_to_b533k
;
y++529k
)
1315
529k
        {
1316
529k
          int b = b_symbol_info->inputs[y];
1317
529k
          if (b < 0)
1318
73.5k
            continue;
1319
455k
          ccv_nnc_tensor_symbol_info_t* b_tensor_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, b);
1320
455k
          if (b_tensor_info->alias_ref)
1321
13.5k
            b = b_tensor_info->alias_ref - 1;
1322
455k
          if (a == b && // This two have matching inputs and outputs.
1323
455k
            
(8.53k
!a_tensor_info->alias_ref8.53k
||
1324
8.53k
             
!b_tensor_info->alias_ref59
|| // If any of them are not alias, the must overlap, you can concatenate.
1325
8.53k
             
ccv_nnc_over_tensor_symbol_aliases(a_tensor_info, b_tensor_info)23
)) // Otherwise, we explicitly check whether it overlaps, if it does, concatenate.
1326
8.52k
            a_to_b = 1;
1327
455k
        }
1328
181k
      }
1329
105k
      if (a_to_b)
1330
8.52k
      {
1331
8.52k
        if (execs)
1332
79
          ccv_nnc_graph_exec_symbol_concat(graph, execs[i], execs[j]);
1333
8.44k
        else
1334
8.44k
          ccv_nnc_graph_exec_symbol_concat(graph,
1335
8.44k
            (ccv_nnc_graph_exec_symbol_t) {
1336
8.44k
              .d = i,
1337
8.44k
              .graph = graph
1338
8.44k
            }, (ccv_nnc_graph_exec_symbol_t) {
1339
8.44k
              .d = j,
1340
8.44k
              .graph = graph
1341
8.44k
            }
1342
8.44k
          );
1343
8.52k
      }
1344
105k
    }
1345
18.9k
  }
1346
  // If flag says so, loop over to find sources / destinations too.
1347
11.8k
  if (CCV_NNC_IS_AUTOGEN_SOURCES_AND_DESTINATIONS(flags))
1348
9.60k
  {
1349
9.60k
    uint8_t* flags = (uint8_t*)cccalloc(sizeof(uint8_t), graph->exec_symbol_info->rnum);
1350
28.8k
    for (i = 0; i < graph->exec_symbol_info->rnum; 
i++19.2k
)
1351
19.2k
    {
1352
19.2k
      ccv_nnc_graph_exec_symbol_info_t* symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, i);
1353
19.2k
      if (CCV_NNC_GRAPH_EXEC_IS_DEAD(symbol_info->flags))
1354
19
      {
1355
19
        flags[i] = 3; // Skip.
1356
19
        continue;
1357
19
      }
1358
19.2k
      if (symbol_info->outgoings && 
symbol_info->outgoings->rnum9.01k
)
1359
9.00k
      {
1360
9.00k
        flags[i] |= 2;
1361
20.5k
        for (j = 0; j < symbol_info->outgoings->rnum; 
j++11.5k
)
1362
11.5k
          flags[*(int*)ccv_array_get(symbol_info->outgoings, j)] |= 1;
1363
9.00k
      }
1364
19.2k
    }
1365
9.60k
    if (!graph->sources)
1366
2.56k
      graph->sources = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
1367
7.04k
    else
1368
7.04k
      ccv_array_clear(graph->sources);
1369
9.60k
    if (!graph->destinations)
1370
2.56k
      graph->destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
1371
7.04k
    else
1372
7.04k
      ccv_array_clear(graph->destinations);
1373
28.8k
    for (i = 0; i < graph->exec_symbol_info->rnum; 
i++19.2k
)
1374
19.2k
    {
1375
19.2k
      if (flags[i] == 3)
1376
4.25k
        continue;
1377
15.0k
      ccv_nnc_graph_exec_symbol_t exec = {
1378
15.0k
        .d = i,
1379
15.0k
        .graph = graph,
1380
15.0k
      };
1381
15.0k
      if (!(flags[i] & 1))
1382
9.88k
        ccv_array_push(graph->sources, &exec);
1383
15.0k
      if (!(flags[i] & 2))
1384
10.2k
        ccv_array_push(graph->destinations, &exec);
1385
15.0k
    }
1386
9.60k
    ccfree(flags);
1387
9.60k
  }
1388
11.8k
  return 0;
1389
11.8k
}
1390
1391
ccv_nnc_graph_exec_symbol_t* ccv_nnc_symbolic_graph_sources(const ccv_nnc_symbolic_graph_t* const graph)
1392
7.35k
{
1393
7.35k
  return graph->sources ? (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(graph->sources, 0) : 
00
;
1394
7.35k
}
1395
1396
// Append one exec symbol to the graph's sources, creating the sources array on first use.
// The symbol must belong to this graph.
void ccv_nnc_symbolic_graph_add_source(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t source)
{
  if (graph->sources == 0)
  {
    graph->sources = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
  }
  assert(source.graph == graph);
  ccv_array_push(graph->sources, &source);
}
1403
1404
// Replace the graph's sources with the given list of exec symbols.
void ccv_nnc_symbolic_graph_set_sources(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size)
{
  // Start from an empty array: reuse the existing one if there is one.
  if (graph->sources)
    ccv_array_clear(graph->sources);
  else
    graph->sources = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
  int k = 0;
  while (k < source_size)
  {
    ccv_nnc_symbolic_graph_add_source(graph, sources[k]);
    ++k;
  }
}
1414
1415
int ccv_nnc_symbolic_graph_source_size(const ccv_nnc_symbolic_graph_t* const graph)
1416
7.35k
{
1417
7.35k
  return graph->sources ? graph->sources->rnum : 
00
;
1418
7.35k
}
1419
1420
ccv_nnc_graph_exec_symbol_t* ccv_nnc_symbolic_graph_destinations(const ccv_nnc_symbolic_graph_t* const graph)
1421
9.58k
{
1422
9.58k
  return graph->destinations ? (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(graph->destinations, 0) : 
00
;
1423
9.58k
}
1424
1425
// Append one exec symbol to the graph's destinations, creating the destinations array on first use.
// The symbol must belong to this graph.
void ccv_nnc_symbolic_graph_add_destination(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t destination)
{
  if (graph->destinations == 0)
  {
    graph->destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
  }
  assert(destination.graph == graph);
  ccv_array_push(graph->destinations, &destination);
}
1432
1433
// Replace the graph's destinations with the given list of exec symbols.
// Entries with a negative index are left out.
void ccv_nnc_symbolic_graph_set_destinations(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
{
  // Start from an empty array: reuse the existing one if there is one.
  if (graph->destinations)
    ccv_array_clear(graph->destinations);
  else
    graph->destinations = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
  int k = 0;
  while (k < destination_size)
  {
    if (!(destinations[k].d < 0))
      ccv_nnc_symbolic_graph_add_destination(graph, destinations[k]);
    ++k;
  }
}
1444
1445
int ccv_nnc_symbolic_graph_destination_size(const ccv_nnc_symbolic_graph_t* const graph)
1446
9.58k
{
1447
9.58k
  return graph->destinations ? graph->destinations->rnum : 
00
;
1448
9.58k
}
1449
1450
static void _ccv_nnc_symbolic_graph_dot_exec_symbol(const int index, const ccv_nnc_graph_exec_symbol_info_t* const symbol_info, const int flags, FILE* out)
1451
1.29k
{
1452
1.29k
  if (flags == CCV_NNC_LONG_DOT_GRAPH)
1453
1.26k
    fputc('{', out);
1454
1.29k
  if (symbol_info->name)
1455
689
    fputs(symbol_info->name, out);
1456
609
  else
1457
609
    fprintf(out, "node%d", index);
1458
1.29k
  if (flags == CCV_NNC_LONG_DOT_GRAPH)
1459
1.26k
  {
1460
1.26k
    fputs("|Command: ", out);
1461
1.26k
    fputs(ccv_nnc_cmd_name(symbol_info->cmd.cmd), out);
1462
1.26k
    fputc('}', out);
1463
1.26k
  }
1464
1.29k
}
1465
1466
static void _ccv_nnc_symbolic_graph_dot_tensor_symbol(const int index, const ccv_nnc_tensor_symbol_info_t* const symbol_info, const ccv_nnc_tensor_symbol_info_t* const alias_info, const int html_like, const int flags, FILE* out)
1467
4.05k
{
1468
  // if it has an alias pointer, or, it is a long form.
1469
4.05k
  if ((flags == CCV_NNC_LONG_DOT_GRAPH || 
alias_info79
) &&
!html_like4.00k
)
1470
3.91k
    fputc('{', out);
1471
4.05k
  if (symbol_info->name)
1472
1.95k
    fputs(symbol_info->name, out);
1473
2.09k
  else
1474
2.09k
    fprintf(out, "tensor%d", index);
1475
4.05k
  if (flags == CCV_NNC_LONG_DOT_GRAPH)
1476
3.97k
  {
1477
3.97k
    int flag = -1;
1478
3.97k
    if (symbol_info->flags & CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS)
1479
16
      flag = fputs(" (0", out); // Output if it is zero init'ed.
1480
3.95k
    else if (symbol_info->flags & CCV_NNC_TENSOR_SYMBOL_INIT_ONES)
1481
8
        flag = fputs(" (1", out); // Output if it is one init'ed.
1482
3.97k
    if (symbol_info->flags & CCV_NNC_TENSOR_SYMBOL_TAPE_VAR)
1483
16
      flag = (flag >= 0) ? 
fputs(",t", out)0
: fputs(" (t", out); // Output is a tape variable
1484
3.97k
    if (CCV_TENSOR_GET_MEMORY(symbol_info->info.type) == CCV_TENSOR_GPU_MEMORY &&
1485
3.97k
      
CCV_TENSOR_GET_DEVICE1.06k
(symbol_info->info.type) != CCV_COMPUTE_DEVICE_ANY1.06k
)
1486
1.06k
      flag = (flag >= 0) ? 
fprintf(out, ",d%d", 8
CCV_TENSOR_GET_DEVICE_ID8
(symbol_info->info.type)) :
fprintf(out, " (d%d", 1.06k
CCV_TENSOR_GET_DEVICE_ID1.06k
(symbol_info->info.type));
1487
3.97k
    if (flag >= 0)
1488
1.10k
      fputs(")", out);
1489
3.97k
  }
1490
4.05k
  if (flags == CCV_NNC_LONG_DOT_GRAPH)
1491
3.97k
  {
1492
3.97k
    int i;
1493
3.97k
    if (html_like)
1494
86
      fprintf(out, "</td><td>%d", symbol_info->info.dim[0]);
1495
3.88k
    else
1496
3.88k
      fprintf(out, "|%d", symbol_info->info.dim[0]);
1497
9.11k
    for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC && symbol_info->info.dim[i]; 
i++5.14k
)
1498
5.14k
      fprintf(out, "x%d", symbol_info->info.dim[i]);
1499
3.97k
  }
1500
4.05k
  if (alias_info)
1501
198
  {
1502
198
    if (html_like)
1503
0
      fputs("</td><td border=\"0\">as. ", out);
1504
198
    else
1505
198
      fputs("|as. ", out);
1506
198
    if (alias_info->name)
1507
107
      fputs(alias_info->name, out);
1508
91
    else
1509
91
      fprintf(out, "tensor%d", symbol_info->alias_ref - 1);
1510
198
    if (flags == CCV_NNC_LONG_DOT_GRAPH)
1511
166
    {
1512
166
      int flag = -1;
1513
166
      if (alias_info->flags & CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS)
1514
7
        flag = fputs(" (0", out); // Output if it is zero init'ed.
1515
159
      else if (alias_info->flags & CCV_NNC_TENSOR_SYMBOL_INIT_ONES)
1516
0
        flag = fputs(" (1", out); // Output if it is one init'ed.
1517
166
      if (alias_info->flags & CCV_NNC_TENSOR_SYMBOL_TAPE_VAR)
1518
0
        flag = (flag >= 0) ? fputs(",t", out) : fputs(" (t", out); // Output is a tape variable
1519
166
      if (CCV_TENSOR_GET_MEMORY(alias_info->info.type) == CCV_TENSOR_GPU_MEMORY &&
1520
166
        
CCV_TENSOR_GET_DEVICE12
(alias_info->info.type) != CCV_COMPUTE_DEVICE_ANY12
)
1521
12
        flag = (flag >= 0) ? 
fprintf(out, ",d%d", 0
CCV_TENSOR_GET_DEVICE_ID0
(alias_info->info.type)) : fprintf(out, " (d%d", CCV_TENSOR_GET_DEVICE_ID(alias_info->info.type));
1522
166
      if (flag >= 0)
1523
19
        fputs(")", out);
1524
166
    }
1525
198
  }
1526
4.05k
  if ((flags == CCV_NNC_LONG_DOT_GRAPH || 
alias_info79
) &&
!html_like4.00k
)
1527
3.91k
    fputc('}', out);
1528
4.05k
}
1529
1530
static void _ccv_nnc_symbolic_graph_dot_node(const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info, const int index, const ccv_array_t* const tensor_symbol_info, const int flags, FILE* out)
1531
1.29k
{
1532
1.29k
  fprintf(out, "node%d [shape=record,label=\"", index);
1533
1.29k
  _ccv_nnc_symbolic_graph_dot_exec_symbol(index, exec_symbol_info, flags, out);
1534
1.29k
  int i;
1535
1.29k
  if (exec_symbol_info->input_size > 0)
1536
1.22k
  {
1537
1.22k
    fputs("|{Input", out);
1538
4.16k
    for (i = 0; i < exec_symbol_info->input_size; 
i++2.94k
)
1539
2.94k
    {
1540
2.94k
      if (exec_symbol_info->inputs[i] >= 0)
1541
2.39k
      {
1542
2.39k
        fputc('|', out);
1543
2.39k
        const ccv_nnc_tensor_symbol_info_t* const tensor_symbol = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(tensor_symbol_info, exec_symbol_info->inputs[i]);
1544
2.39k
        const ccv_nnc_tensor_symbol_info_t* const alias_symbol = tensor_symbol->alias_ref ? 
(ccv_nnc_tensor_symbol_info_t*)121
ccv_array_get121
(tensor_symbol_info, tensor_symbol->alias_ref - 1) :
02.26k
;
1545
2.39k
        _ccv_nnc_symbolic_graph_dot_tensor_symbol(exec_symbol_info->inputs[i], tensor_symbol, alias_symbol, 0, flags, out);
1546
2.39k
      } else
1547
552
        fputs("|-", out);
1548
2.94k
    }
1549
1.22k
    fputc('}', out);
1550
1.22k
  }
1551
1.29k
  if (exec_symbol_info->output_size > 0)
1552
1.27k
  {
1553
1.27k
    fputs("|{Output", out);
1554
2.93k
    for (i = 0; i < exec_symbol_info->output_size; 
i++1.65k
)
1555
1.65k
    {
1556
1.65k
      if (exec_symbol_info->outputs[i] >= 0)
1557
1.57k
      {
1558
1.57k
        fputc('|', out);
1559
1.57k
        const ccv_nnc_tensor_symbol_info_t* const tensor_symbol = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(tensor_symbol_info, exec_symbol_info->outputs[i]);
1560
1.57k
        const ccv_nnc_tensor_symbol_info_t* const alias_symbol = tensor_symbol->alias_ref ? 
(ccv_nnc_tensor_symbol_info_t*)77
ccv_array_get77
(tensor_symbol_info, tensor_symbol->alias_ref - 1) :
01.49k
;
1561
1.57k
        _ccv_nnc_symbolic_graph_dot_tensor_symbol(exec_symbol_info->outputs[i], tensor_symbol, alias_symbol, 0, flags, out);
1562
1.57k
      } else
1563
82
        fputs("|-", out);
1564
1.65k
    }
1565
1.27k
    fputc('}', out);
1566
1.27k
  }
1567
1.29k
  fputs("\"];\n", out);
1568
1.29k
}
1569
1570
static void _ccv_nnc_symbolic_graph_dot_while_label(const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info, const int index, const ccv_array_t* const tensor_symbol_info, const ccv_nnc_symbolic_graph_t* const while_graph, const int flags, FILE* out)
1571
21
{
1572
21
  int i;
1573
21
  if (flags == CCV_NNC_LONG_DOT_GRAPH)
1574
21
    fputs("<table border=\"0\" cellborder=\"1\" cellspacing=\"0\"><tr><td colspan=\"3\" border=\"0\"><b>", out);
1575
0
  else
1576
0
    fputs("<table border=\"0\" cellborder=\"1\" cellspacing=\"0\"><tr><td colspan=\"2\" border=\"0\"><b>", out);
1577
21
  if (exec_symbol_info->name)
1578
21
    fputs(exec_symbol_info->name, out);
1579
0
  else
1580
0
    fprintf(out, "while%d", index);
1581
21
  fputs(" </b>Command: ", out);
1582
21
  fputs(ccv_nnc_cmd_name(exec_symbol_info->cmd.cmd), out);
1583
21
  fputs("</td></tr>", out);
1584
21
  const int p_idx = while_graph->p_idx - 1;
1585
21
  assert(p_idx >= 0);
1586
21
  if (exec_symbol_info->input_size > 0)
1587
16
  {
1588
16
    fprintf(out, "<tr><td rowspan=\"%d\">Input</td>", exec_symbol_info->input_size);
1589
39
    for (i = 0; i < exec_symbol_info->input_size; 
i++23
)
1590
23
    {
1591
23
      if (i > 0)
1592
7
        fputs("<tr>", out);
1593
23
      if (exec_symbol_info->inputs[i] >= 0)
1594
23
      {
1595
23
        fputs("<td>", out);
1596
23
        const ccv_nnc_tensor_symbol_info_t* const tensor_symbol = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(tensor_symbol_info, exec_symbol_info->inputs[i]);
1597
23
        const ccv_nnc_tensor_symbol_info_t* const alias_symbol = tensor_symbol->alias_ref ? 
(ccv_nnc_tensor_symbol_info_t*)0
ccv_array_get0
(tensor_symbol_info, tensor_symbol->alias_ref - 1) : 0;
1598
23
        _ccv_nnc_symbolic_graph_dot_tensor_symbol(exec_symbol_info->inputs[i], tensor_symbol, alias_symbol, 1, flags, out);
1599
23
        fputs("</td><td border=\"0\">=&gt; ", out);
1600
23
        const int s_idx = *(int*)ccv_array_get(tensor_symbol->s_ref, p_idx) - 1;
1601
23
        assert(s_idx >= 0);
1602
23
        const ccv_nnc_tensor_symbol_info_t* const sub_tensor_symbol = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(while_graph->tensor_symbol_info, s_idx);
1603
23
        if (sub_tensor_symbol->name)
1604
21
          fputs(sub_tensor_symbol->name, out);
1605
2
        else
1606
2
          fprintf(out, "tensor%d", s_idx);
1607
23
        fputs("</td></tr>", out);
1608
23
      } else {
1609
0
        if (flags == CCV_NNC_LONG_DOT_GRAPH)
1610
0
          fputs("<td colspan=\"3\">-</td></tr>", out);
1611
0
        else
1612
0
          fputs("<td colspan=\"2\">-</td></tr>", out);
1613
0
      }
1614
23
    }
1615
16
  }
1616
21
  if (exec_symbol_info->output_size > 0)
1617
15
  {
1618
15
    fprintf(out, "<tr><td rowspan=\"%d\">Output</td>", exec_symbol_info->output_size);
1619
38
    for (i = 0; i < exec_symbol_info->output_size; 
i++23
)
1620
23
    {
1621
23
      if (i > 0)
1622
8
        fputs("<tr>", out);
1623
23
      if (exec_symbol_info->outputs[i] >= 0)
1624
23
      {
1625
23
        fputs("<td>", out);
1626
23
        ccv_nnc_tensor_symbol_info_t* tensor_symbol = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(tensor_symbol_info, exec_symbol_info->outputs[i]);
1627
23
        ccv_nnc_tensor_symbol_info_t* alias_symbol = tensor_symbol->alias_ref ? 
(ccv_nnc_tensor_symbol_info_t*)0
ccv_array_get0
(tensor_symbol_info, tensor_symbol->alias_ref - 1) : 0;
1628
23
        _ccv_nnc_symbolic_graph_dot_tensor_symbol(exec_symbol_info->outputs[i], tensor_symbol, alias_symbol, 1, flags, out);
1629
23
        fputs("</td><td border=\"0\">=&gt; ", out);
1630
23
        const int s_idx = *(int*)ccv_array_get(tensor_symbol->s_ref, p_idx) - 1;
1631
23
        assert(s_idx >= 0);
1632
23
        const ccv_nnc_tensor_symbol_info_t* const sub_tensor_symbol = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(while_graph->tensor_symbol_info, s_idx);
1633
23
        if (sub_tensor_symbol->name)
1634
22
          fputs(sub_tensor_symbol->name, out);
1635
1
        else
1636
1
          fprintf(out, "tensor%d", s_idx);
1637
23
        fputs("</td></tr>", out);
1638
23
      } else {
1639
0
        if (flags == CCV_NNC_LONG_DOT_GRAPH)
1640
0
          fputs("<td colspan=\"3\">-</td></tr>", out);
1641
0
        else
1642
0
          fputs("<td colspan=\"2\">-</td></tr>", out);
1643
0
      }
1644
23
    }
1645
15
  }
1646
127
  
for (i = 0; 21
i < while_graph->tensor_symbol_info->rnum;
i++106
)
1647
106
  {
1648
106
    const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(while_graph->tensor_symbol_info, i);
1649
106
    if (tensor_symbol_info->assign_ref)
1650
24
    {
1651
24
      if (flags == CCV_NNC_LONG_DOT_GRAPH)
1652
24
        fputs("<tr><td colspan=\"3\" border=\"0\">", out);
1653
0
      else
1654
0
        fputs("<tr><td colspan=\"2\" border=\"0\">", out);
1655
24
      const ccv_nnc_tensor_symbol_info_t* const assign_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(while_graph->tensor_symbol_info, tensor_symbol_info->assign_ref - 1);
1656
24
      if (assign_symbol_info->name)
1657
22
        fputs(assign_symbol_info->name, out);
1658
2
      else
1659
2
        fprintf(out, "tensor%d", tensor_symbol_info->assign_ref - 1);
1660
24
      fputs(" -&gt; ", out);
1661
24
      if (tensor_symbol_info->name)
1662
22
        fputs(tensor_symbol_info->name, out);
1663
2
      else
1664
2
        fprintf(out, "tensor%d", i);
1665
24
      fputs("</td></tr>", out);
1666
24
    }
1667
106
  }
1668
21
  fputs("</table>", out);
1669
21
}
1670
1671
static void _ccv_nnc_symbolic_graph_dot_case_of_label(const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info, const int index, const ccv_array_t* const tensor_symbol_info, const int flags, FILE* out)
1672
11
{
1673
11
  int i;
1674
11
  if (flags == CCV_NNC_LONG_DOT_GRAPH)
1675
11
    fputs("<table border=\"0\" cellborder=\"1\" cellspacing=\"0\"><tr><td colspan=\"3\" border=\"0\"><b>", out);
1676
0
  else
1677
0
    fputs("<table border=\"0\" cellborder=\"1\" cellspacing=\"0\"><tr><td colspan=\"2\" border=\"0\"><b>", out);
1678
11
  if (exec_symbol_info->name)
1679
11
    fputs(exec_symbol_info->name, out);
1680
0
  else
1681
0
    fprintf(out, "caseof%d", index);
1682
11
  fputs(" </b>Command: ", out);
1683
11
  fputs(ccv_nnc_cmd_name(exec_symbol_info->cmd.cmd), out);
1684
11
  fputs("</td></tr>", out);
1685
11
  if (exec_symbol_info->input_size > 0)
1686
11
  {
1687
11
    fprintf(out, "<tr><td rowspan=\"%d\">Input</td>", exec_symbol_info->input_size);
1688
38
    for (i = 0; i < exec_symbol_info->input_size; 
i++27
)
1689
27
    {
1690
27
      if (i > 0)
1691
16
        fputs("<tr>", out);
1692
27
      if (exec_symbol_info->inputs[i] >= 0)
1693
27
      {
1694
27
        fputs("<td>", out);
1695
27
        const ccv_nnc_tensor_symbol_info_t* const tensor_symbol = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(tensor_symbol_info, exec_symbol_info->inputs[i]);
1696
27
        const ccv_nnc_tensor_symbol_info_t* const alias_symbol = tensor_symbol->alias_ref ? 
(ccv_nnc_tensor_symbol_info_t*)0
ccv_array_get0
(tensor_symbol_info, tensor_symbol->alias_ref - 1) : 0;
1697
27
        _ccv_nnc_symbolic_graph_dot_tensor_symbol(exec_symbol_info->inputs[i], tensor_symbol, alias_symbol, 1, flags, out);
1698
27
        fputs("</td></tr>", out);
1699
27
      } else {
1700
0
        if (flags == CCV_NNC_LONG_DOT_GRAPH)
1701
0
          fputs("<td colspan=\"2\">-</td></tr>", out);
1702
0
        else
1703
0
          fputs("<td colspan=\"1\">-</td></tr>", out);
1704
0
      }
1705
27
    }
1706
11
  }
1707
11
  if (exec_symbol_info->output_size > 0)
1708
11
  {
1709
11
    fprintf(out, "<tr><td rowspan=\"%d\">Output</td>", exec_symbol_info->output_size);
1710
24
    for (i = 0; i < exec_symbol_info->output_size; 
i++13
)
1711
13
    {
1712
13
      if (i > 0)
1713
2
        fputs("<tr>", out);
1714
13
      if (exec_symbol_info->outputs[i] >= 0)
1715
13
      {
1716
13
        fputs("<td>", out);
1717
13
        ccv_nnc_tensor_symbol_info_t* tensor_symbol = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(tensor_symbol_info, exec_symbol_info->outputs[i]);
1718
13
        ccv_nnc_tensor_symbol_info_t* alias_symbol = tensor_symbol->alias_ref ? 
(ccv_nnc_tensor_symbol_info_t*)0
ccv_array_get0
(tensor_symbol_info, tensor_symbol->alias_ref - 1) : 0;
1719
13
        _ccv_nnc_symbolic_graph_dot_tensor_symbol(exec_symbol_info->outputs[i], tensor_symbol, alias_symbol, 1, flags, out);
1720
13
        fputs("</td></tr>", out);
1721
13
      } else {
1722
0
        if (flags == CCV_NNC_LONG_DOT_GRAPH)
1723
0
          fputs("<td colspan=\"2\">-</td></tr>", out);
1724
0
        else
1725
0
          fputs("<td colspan=\"1\">-</td></tr>", out);
1726
0
      }
1727
13
    }
1728
11
  }
1729
11
  fputs("</table>", out);
1730
11
}
1731
1732
static void _ccv_nnc_symbolic_graph_dot_sub_graphs(const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info, const ccv_array_t* const tensor_symbol_info, const ccv_array_t* const sub_graphs, const int flags, FILE* out, int* c)
1733
32
{
1734
32
  int i, j, k;
1735
  // Output this node info within this subgraph.
1736
32
  if (exec_symbol_info->flags & CCV_NNC_GRAPH_EXEC_P_WHILE)
1737
21
  {
1738
21
    fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\nlabel=<", *c, *c);
1739
21
    const ccv_nnc_symbolic_graph_t* const while_graph = *(ccv_nnc_symbolic_graph_t**)ccv_array_get(sub_graphs, CCV_NNC_GRAPH_REF(exec_symbol_info)[0] - 1);
1740
21
    _ccv_nnc_symbolic_graph_dot_while_label(exec_symbol_info, *c, tensor_symbol_info, while_graph, flags, out);
1741
21
  } else 
if (11
exec_symbol_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF11
) {
1742
11
    fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\nlabel=<", *c, *c);
1743
11
    _ccv_nnc_symbolic_graph_dot_case_of_label(exec_symbol_info, *c, tensor_symbol_info, flags, out);
1744
11
  }
1745
32
  fputs(">;\n", out);
1746
32
  ++(*c);
1747
81
  for (k = 0; k < exec_symbol_info->graph_ref_size; 
k++49
)
1748
49
  {
1749
49
    if (exec_symbol_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF)
1750
28
    {
1751
28
      fprintf(out, "subgraph cluster%d {\nstyle=\"rounded\";\nnode%d [style=invisible];\nlabel=\"\"\n", *c, *c);
1752
28
      ++(*c);
1753
28
    }
1754
49
    const ccv_nnc_symbolic_graph_t* const graph = *(ccv_nnc_symbolic_graph_t**)ccv_array_get(sub_graphs, CCV_NNC_GRAPH_REF(exec_symbol_info)[k] - 1);
1755
49
    int* node_id = (int*)ccmalloc(sizeof(int) * graph->exec_symbol_info->rnum);
1756
144
    for (i = 0; i < graph->exec_symbol_info->rnum; 
i++95
)
1757
95
    {
1758
95
      node_id[i] = *c;
1759
95
      const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, i);
1760
      // Skip the dead one.
1761
95
      if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_symbol_info->flags))
1762
2
        continue;
1763
93
      if (exec_symbol_info->graph_ref_size)
1764
3
        _ccv_nnc_symbolic_graph_dot_sub_graphs(exec_symbol_info, graph->tensor_symbol_info, graph->sub_graphs, flags, out, c);
1765
90
      else {
1766
90
        _ccv_nnc_symbolic_graph_dot_node(exec_symbol_info, *c, graph->tensor_symbol_info, flags, out);
1767
90
        ++(*c);
1768
90
      }
1769
93
    }
1770
    // Output connections.
1771
144
    for (i = 0; i < graph->exec_symbol_info->rnum; 
i++95
)
1772
95
    {
1773
95
      const ccv_nnc_graph_exec_symbol_info_t* exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, i);
1774
      // Skip the dead one.
1775
95
      if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_symbol_info->flags))
1776
2
        continue;
1777
93
      if (exec_symbol_info->outgoings)
1778
90
        
for (j = 0; 45
j < exec_symbol_info->outgoings->rnum;
j++45
)
1779
45
        {
1780
45
          const int outgoing_idx = *(int*)ccv_array_get(exec_symbol_info->outgoings, j);
1781
45
          const ccv_nnc_graph_exec_symbol_info_t* const outgoing_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, outgoing_idx);
1782
          // If both are sub-graphs, have both tail and head specified.
1783
45
          if (CCV_NNC_GRAPH_REF(exec_symbol_info)[0] && 
CCV_NNC_GRAPH_REF1
(outgoing_symbol_info)[0]1
)
1784
0
            fprintf(out, "node%d -> node%d [ltail=cluster%d,lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i], node_id[outgoing_idx]);
1785
45
          else if (CCV_NNC_GRAPH_REF(exec_symbol_info)[0] && 
!1
CCV_NNC_GRAPH_REF1
(outgoing_symbol_info)[0])
1786
1
            fprintf(out, "node%d -> node%d [ltail=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i]);
1787
44
          else if (!CCV_NNC_GRAPH_REF(exec_symbol_info)[0] && CCV_NNC_GRAPH_REF(outgoing_symbol_info)[0])
1788
3
            fprintf(out, "node%d -> node%d [lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[outgoing_idx]);
1789
41
          else
1790
41
            fprintf(out, "node%d -> node%d;\n", node_id[i], node_id[outgoing_idx]);
1791
45
        }
1792
93
    }
1793
49
    fputs("}\n", out);
1794
49
    ccfree(node_id);
1795
49
  }
1796
  // Extra subgraph cluster.
1797
32
  if (exec_symbol_info->flags & CCV_NNC_GRAPH_EXEC_CASE_OF)
1798
11
    fputs("}\n", out);
1799
32
}
1800
1801
void ccv_nnc_symbolic_graph_dot(const ccv_nnc_symbolic_graph_t* const graph, const int flags, FILE* out)
1802
647
{
1803
647
  fputs("digraph G {\ncompound=true;\n", out);
1804
647
  int i, j;
1805
647
  int c = 0;
1806
647
  int* node_id = (int*)ccmalloc(sizeof(int) * graph->exec_symbol_info->rnum);
1807
  // Output styles.
1808
2.31k
  for (i = 0; i < graph->exec_symbol_info->rnum; 
i++1.67k
)
1809
1.67k
  {
1810
1.67k
    node_id[i] = c;
1811
1.67k
    const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, i);
1812
    // Skip the dead one.
1813
1.67k
    if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_symbol_info->flags))
1814
433
      continue;
1815
1.23k
    if (exec_symbol_info->graph_ref_size)
1816
29
      _ccv_nnc_symbolic_graph_dot_sub_graphs(exec_symbol_info, graph->tensor_symbol_info, graph->sub_graphs, flags, out, &c);
1817
1.20k
    else {
1818
1.20k
      _ccv_nnc_symbolic_graph_dot_node(exec_symbol_info, c, graph->tensor_symbol_info, flags, out);
1819
1.20k
      ++c;
1820
1.20k
    }
1821
1.23k
  }
1822
  // Output connections.
1823
2.31k
  for (i = 0; i < graph->exec_symbol_info->rnum; 
i++1.67k
)
1824
1.67k
  {
1825
1.67k
    const ccv_nnc_graph_exec_symbol_info_t* exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, i);
1826
    // Skip the dead one.
1827
1.67k
    if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_symbol_info->flags))
1828
433
      continue;
1829
1.23k
    if (exec_symbol_info->outgoings)
1830
2.32k
      
for (j = 0; 940
j < exec_symbol_info->outgoings->rnum;
j++1.38k
)
1831
1.38k
      {
1832
1.38k
        const int outgoing_idx = *(int*)ccv_array_get(exec_symbol_info->outgoings, j);
1833
1.38k
        const ccv_nnc_graph_exec_symbol_info_t* const outgoing_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, outgoing_idx);
1834
        // If both are sub-graphs, have both tail and head specified.
1835
1.38k
        if (exec_symbol_info->graph_ref_size && 
outgoing_symbol_info->graph_ref_size16
)
1836
2
          fprintf(out, "node%d -> node%d [ltail=cluster%d,lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i], node_id[outgoing_idx]);
1837
1.38k
        else if (exec_symbol_info->graph_ref_size && 
!outgoing_symbol_info->graph_ref_size14
)
1838
14
          fprintf(out, "node%d -> node%d [ltail=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[i]);
1839
1.37k
        else if (!exec_symbol_info->graph_ref_size && outgoing_symbol_info->graph_ref_size)
1840
4
          fprintf(out, "node%d -> node%d [lhead=cluster%d];\n", node_id[i], node_id[outgoing_idx], node_id[outgoing_idx]);
1841
1.36k
        else
1842
1.36k
          fprintf(out, "node%d -> node%d;\n", node_id[i], node_id[outgoing_idx]);
1843
1.38k
      }
1844
1.23k
  }
1845
647
  fputs("}\n", out);
1846
647
  ccfree(node_id);
1847
647
}
1848
1849
void ccv_nnc_symbolic_graph_format(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context)
1850
2
{
1851
2
  assert((sources && source_size) || (!sources && !source_size));
1852
2
  const ccv_nnc_graph_exec_symbol_t* const graph_sources = sources ? 
sources1
:
(ccv_nnc_graph_exec_symbol_t*)1
ccv_array_get1
(graph->sources, 0);
1853
2
  const int graph_source_size = source_size ? 
source_size1
:
graph->sources->rnum1
;
1854
2
  assert((destinations && destination_size) || (!destinations && !destination_size));
1855
2
  const ccv_nnc_graph_exec_symbol_t* const graph_destinations = destinations ? 
destinations1
:
(ccv_nnc_graph_exec_symbol_t*)1
ccv_array_get1
(graph->destinations, 0);
1856
2
  const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0);
1857
2
  const int graph_destination_size = destination_size ? 
destination_size1
:
graph->destinations->rnum1
;
1858
4
  ccv_nnc_graph_visit_t* const visit = 
ccv_nnc_graph_visit_new2
(graph, exec_symbol_info, graph->exec_symbol_info->rnum, graph_sources, graph_source_size, graph_destinations, graph_destination_size, 0);
1859
0
  int outgoing_edge_count = 0;
1860
11
  ccv_nnc_graph_visit_for(visit, exec_symbol_info, node) {
1861
11
    outgoing_edge_count += node->outgoings ? 
node->outgoings->rnum8
:
03
;
1862
11
  } ccv_nnc_graph_visit_endfor
1863
4
  int* const incoming_counts = (int*)
ccmalloc2
(sizeof(int) * (graph->exec_symbol_info->rnum * 2 + outgoing_edge_count));
1864
4
  memset(incoming_counts, 0, sizeof(int) * graph->exec_symbol_info->rnum);
1865
4
  int i;
1866
11
  ccv_nnc_graph_visit_for(visit, exec_symbol_info, node) {
1867
11
    if (CCV_NNC_GRAPH_EXEC_IS_DEAD(node->flags))
1868
0
      continue;
1869
11
    if (node->outgoings && 
node->outgoings->rnum8
) {
1870
25
      for (i = 0; i < node->outgoings->rnum; 
i++17
)
1871
17
        ++incoming_counts[*(int*)ccv_array_get(node->outgoings, i)];
1872
8
    }
1873
11
  } ccv_nnc_graph_visit_endfor
1874
4
  int* const incoming_offsets = incoming_counts + graph->exec_symbol_info->rnum;
1875
4
  int incoming_edge_count = 0;
1876
11
  ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx) {
1877
11
    if (CCV_NNC_GRAPH_EXEC_IS_DEAD(node->flags))
1878
0
      continue;
1879
11
    incoming_offsets[idx] = incoming_edge_count;
1880
11
    incoming_edge_count += incoming_counts[idx];
1881
11
  } ccv_nnc_graph_visit_endfor
1882
4
  assert(incoming_edge_count <= outgoing_edge_count);
1883
2
  memset(incoming_counts, 0, sizeof(int) * graph->exec_symbol_info->rnum);
1884
2
  int* const incoming_edges = incoming_offsets + graph->exec_symbol_info->rnum;
1885
11
  ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx) {
1886
11
    if (CCV_NNC_GRAPH_EXEC_IS_DEAD(node->flags))
1887
0
      continue;
1888
11
    if (node->outgoings && 
node->outgoings->rnum8
) {
1889
25
      for (i = 0; i < node->outgoings->rnum; 
i++17
)
1890
17
      {
1891
17
        const int d = *(int*)ccv_array_get(node->outgoings, i);
1892
17
        incoming_edges[incoming_offsets[d] + incoming_counts[d]] = idx;
1893
17
        ++incoming_counts[d];
1894
17
      }
1895
8
    }
1896
11
  } ccv_nnc_graph_visit_endfor
1897
11
  ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx) {
1898
11
    if (CCV_NNC_GRAPH_EXEC_IS_DEAD(node->flags))
1899
0
      continue;
1900
11
    format_fn(graph, idx, node->name, node->cmd, node->flags, incoming_edges + incoming_offsets[idx], incoming_counts[idx], node->outgoings ? 
(int*)8
ccv_array_get8
(node->outgoings, 0) :
03
, node->outgoings ?
node->outgoings->rnum8
:
03
, node->inputs, node->input_size, node->outputs, node->output_size, context);
1901
11
  } ccv_nnc_graph_visit_endfor
1902
2
  ccv_nnc_graph_visit_free(visit);
1903
2
  ccfree(incoming_counts);
1904
2
}
1905
1906
void ccv_nnc_symbolic_graph_free(ccv_nnc_symbolic_graph_t* const graph)
1907
2.66k
{
1908
2.66k
  int i;
1909
15.9k
  for (i = 0; i < graph->exec_symbol_info->rnum; 
i++13.2k
)
1910
13.2k
    _ccv_nnc_graph_exec_symbol_free((ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, i), 0);
1911
38.9k
  for (i = 0; i < graph->tensor_symbol_info->rnum; 
i++36.2k
)
1912
36.2k
  {
1913
36.2k
    ccv_nnc_tensor_symbol_info_t* const symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, i);
1914
36.2k
    if (symbol_info->name)
1915
4.93k
      ccfree(symbol_info->name);
1916
36.2k
    if (symbol_info->s_ref)
1917
74
      ccv_array_free(symbol_info->s_ref);
1918
36.2k
  }
1919
2.66k
  if (graph->sub_graphs)
1920
29
  {
1921
80
    for (i = 0; i < graph->sub_graphs->rnum; 
i++51
)
1922
51
      ccv_nnc_symbolic_graph_free(*(ccv_nnc_symbolic_graph_t**)ccv_array_get(graph->sub_graphs, i));
1923
29
    ccv_array_free(graph->sub_graphs);
1924
29
  }
1925
2.66k
  if (graph->sources)
1926
2.58k
    ccv_array_free(graph->sources);
1927
2.66k
  if (graph->destinations)
1928
2.58k
    ccv_array_free(graph->destinations);
1929
2.66k
  if (graph->breakpoints)
1930
33
    ccfree(graph->breakpoints);
1931
2.66k
  ccv_array_free(graph->tensor_symbol_info);
1932
2.66k
  ccv_array_free(graph->exec_symbol_info);
1933
2.66k
  if (graph->backward.tensor_symbol_idx)
1934
2.37k
    ccfree(graph->backward.tensor_symbol_idx);
1935
2.66k
  if (graph->data_parallel.tensor_symbol_idx)
1936
17
    ccfree(graph->data_parallel.tensor_symbol_idx);
1937
2.66k
  if (graph->data_parallel.exec_symbol_idx)
1938
17
    ccfree(graph->data_parallel.exec_symbol_idx);
1939
2.66k
  ccfree(graph);
1940
2.66k
}
1941
1942
void ccv_nnc_symbolic_graph_symbol_infer(const ccv_nnc_symbolic_graph_t* const symbolic_graph, const ccv_nnc_graph_visit_t* const visit, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size, const ccv_nnc_tensor_symbol_info_t* const p_tensor_symbol_info, const int p_tensor_symbol_info_size, ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info, ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info)
1943
26.5k
{
1944
26.5k
  if (ccv_array_get(symbolic_graph->tensor_symbol_info, 0) != tensor_symbol_info)
1945
17.6k
    memcpy(tensor_symbol_info, ccv_array_get(symbolic_graph->tensor_symbol_info, 0), sizeof(ccv_nnc_tensor_symbol_info_t) * symbolic_graph->tensor_symbol_info->rnum);
1946
26.5k
  if (ccv_array_get(symbolic_graph->exec_symbol_info, 0) != exec_symbol_info)
1947
17.6k
    memcpy(exec_symbol_info, ccv_array_get(symbolic_graph->exec_symbol_info, 0), sizeof(ccv_nnc_graph_exec_symbol_info_t) * symbolic_graph->exec_symbol_info->rnum);
1948
26.5k
  int i;
1949
26.5k
  if (p_tensor_symbol_info)
1950
417
    
for (i = 0; 64
i < symbolic_graph->tensor_symbol_info->rnum;
i++353
)
1951
353
      if (tensor_symbol_info[i].p_ref)
1952
132
      {
1953
132
        const int p_ref = tensor_symbol_info[i].p_ref - 1;
1954
132
        assert(p_ref < p_tensor_symbol_info_size);
1955
132
        tensor_symbol_info[i].info = p_tensor_symbol_info[p_ref].info;
1956
        // I don't need to copy over stride and ofs for alias.
1957
132
      }
1958
26.5k
  int max_input_size = 0, max_output_size = 0;
1959
  // Materialize auto hints.
1960
169k
  for (i = 0; i < symbolic_graph->exec_symbol_info->rnum; 
i++143k
)
1961
143k
  {
1962
143k
    if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_symbol_info[i].flags))
1963
24
      continue;
1964
143k
    max_input_size = ccv_max(max_input_size, exec_symbol_info[i].input_size);
1965
143k
    max_output_size = ccv_max(max_output_size, exec_symbol_info[i].output_size);
1966
    // If there is no hint and we have input and output tensor specified.
1967
143k
    if (ccv_nnc_is_no_hint(exec_symbol_info[i].hint) &&
1968
143k
      
exec_symbol_info[i].input_size > 0121k
&&
exec_symbol_info[i].inputs[0] >= 0117k
&&
!ccv_nnc_is_tensor_auto(tensor_symbol_info[exec_symbol_info[i].inputs[0]].info)116k
&&
1969
143k
      
exec_symbol_info[i].output_size > 0116k
&&
exec_symbol_info[i].outputs[0] >= 0116k
&&
!ccv_nnc_is_tensor_auto(tensor_symbol_info[exec_symbol_info[i].outputs[0]].info)112k
)
1970
112k
      exec_symbol_info[i].hint = ccv_nnc_hint_auto(exec_symbol_info[i].cmd.info, tensor_symbol_info[exec_symbol_info[i].inputs[0]].info, tensor_symbol_info[exec_symbol_info[i].outputs[0]].info);
1971
143k
  }
1972
1973
26.5k
  ccv_nnc_tensor_param_t input_params[ccv_max(1, max_input_size)];
1974
26.5k
  ccv_nnc_tensor_param_t output_params[ccv_max(1, max_output_size)];
1975
1976
  // Materialize auto tensors. This need to go with the topological order.
1977
  // TODO: Need to proper handle sub-graphs (thus, run sub-graph to figure out the tensor properties).
1978
123k
  ccv_nnc_graph_visit_for(visit, exec_symbol_info, node) {
1979
123k
    if (node->input_size > 0 && 
node->output_size > 0118k
)
1980
118k
    {
1981
465k
      for (i = 0; i < node->input_size; 
i++346k
)
1982
346k
        input_params[i] = node->inputs[i] >= 0 ? 
tensor_symbol_info[node->inputs[i]].info273k
:
ccv_nnc_tensor_auto73.1k
;
1983
      // output_params will be initialized to tensor_auto inside the ccv_nnc_hint_tensor_auto method.
1984
118k
      ccv_nnc_hint_tensor_auto(node->cmd, input_params, node->input_size, node->hint, output_params, node->output_size);
1985
305k
      for (i = 0; i < node->output_size; 
i++186k
)
1986
        /* Only assign the output parameters if the symbol itself is auto. */
1987
186k
        if (node->outputs[i] >= 0 && 
ccv_nnc_is_tensor_auto(tensor_symbol_info[node->outputs[i]].info)174k
)
1988
100
          tensor_symbol_info[node->outputs[i]].info = output_params[i];
1989
118k
    }
1990
123k
  } ccv_nnc_graph_visit_endfor
1991
  // If still point to any device, assign default device 00 to it.
1992
359k
  for (i = 0; i < symbolic_graph->tensor_symbol_info->rnum; 
i++332k
)
1993
332k
    if (CCV_TENSOR_GET_DEVICE(tensor_symbol_info[i].info.type) == CCV_COMPUTE_DEVICE_ANY)
1994
129k
      tensor_symbol_info[i].info.type = (~CCV_COMPUTE_DEVICE_ANY & tensor_symbol_info[i].info.type) | CCV_COMPUTE_DEVICE_000;
1995
26.5k
}
1996
1997
void ccv_nnc_symbolic_graph_tensor_auto(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
1998
8.85k
{
1999
8.85k
  assert((sources && source_size) || (!sources && !source_size));
2000
8.85k
  const ccv_nnc_graph_exec_symbol_t* const graph_sources = sources ? 
sources0
: (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(graph->sources, 0);
2001
8.85k
  const int graph_source_size = source_size ? 
source_size0
: graph->sources->rnum;
2002
8.85k
  assert((destinations && destination_size) || (!destinations && !destination_size));
2003
8.85k
  const ccv_nnc_graph_exec_symbol_t* const graph_destinations = destinations ? 
destinations0
: (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(graph->destinations, 0);
2004
8.85k
  const int graph_destination_size = destination_size ? 
destination_size0
: graph->destinations->rnum;
2005
17.7k
  ccv_nnc_graph_visit_t* const visit = 
ccv_nnc_graph_visit_new8.85k
(graph, (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0), graph->exec_symbol_info->rnum, graph_sources, graph_source_size, graph_destinations, graph_destination_size, 0);
2006
8.85k
  ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, 0);
2007
  // Some more clever things we can do here:
2008
  // 1. If there is a backward symbol for it, copy over the parameters.
2009
17.7k
  int i;
2010
67.9k
  for (i = 0; i < graph->backward.tensor_symbol_size; 
i++59.0k
)
2011
59.0k
  {
2012
59.0k
    const int d = graph->backward.tensor_symbol_idx[i];
2013
59.0k
    if (d >= 0)
2014
34.0k
    {
2015
34.0k
      tensor_symbol_info[d].info = tensor_symbol_info[i].info;
2016
34.0k
      memcpy(tensor_symbol_info[d].stride, tensor_symbol_info[i].stride, sizeof(tensor_symbol_info[i].stride));
2017
34.0k
      memcpy(tensor_symbol_info[d].ofs, tensor_symbol_info[i].ofs, sizeof(tensor_symbol_info[i].ofs));
2018
34.0k
    }
2019
59.0k
  }
2020
17.7k
  ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)
ccv_array_get8.85k
(graph->exec_symbol_info, 0);
2021
  // 2. If there is a copy (because the data parallel setting), copy over the info.
2022
17.7k
  const int parallel_count = graph->data_parallel.count;
2023
17.7k
  if (
parallel_count > 18.85k
)
2024
16
  {
2025
16
    int j;
2026
15.8k
    for (i = 0; i < graph->data_parallel.tensor_symbol_size; 
i++15.8k
)
2027
15.8k
    {
2028
15.8k
      const int device_id = CCV_TENSOR_GET_DEVICE_ID(tensor_symbol_info[i].info.type);
2029
63.2k
      for (j = 0; j < parallel_count - 1; 
j++47.4k
)
2030
47.4k
      {
2031
47.4k
        const int d = graph->data_parallel.tensor_symbol_idx[i * (parallel_count - 1) + j];
2032
47.4k
        if (d >= 0)
2033
17.4k
        {
2034
17.4k
          tensor_symbol_info[d].info = tensor_symbol_info[i].info;
2035
17.4k
          if (j + 1 != device_id)
2036
17.4k
            CCV_TENSOR_SET_DEVICE_ID(tensor_symbol_info[d].info.type, j + 1); // Set the device id.
2037
0
          else
2038
0
            CCV_TENSOR_SET_DEVICE_ID(tensor_symbol_info[d].info.type, 0);
2039
17.4k
          memcpy(tensor_symbol_info[d].stride, tensor_symbol_info[i].stride, sizeof(tensor_symbol_info[i].stride));
2040
17.4k
          memcpy(tensor_symbol_info[d].ofs, tensor_symbol_info[i].ofs, sizeof(tensor_symbol_info[i].ofs));
2041
17.4k
        }
2042
47.4k
      }
2043
15.8k
    }
2044
2.16k
    for (i = 0; i < graph->data_parallel.exec_symbol_size; 
i++2.14k
)
2045
8.57k
      
for (j = 0; 2.14k
j < parallel_count - 1;
j++6.43k
)
2046
6.43k
      {
2047
6.43k
        const int d = graph->data_parallel.exec_symbol_idx[i * (parallel_count - 1) + j];
2048
6.43k
        if (d >= 0)
2049
6.43k
          exec_symbol_info[d].cmd = exec_symbol_info[i].cmd;
2050
6.43k
      }
2051
16
  }
2052
17.7k
  ccv_nnc_symbolic_graph_symbol_infer(graph, visit, graph_sources, graph_source_size, graph_destinations, graph_destination_size, 0, 0, tensor_symbol_info, exec_symbol_info);
2053
17.7k
  ccv_nnc_graph_visit_free(visit);
2054
17.7k
}
2055
2056
void ccv_nnc_symbolic_graph_sources_to_destinations(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size, uint64_t* const bitmask)
2057
13
{
2058
13
  assert(sources && source_size);
2059
13
  assert(destinations && destination_size);
2060
13
  int i;
2061
45
  for (i = 0; i < source_size; 
i++32
)
2062
32
  {
2063
32
    assert(sources[i].graph == graph);
2064
32
    assert(sources[i].d >= 0 && sources[i].d < graph->exec_symbol_info->rnum);
2065
32
  }
2066
26
  
for (i = 0; 13
i < destination_size;
i++13
)
2067
13
  {
2068
13
    assert(destinations[i].graph == graph);
2069
13
    assert(destinations[i].d >= 0 && destinations[i].d < graph->exec_symbol_info->rnum);
2070
13
  }
2071
13
  ccv_sparse_matrix_t* const exec_dep = ccv_sparse_matrix_new(graph->exec_symbol_info->rnum, graph->exec_symbol_info->rnum, CCV_8U | CCV_C1, CCV_SPARSE_ROW_MAJOR, 0);
2072
13
  ccv_array_t* const ws = ccv_array_new(sizeof(int), source_size, 0);
2073
45
  for (i = 0; i < source_size; 
i++32
)
2074
32
    ccv_array_push(ws, &sources[i].d);
2075
13
  int* buf = (int*)ccmalloc(sizeof(int) * graph->exec_symbol_info->rnum);
2076
13
  int buf_size;
2077
13
#define for_block(x, val) \
2078
15
  do { \
2079
15
    if (((uint8_t*)val)[0] != 0) \
2080
15
      buf[buf_size++] = x; \
2081
15
  } while (0)
2082
13
  const uint8_t one = 1;
2083
55
  for (i = 0; i < ws->rnum; 
i++42
)
2084
42
  {
2085
42
    int j;
2086
42
    const int d = *(int*)ccv_array_get(ws, i);
2087
42
    int flag = 0;
2088
84
    for (j = 0; !flag && 
j < destination_size79
;
j++42
)
2089
42
      flag = (d == destinations[j].d);
2090
42
    if (flag)
2091
5
      continue;
2092
37
    buf_size = 0; /* save all its parent deps to this buffer */
2093
37
    ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep, d);
2094
37
    if (vector)
2095
15
      
CCV_SPARSE_VECTOR_FOREACH11
(exec_dep, vector, for_block);
2096
37
    ccv_nnc_graph_exec_symbol_info_t* const info = ccv_array_get(graph->exec_symbol_info, d);
2097
37
    if (info->outgoings && 
info->outgoings->rnum > 028
)
2098
25
    {
2099
25
      ccv_array_t* const outgoings = info->outgoings;
2100
50
      for (j = 0; j < outgoings->rnum; 
j++25
)
2101
25
      {
2102
25
        const int outgoing_d = *(int*)ccv_array_get(outgoings, j);
2103
25
        int k;
2104
25
        int flag = 0;
2105
50
        for (k = 0; !flag && 
k < destination_size35
;
k++25
)
2106
25
          flag = (outgoing_d == destinations[k].d);
2107
        // We cannot avoid the ones that visited, because these may not contain all the deps.
2108
25
        if (!flag)
2109
10
          ccv_array_push(ws, &outgoing_d);
2110
25
        ccv_set_sparse_matrix_cell(exec_dep, outgoing_d, d, &one);
2111
31
        for (k = 0; k < buf_size; 
k++6
)
2112
6
          ccv_set_sparse_matrix_cell(exec_dep, outgoing_d, buf[k], &one);
2113
25
      }
2114
25
    }
2115
37
  }
2116
13
  ccfree(buf);
2117
13
  ccv_array_free(ws);
2118
  // Use exec_dep to fill the bitmask
2119
45
  for (i = 0; i < source_size; 
i++32
)
2120
32
  {
2121
32
    const int d = sources[i].d;
2122
32
    int j;
2123
32
    int flag = 0;
2124
64
    for (j = 0; !flag && 
j < destination_size44
;
j++32
)
2125
32
      if (d == destinations[j].d) {
2126
5
        flag = 1;
2127
27
      } else {
2128
27
        ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(exec_dep, destinations[j].d, d);
2129
27
        flag = (cell.u8 && 
cell.u8[0] != 015
);
2130
27
      }
2131
32
    if (flag)
2132
20
      bitmask[i >> 6] |= ((uint64_t)1 << (i & 63));
2133
12
    else
2134
12
      bitmask[i >> 6] &= ~((uint64_t)1 << (i & 63));
2135
32
  }
2136
13
  ccv_matrix_free(exec_dep);
2137
13
}