Coverage Report

Created: 2025-02-24 17:43

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_symbolic_graph_memory_reduction.c

Coverage: every executable line below has an execution count of 0; the file is entirely uncovered.

#include "ccv_nnc.h"
#include "ccv_nnc_easy.h"
#include "ccv_nnc_internal.h"
#include "ccv_internal.h"
#include "_ccv_nnc_symbolic_graph.h"

// MARK - Level-3.5 API

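// tensor_marked / tensor_used are dense bitsets, one bit per tensor symbol
// (bit d & 0x1f of word d >> 5). This helper intersects them so that only
// tensors which are both marked (outputs of a datatype conversion) and used
// (read by a backward node) stay marked.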
static void _ccv_nnc_remove_unused_from_marked(const uint32_t* const tensor_used, const int size, uint32_t* const tensor_marked)
{
  int i;
  for (i = 0; i < size; i++)
    tensor_marked[i] &= tensor_used[i];
}
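
// Per-candidate bookkeeping: the tensor the conversion was computed from
// (original), the conversion nodes that already produce this tensor
// (old_conversion_nodes), and the wiring for the new conversion
// (reconversion.sources feed it, reconversion.nodes consume its output).
// okay is set once a candidate passes all the checks below.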
typedef struct {
  int okay;
  int original;
  ccv_nnc_tensor_param_t info;
  ccv_array_t* old_conversion_nodes;
  struct {
    ccv_array_t* sources;
    ccv_array_t* nodes;
  } reconversion;
} ccv_nnc_conversion_info_t;
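
// Reverse adjacency: for each exec symbol, the nodes that point to it. Used to
// walk from a backward consumer back to its predecessors when looking for
// places to anchor the new conversion node.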
typedef struct {
  ccv_array_t* outgoings;
} ccv_nnc_graph_exec_symbol_reverse_t;

void ccv_nnc_symbolic_graph_memory_reduction(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size)
{
  // Note that exec_symbol_info and tensor_symbol_info cannot be accessed once I start to mutate the graph.
  // Therefore, I do the mutation as the last step, staying well clear of that possibility.
  ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0);
  ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, 0);
  ccv_nnc_graph_visit_t* const visit = ccv_nnc_graph_visit_new(graph, exec_symbol_info, graph->exec_symbol_info->rnum, sources, source_size, destinations, destination_size, 0);
  ccv_nnc_symbolic_graph_symbol_infer(graph, visit, sources, source_size, destinations, destination_size, 0, 0, tensor_symbol_info, exec_symbol_info);
  const int tensor_symbol_info_size = graph->tensor_symbol_info->rnum;
  const int exec_symbol_info_size = graph->exec_symbol_info->rnum;
  uint32_t* const tensor_marked = (uint32_t*)cccalloc(((tensor_symbol_info_size + 31) >> 5) * 2, sizeof(uint32_t));
  uint32_t* const tensor_used = tensor_marked + ((tensor_symbol_info_size + 31) >> 5);
  ccv_nnc_graph_exec_symbol_reverse_t* const reversed_nodes = cccalloc(exec_symbol_info_size, sizeof(ccv_nnc_graph_exec_symbol_reverse_t));
  int i, j, k;
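  // First pass: build the reverse adjacency list, mark outputs of existing
  // datatype-conversion nodes in tensor_marked, and mark every tensor read by
  // a backward node in tensor_used.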
  ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx) {
    if (node->flags & CCV_NNC_GRAPH_EXEC_DEAD)
      continue;
    if (node->outgoings)
      for (i = 0; i < node->outgoings->rnum; i++)
      {
        const int d = *(int*)ccv_array_get(node->outgoings, i);
        if (!reversed_nodes[d].outgoings)
          reversed_nodes[d].outgoings = ccv_array_new(sizeof(int), 1, 0);
        ccv_array_add_unique_int(reversed_nodes[d].outgoings, idx);
      }
    if (node->cmd.cmd == CCV_NNC_DATATYPE_CONVERSION_FORWARD && node->output_size >= 1 && node->outputs[0] >= 0)
    {
      const int d = node->outputs[0];
      // If this tensor is an alias, or assigned (while loop), or bypassed (case..of), skip.
      if (tensor_symbol_info[d].alias_ref || tensor_symbol_info[d].assign_ref || tensor_symbol_info[d].bypass_ref ||
          tensor_symbol_info[d].r_assign_ref || tensor_symbol_info[d].r_bypass_ref)
        continue;
      tensor_marked[d >> 5] |= (1u << (d & 0x1f));
    } else if (ccv_nnc_cmd_is_backward(node->cmd))
      for (i = 0; i < node->input_size; i++)
      {
        const int d = node->inputs[i];
        if (d >= 0)
          tensor_used[d >> 5] |= (1u << (d & 0x1f));
      }
  } ccv_nnc_graph_visit_endfor
  // If a tensor is marked but never used in the backward pass, there is no need to reduce it.
  _ccv_nnc_remove_unused_from_marked(tensor_used, (tensor_symbol_info_size + 31) >> 5, tensor_marked);
  // If this tensor is pointed to by an alias, we don't want to reconvert it.
  for (i = 0; i < tensor_symbol_info_size; i++)
    if (tensor_symbol_info[i].alias_ref)
    {
      const int d = tensor_symbol_info[i].alias_ref - 1;
      // Unmark it.
      if ((tensor_marked[d >> 5] & (1u << (d & 0x1f))))
        tensor_marked[d >> 5] &= ~(1u << (d & 0x1f));
    }
  ccv_nnc_exec_dep_t exec_deps = ccv_nnc_exec_dep_new(graph, visit);
  // Now tensor_marked only contains the tensors that we think are beneficial to reconvert. Find the best place to insert the conversion.
  ccv_nnc_conversion_info_t* const conversion_info = cccalloc(tensor_symbol_info_size, sizeof(ccv_nnc_conversion_info_t));
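  // Second pass: for each marked tensor, record the original input and the
  // existing conversion nodes that produce it, plus every backward node that
  // reads it (the reconversion consumers).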
  ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx) {
    if (node->cmd.cmd == CCV_NNC_DATATYPE_CONVERSION_FORWARD && node->output_size >= 1 && node->outputs[0] >= 0)
    {
      const int d = node->outputs[0];
      if (d >= 0 && (tensor_marked[d >> 5] & (1u << (d & 0x1f))))
      {
        conversion_info[d].original = node->inputs[0];
        if (!conversion_info[d].old_conversion_nodes)
          conversion_info[d].old_conversion_nodes = ccv_array_new(sizeof(int), 0, 0);
        ccv_array_add_unique_int(conversion_info[d].old_conversion_nodes, idx);
      }
    } else if (ccv_nnc_cmd_is_backward(node->cmd))
      for (i = 0; i < node->input_size; i++)
      {
        const int d = node->inputs[i];
        if (d >= 0 && (tensor_marked[d >> 5] & (1u << (d & 0x1f))))
        {
          if (!conversion_info[d].reconversion.nodes)
            conversion_info[d].reconversion.nodes = ccv_array_new(sizeof(int), 0, 0);
          ccv_array_add_unique_int(conversion_info[d].reconversion.nodes, idx);
        }
      }
  } ccv_nnc_graph_visit_endfor
  for (i = 0; i < tensor_symbol_info_size; i++)
  {
    if (!conversion_info[i].reconversion.nodes)
      continue;
    // Check whether reconversion is beneficial (i.e. the converted output is larger than the original input).
    const int original_datatype = tensor_symbol_info[conversion_info[i].original].info.datatype;
    const int converted_datatype = tensor_symbol_info[i].info.datatype;
    if (CCV_GET_DATA_TYPE_SIZE(original_datatype) >= CCV_GET_DATA_TYPE_SIZE(converted_datatype))
      continue;
    // If we have more than one destination, we need to find the common ancestor.
    ccv_array_t* const nodes = conversion_info[i].reconversion.nodes;
    ccv_array_t* const old_conversion_nodes = conversion_info[i].old_conversion_nodes;
    assert(nodes->rnum > 0);
    assert(old_conversion_nodes && old_conversion_nodes->rnum > 0);
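    // If any consumer sits within a few hops of an existing conversion node,
    // the converted tensor is short-lived anyway and reconverting it would buy
    // nothing; detect that case first.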
    int flag = 0;
    for (j = 0; j < nodes->rnum; j++)
    {
      const int d = *(int*)ccv_array_get(nodes, j);
      ccv_sparse_matrix_vector_t* const vector = ccv_get_sparse_matrix_vector(exec_deps.deps, d);
      assert(vector);
      for (k = 0; k < old_conversion_nodes->rnum; k++)
      {
        const int dd = *(int*)ccv_array_get(old_conversion_nodes, k);
        const int hop = ccv_nnc_exec_dep_hop(exec_deps, d, vector, dd);
        if (hop >= 0 && hop <= 3)
          flag = 1;
      }
      if (flag)
        break;
    }
    // If there is no need to reconvert, abort the whole thing.
    if (flag)
      continue;
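    // Pick sources for the new conversion node: immediate predecessors of the
    // consumers. A predecessor is rejected if it is itself a consumer, or if a
    // consumer already runs before it (the reconverted tensor would arrive too
    // late for that consumer).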
    ccv_array_t* const reconversion_sources = ccv_array_new(sizeof(int), 0, 0);
    for (j = 0; j < nodes->rnum; j++)
    {
      const int d = *(int*)ccv_array_get(nodes, j);
      ccv_array_t* const outgoings = reversed_nodes[d].outgoings;
      if (!outgoings)
        continue;
      int x, y;
      for (x = 0; x < outgoings->rnum; x++)
      {
        const int dd = *(int*)ccv_array_get(outgoings, x);
        int flag = 0;
        for (y = 0; !flag && y < nodes->rnum; y++)
        {
          if (j == y)
            continue;
          const int ddd = *(int*)ccv_array_get(nodes, y);
          // If the outgoing is one of the nodes, we cannot add it as a source.
          if (ddd == dd)
          {
            flag = 1;
            continue;
          }
          // Check dependencies: if there is a dependency from the y node to dd, dd cannot be a source.
          const int checked = ccv_nnc_exec_dep_check(exec_deps, dd, 0, ddd);
          if (checked)
            flag = 1;
        }
        if (!flag)
          ccv_array_add_unique_int(reconversion_sources, dd);
      }
    }
    // If there are no sources, abort the whole thing.
    if (reconversion_sources->rnum == 0)
    {
      ccv_array_free(reconversion_sources);
      continue;
    }
    // Mark it as ready for reconversion.
    conversion_info[i].reconversion.sources = reconversion_sources;
    conversion_info[i].info = tensor_symbol_info[i].info;
    conversion_info[i].okay = 1;
  }
  // Do the graph mutation now based on the conversion info.
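  // For each accepted candidate: create a fresh tensor symbol with the same
  // parameters, add a new datatype-conversion node that recomputes it from the
  // original, wire that node between the chosen sources and the consumers, and
  // repoint the consumers' inputs at the new symbol.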
  for (i = 0; i < tensor_symbol_info_size; i++)
    if (conversion_info[i].okay)
    {
      const ccv_nnc_tensor_symbol_t reconverted = ccv_nnc_tensor_symbol_new(graph, conversion_info[i].info, 0);
      const ccv_nnc_tensor_symbol_t original = (ccv_nnc_tensor_symbol_t){
        .graph = graph,
        .d = conversion_info[i].original
      };
      const ccv_nnc_graph_exec_symbol_t reconversion_node = ccv_nnc_graph_exec_symbol_new(graph, CMD_DATATYPE_CONVERSION_FORWARD(), TENSOR_SYMBOL_LIST(original), TENSOR_SYMBOL_LIST(reconverted), 0);
      ccv_array_t* const nodes = conversion_info[i].reconversion.nodes;
      assert(nodes && nodes->rnum > 0);
      ccv_array_t* const sources = conversion_info[i].reconversion.sources;
      assert(sources && sources->rnum > 0);
      for (j = 0; j < sources->rnum; j++)
      {
        const int d = *(int*)ccv_array_get(sources, j);
        ccv_nnc_graph_exec_symbol_concat(graph, (ccv_nnc_graph_exec_symbol_t){
          .graph = graph,
          .d = d,
        }, reconversion_node);
      }
      for (j = 0; j < nodes->rnum; j++)
      {
        const int d = *(int*)ccv_array_get(nodes, j);
        ccv_nnc_graph_exec_symbol_concat(graph, reconversion_node, (ccv_nnc_graph_exec_symbol_t){
          .graph = graph,
          .d = d
        });
        ccv_nnc_graph_exec_symbol_info_t* const destination_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, d);
        for (k = 0; k < destination_info->input_size; k++)
          if (destination_info->inputs[k] == i)
            destination_info->inputs[k] = reconverted.d;
      }
    }
  ccv_nnc_graph_visit_free(visit);
  ccv_nnc_exec_dep_free(exec_deps);
  ccfree(tensor_marked);
  for (i = 0; i < tensor_symbol_info_size; i++)
  {
    if (conversion_info[i].old_conversion_nodes)
      ccv_array_free(conversion_info[i].old_conversion_nodes);
    if (conversion_info[i].reconversion.nodes)
      ccv_array_free(conversion_info[i].reconversion.nodes);
    if (conversion_info[i].reconversion.sources)
      ccv_array_free(conversion_info[i].reconversion.sources);
  }
  for (i = 0; i < exec_symbol_info_size; i++)
    if (reversed_nodes[i].outgoings)
      ccv_array_free(reversed_nodes[i].outgoings);
  ccfree(reversed_nodes);
  ccfree(conversion_info);
}
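
The pass is never executed by the current test suite (every count above is 0). Below is a minimal sketch, not taken from the repository, of a graph that would reach it, assuming the public ccv_nnc API as used elsewhere in the tree (CPU_TENSOR_NHWC, CMD_EWPROD_FORWARD, TENSOR_SYMBOL_LIST, SYMBOLIC_GRAPH_SOURCES / SYMBOLIC_GRAPH_DESTINATIONS, ccv_nnc_symbolic_graph_backward); shapes and names are illustrative:

  #include <nnc/ccv_nnc.h>
  #include <nnc/ccv_nnc_easy.h>

  int main(void)
  {
    ccv_nnc_init();
    ccv_nnc_symbolic_graph_t* const graph = ccv_nnc_symbolic_graph_new();
    // An fp16 original up-converted to fp32: the pass looks for a
    // CCV_NNC_DATATYPE_CONVERSION_FORWARD node whose output datatype is larger
    // than its input datatype.
    const ccv_nnc_tensor_symbol_t a16 = ccv_nnc_tensor_symbol_new(graph, CPU_TENSOR_NHWC(16F, 128), "a16");
    const ccv_nnc_tensor_symbol_t a32 = ccv_nnc_tensor_symbol_new(graph, CPU_TENSOR_NHWC(32F, 128), "a32");
    const ccv_nnc_tensor_symbol_t b32 = ccv_nnc_tensor_symbol_new(graph, CPU_TENSOR_NHWC(32F, 128), "b32");
    const ccv_nnc_tensor_symbol_t c32 = ccv_nnc_tensor_symbol_new(graph, CPU_TENSOR_NHWC(32F, 128), "c32");
    ccv_nnc_graph_exec_symbol_new(graph, CMD_DATATYPE_CONVERSION_FORWARD(), TENSOR_SYMBOL_LIST(a16), TENSOR_SYMBOL_LIST(a32), "convert");
    ccv_nnc_graph_exec_symbol_new(graph, CMD_EWPROD_FORWARD(), TENSOR_SYMBOL_LIST(a32, b32), TENSOR_SYMBOL_LIST(c32), "prod");
    ccv_nnc_graph_exec_symbol_autogen(graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
    // Autodiff adds backward nodes; d(prod)/d(b32) reads a32 again, which makes
    // a32 both marked and used in the pass above.
    ccv_nnc_symbolic_graph_backward(graph, TENSOR_SYMBOL_LIST(c32), TENSOR_SYMBOL_LIST(b32), SYMBOLIC_GRAPH_SOURCES(graph), SYMBOLIC_GRAPH_DESTINATIONS(graph));
    ccv_nnc_graph_exec_symbol_autogen(graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
    ccv_nnc_symbolic_graph_memory_reduction(graph, SYMBOLIC_GRAPH_SOURCES(graph), SYMBOLIC_GRAPH_DESTINATIONS(graph));
    ccv_nnc_symbolic_graph_free(graph);
    return 0;
  }

In a graph this small the hop check (hop <= 3) would likely reject the candidate, so the call exercises only the analysis; a covering test would need enough nodes between the conversion and its backward consumer for the mutation branch to run.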