Coverage Report

Created: 2021-09-30 20:21

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/ccv_nnc_dynamic_graph_alloc.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv_nnc.h"
2
#include "ccv_nnc_easy.h"
3
#include "ccv_nnc_internal.h"
4
#include "ccv_nnc_easy.h"
5
#include "ccv_internal.h"
6
#include "_ccv_nnc_dynamic_graph.h"
7
#ifdef HAVE_CUDA
8
#include "gpu/ccv_nnc_compat.h"
9
#include <stdbool.h>
10
11
static int dy_alloc_tree_cmp(const dy_alloc_metadata_t* const a_node, const dy_alloc_metadata_t* const b_node)
12
1.41k
{
13
1.41k
  return (a_node->size > b_node->size) - (b_node->size > a_node->size);
14
1.41k
}
15
16
rb_gen(, dy_alloc_tree_, dy_alloc_tree_t, dy_alloc_metadata_t, link, dy_alloc_tree_cmp)
17
18
static void _ccv_nnc_dynamic_graph_metadata_free(dy_alloc_metadata_t* node, void* arg)
19
89
{
20
214
  do {
21
214
    dy_alloc_metadata_t* const next = node->next;
22
214
    cufree(node->device, node->ptr);
23
214
    ccfree(node);
24
214
    node = next;
25
214
  } while (node);
26
89
}
27
28
static void _ccv_nnc_dynamic_graph_xpu_alloc_drain(khash_t(dy_dev)* const dev, const ccv_nnc_stream_context_t* const stream)
29
18
{
30
18
  // Wait until the stream is free, and then do the free.
31
18
  if (stream)
32
8
    ccv_nnc_stream_context_wait(stream);
33
18
  khiter_t k;
34
106
  for (k = 
kh_begin18
(dev); k != kh_end(dev);
++k88
)
35
88
  {
36
88
    if (!kh_exist(dev, k))
37
88
      
continue47
;
38
41
    dy_alloc_tree_t* const tree = &kh_val(dev, k);
39
41
    dy_alloc_tree_destroy(tree, _ccv_nnc_dynamic_graph_metadata_free, 0);
40
41
    kh_del(dy_dev, dev, k);
41
41
  }
42
18
}
43
44
static void _ccv_nnc_dynamic_graph_xpu_stream_destructor_hook(const ccv_nnc_stream_context_t* const stream, void* const context)
45
2
{
46
2
  ccv_nnc_dynamic_graph_t* const graph = (ccv_nnc_dynamic_graph_t*)context;
47
2
  khash_t(dy_str)* const freed = graph->freed;
48
2
  const int64_t str = (int64_t)(intptr_t)stream;
49
2
  khiter_t i = kh_get(dy_str, freed, str);
50
2
  assert(i != kh_end(freed));
51
2
  khash_t(dy_dev)* const dev = kh_val(freed, i).dev;
52
2
  _ccv_nnc_dynamic_graph_xpu_alloc_drain(dev, stream);
53
2
  kh_destroy(dy_dev, dev);
54
2
  kh_del(dy_str, freed, i);
55
2
}
56
57
void* ccv_nnc_dynamic_graph_xpu_alloc(ccv_nnc_dynamic_graph_t* const graph, const int device, ccv_nnc_stream_context_t* const stream, const size_t size)
58
696
{
59
696
  khash_t(dy_str)* const freed = graph->freed;
60
696
  const int64_t str = (int64_t)(intptr_t)stream;
61
696
  int ret;
62
696
  khiter_t i = kh_put(dy_str, freed, str, &ret);
63
696
  assert(ret >= 0);
64
696
  dy_alloc_metadata_t* node = 0;
65
696
  if (ret == 0)
66
678
  {
67
678
    // If we can find stream related allocations, try to
68
678
    // find the suitable ones.
69
678
    khash_t(dy_dev)* const dev = kh_val(freed, i).dev;
70
678
    assert(dev);
71
678
    khiter_t j = kh_get(dy_dev, dev, device);
72
678
    if (j != kh_end(dev))
73
678
    {
74
539
      dy_alloc_tree_t* const tree = &kh_val(dev, j);
75
539
      dy_alloc_metadata_t key = {
76
539
        .size = size
77
539
      };
78
539
      node = dy_alloc_tree_nsearch(tree, &key);
79
539
      if (node)
80
434
      {
81
434
        if (node->next) // If it is a linked list, select the one.
82
203
        {
83
203
          dy_alloc_metadata_t* next_node = node->next;
84
203
          node->next = node->next->next;
85
203
          node = next_node;
86
203
        } else
87
231
          dy_alloc_tree_remove(tree, node);
88
434
      }
89
539
    }
90
678
  } else {
91
18
    // Otherwise, create it.
92
18
    kh_val(freed, i).dev = kh_init(dy_dev);
93
18
    kh_val(freed, i).hook_id = stream ? 
ccv_nnc_stream_context_add_destructor_hook(stream, _ccv_nnc_dynamic_graph_xpu_stream_destructor_hook, graph)8
:
-110
;
94
18
95
18
  }
96
696
  if (!node)
97
262
  {
98
262
    node = (dy_alloc_metadata_t*)ccmalloc(sizeof(dy_alloc_metadata_t));
99
262
    if (graph->mp_hdr < 0)
100
11
      graph->mp_hdr = curegmp((cump_f)ccv_nnc_dynamic_graph_gc, graph);
101
262
    node->ptr = cumalloc(device, size);
102
262
    if (!node->ptr) // If cannot allocate, drain the pool first and then allocate.
103
0
    {
104
0
      ccfree(node);
105
0
      return 0;
106
0
    }
107
262
    node->device = device;
108
262
    node->size = size;
109
262
    node->str = str;
110
434
  } else {
111
434
    assert(node->size >= size);
112
434
    assert(node->device == device);
113
434
    assert(node->str == str);
114
434
  }
115
696
  node->next = 0;
116
696
  khash_t(dy_alloc)* const allocd = graph->allocd;
117
696
  i = kh_put(dy_alloc, allocd, (int64_t)(intptr_t)node->ptr, &ret);
118
696
  assert(ret > 0);
119
696
  kh_val(allocd, i) = node;
120
696
  return node->ptr;
121
696
}
122
123
void ccv_nnc_dynamic_graph_xpu_free(ccv_nnc_dynamic_graph_t* const graph, void* const ptr)
124
696
{
125
696
  khash_t(dy_alloc)* const allocd = graph->allocd;
126
696
  khiter_t i = kh_get(dy_alloc, allocd, (int64_t)(intptr_t)ptr);
127
696
  assert(i != kh_end(allocd));
128
696
  dy_alloc_metadata_t* const node = kh_val(allocd, i);
129
696
  kh_del(dy_alloc, allocd, i);
130
696
  assert(node->ptr == ptr);
131
696
  khash_t(dy_str)* const freed = graph->freed;
132
696
  i = kh_get(dy_str, freed, node->str);
133
696
  // If cannot find associated stream, that means this allocation associated
134
696
  // stream has been freed. I have to do synchronous free of this pointer.
135
696
  if (i == kh_end(freed))
136
696
  {
137
48
    cufree(node->device, node->ptr);
138
48
    ccfree(node);
139
48
    return;
140
48
  }
141
648
  khash_t(dy_dev)* const dev = kh_val(freed, i).dev;
142
648
  int ret;
143
648
  khiter_t j = kh_put(dy_dev, dev, node->device, &ret);
144
648
  assert(ret >= 0);
145
648
  dy_alloc_tree_t* const tree = &kh_val(dev, j);
146
648
  if (ret != 0)
147
41
    dy_alloc_tree_new(tree);
148
648
  dy_alloc_metadata_t* const canon_node = dy_alloc_tree_search(tree, node);
149
648
  if (!canon_node)
150
320
    dy_alloc_tree_insert(tree, node);
151
328
  else { // Insert into the linked list.
152
328
    node->next = canon_node->next;
153
328
    canon_node->next = node;
154
328
  }
155
648
}
156
157
void ccv_nnc_dynamic_graph_xpu_alloc_destroy(ccv_nnc_dynamic_graph_t* const graph)
158
41
{
159
41
  khash_t(dy_alloc)* const allocd = graph->allocd;
160
41
  khiter_t k;
161
393
  for (k = 
kh_begin41
(allocd); k != kh_end(allocd);
++k352
)
162
352
  {
163
352
    if (!kh_exist(allocd, k))
164
352
      continue;
165
0
    _ccv_nnc_dynamic_graph_metadata_free(kh_val(allocd, k), 0);
166
0
  }
167
41
  kh_destroy(dy_alloc, allocd);
168
41
  khash_t(dy_str)* const freed = graph->freed;
169
85
  for (k = 
kh_begin41
(freed); k != kh_end(freed);
++k44
)
170
44
  {
171
44
    if (!kh_exist(freed, k))
172
44
      
continue28
;
173
16
    khash_t(dy_dev)* const dev = kh_val(freed, k).dev;
174
16
    ccv_nnc_stream_context_t* const stream = (ccv_nnc_stream_context_t*)(intptr_t)kh_key(freed, k);
175
16
    _ccv_nnc_dynamic_graph_xpu_alloc_drain(dev, stream);
176
16
    if (stream)
177
6
    {
178
6
      const int hook_id = kh_val(freed, k).hook_id;
179
6
      ccv_nnc_stream_context_remove_destructor_hook(stream, hook_id);
180
6
    }
181
16
    kh_destroy(dy_dev, dev);
182
16
  }
183
41
  kh_destroy(dy_str, freed);
184
41
  if (graph->mp_hdr >= 0)
185
11
    cuunregmp(graph->mp_hdr);
186
41
}
187
188
void ccv_nnc_dynamic_graph_gc(ccv_nnc_dynamic_graph_t* const graph)
189
0
{
190
0
  khash_t(dy_str)* const freed = graph->freed;
191
0
  khiter_t k;
192
0
  for (k = kh_begin(freed); k != kh_end(freed); ++k)
193
0
  {
194
0
    if (!kh_exist(freed, k))
195
0
      continue;
196
0
    khash_t(dy_dev)* const dev = kh_val(freed, k).dev;
197
0
    ccv_nnc_stream_context_t* const stream = (ccv_nnc_stream_context_t*)(intptr_t)kh_key(freed, k);
198
0
    _ccv_nnc_dynamic_graph_xpu_alloc_drain(dev, stream);
199
0
  }
200
0
}
201
#else
202
void* ccv_nnc_dynamic_graph_xpu_alloc(ccv_nnc_dynamic_graph_t* const graph, const int device, ccv_nnc_stream_context_t* const stream, const size_t size)
203
{
204
  return 0;
205
}
206
207
void ccv_nnc_dynamic_graph_xpu_free(ccv_nnc_dynamic_graph_t* const graph, void* const ptr)
208
{
209
}
210
211
void ccv_nnc_dynamic_graph_xpu_alloc_destroy(ccv_nnc_dynamic_graph_t* const graph)
212
{
213
}
214
215
void ccv_nnc_dynamic_graph_gc(ccv_nnc_dynamic_graph_t* const dynamic_graph)
216
{
217
}
218
#endif
219
220
ccv_nnc_compilation_artifact_t* ccv_nnc_compilation_artifact_new(ccv_nnc_graph_t* const graph, ccv_nnc_tensor_arena_t* const tensor_arena, ccv_nnc_graph_exec_arena_t* const exec_arena)
221
204
{
222
204
  ccv_nnc_compilation_artifact_t* const artifact = (ccv_nnc_compilation_artifact_t*)ccmalloc(sizeof(ccv_nnc_compilation_artifact_t));
223
204
  artifact->graph = graph;
224
204
  artifact->tensor_arena = tensor_arena;
225
204
  artifact->exec_arena = exec_arena;
226
204
  return artifact;
227
204
}
228
229
void ccv_nnc_compilation_artifact_free(ccv_nnc_compilation_artifact_t* const artifact)
230
204
{
231
204
  ccv_nnc_graph_free(artifact->graph);
232
204
  ccv_nnc_tensor_arena_free(artifact->tensor_arena);
233
204
  ccv_nnc_graph_exec_arena_free(artifact->exec_arena);
234
204
  ccfree(artifact);
235
204
}