Coverage Report

Created: 2022-08-03 23:52

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/ccv_nnc_xpu_alloc.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv_nnc.h"
2
#include "ccv_nnc_easy.h"
3
#include "ccv_nnc_internal.h"
4
#include "ccv_nnc_easy.h"
5
#include "ccv_internal.h"
6
#include "_ccv_nnc_xpu_alloc.h"
7
#ifdef HAVE_CUDA
8
#include "gpu/ccv_nnc_compat.h"
9
#include <stdbool.h>
10
11
static int dy_alloc_tree_cmp(const dy_alloc_metadata_t* const a_node, const dy_alloc_metadata_t* const b_node)
12
1.51k
{
13
1.51k
  return (a_node->size > b_node->size) - (b_node->size > a_node->size);
14
1.51k
}
15
16
rb_gen(, dy_alloc_tree_, dy_alloc_tree_t, dy_alloc_metadata_t, link, dy_alloc_tree_cmp)
17
18
static void _ccv_nnc_xpu_metadata_free(dy_alloc_metadata_t* node, void* arg)
19
92
{
20
228
  do {
21
228
    dy_alloc_metadata_t* const next = node->next;
22
228
    cufree(node->device, node->ptr);
23
228
    ccfree(node);
24
228
    node = next;
25
228
  } while (node);
26
92
}
27
28
static void _ccv_nnc_xpu_alloc_drain(khash_t(dy_dev)* const dev, const ccv_nnc_stream_context_t* const stream)
29
19
{
30
  // Wait until the stream is free, and then do the free.
31
19
  if (stream)
32
8
    ccv_nnc_stream_context_wait(stream);
33
19
  khiter_t k;
34
111
  for (k = 
kh_begin19
(dev); k != kh_end(dev);
++k92
)
35
92
  {
36
92
    if (!kh_exist(dev, k))
37
50
      continue;
38
42
    dy_alloc_tree_t* const tree = &kh_val(dev, k);
39
42
    dy_alloc_tree_destroy(tree, _ccv_nnc_xpu_metadata_free, 0);
40
42
    kh_del(dy_dev, dev, k);
41
42
  }
42
19
}
43
44
static void _ccv_nnc_xpu_stream_destructor_hook(const ccv_nnc_stream_context_t* const stream, void* const context)
45
2
{
46
2
  ccv_nnc_xpu_alloc_t* const xpu_alloc = (ccv_nnc_xpu_alloc_t*)context;
47
2
  khash_t(dy_str)* const freed = xpu_alloc->freed;
48
2
  const int64_t str = (int64_t)(intptr_t)stream;
49
2
  khiter_t i = kh_get(dy_str, freed, str);
50
2
  assert(i != kh_end(freed));
51
2
  khash_t(dy_dev)* const dev = kh_val(freed, i).dev;
52
2
  _ccv_nnc_xpu_alloc_drain(dev, stream);
53
2
  kh_destroy(dy_dev, dev);
54
2
  kh_del(dy_str, freed, i);
55
2
}
56
57
void* ccv_nnc_xpu_alloc(ccv_nnc_xpu_alloc_t* const xpu_alloc, const int device, ccv_nnc_stream_context_t* const stream, const size_t size)
58
725
{
59
725
  khash_t(dy_str)* const freed = xpu_alloc->freed;
60
725
  const int64_t str = (int64_t)(intptr_t)stream;
61
725
  int ret;
62
725
  khiter_t i = kh_put(dy_str, freed, str, &ret);
63
725
  assert(ret >= 0);
64
725
  dy_alloc_metadata_t* node = 0;
65
725
  if (ret == 0)
66
706
  {
67
    // If we can find stream related allocations, try to
68
    // find the suitable ones.
69
706
    khash_t(dy_dev)* const dev = kh_val(freed, i).dev;
70
706
    assert(dev);
71
706
    khiter_t j = kh_get(dy_dev, dev, device);
72
706
    if (j != kh_end(dev))
73
559
    {
74
559
      dy_alloc_tree_t* const tree = &kh_val(dev, j);
75
559
      dy_alloc_metadata_t key = {
76
559
        .size = size
77
559
      };
78
559
      node = dy_alloc_tree_nsearch(tree, &key);
79
559
      if (node)
80
449
      {
81
449
        if (node->next) // If it is a linked list, select the one.
82
215
        {
83
215
          dy_alloc_metadata_t* next_node = node->next;
84
215
          node->next = node->next->next;
85
215
          node = next_node;
86
215
        } else
87
234
          dy_alloc_tree_remove(tree, node);
88
449
      }
89
559
    }
90
706
  } else {
91
    // Otherwise, create it.
92
19
    kh_val(freed, i).dev = kh_init(dy_dev);
93
19
    kh_val(freed, i).hook_id = stream ? 
ccv_nnc_stream_context_add_destructor_hook(stream, _ccv_nnc_xpu_stream_destructor_hook, xpu_alloc)8
:
-111
;
94
95
19
  }
96
725
  if (!node)
97
276
  {
98
276
    node = (dy_alloc_metadata_t*)ccmalloc(sizeof(dy_alloc_metadata_t));
99
276
    if (xpu_alloc->mp_hdr < 0)
100
12
      xpu_alloc->mp_hdr = curegmp((cump_f)ccv_nnc_xpu_gc, xpu_alloc);
101
276
    node->ptr = cumalloc(device, size);
102
276
    if (!node->ptr) // If cannot allocate, drain the pool first and then allocate.
103
0
    {
104
0
      ccfree(node);
105
0
      return 0;
106
0
    }
107
276
    node->device = device;
108
276
    node->size = size;
109
276
    node->str = str;
110
449
  } else {
111
449
    assert(node->size >= size);
112
449
    assert(node->device == device);
113
449
    assert(node->str == str);
114
449
  }
115
725
  node->next = 0;
116
725
  khash_t(dy_alloc)* const allocd = xpu_alloc->allocd;
117
725
  i = kh_put(dy_alloc, allocd, (int64_t)(intptr_t)node->ptr, &ret);
118
725
  assert(ret > 0);
119
725
  kh_val(allocd, i) = node;
120
725
  return node->ptr;
121
725
}
122
123
void ccv_nnc_xpu_free(ccv_nnc_xpu_alloc_t* const xpu_alloc, void* const ptr)
124
725
{
125
725
  khash_t(dy_alloc)* const allocd = xpu_alloc->allocd;
126
725
  khiter_t i = kh_get(dy_alloc, allocd, (int64_t)(intptr_t)ptr);
127
725
  assert(i != kh_end(allocd));
128
725
  dy_alloc_metadata_t* const node = kh_val(allocd, i);
129
725
  kh_del(dy_alloc, allocd, i);
130
725
  assert(node->ptr == ptr);
131
725
  khash_t(dy_str)* const freed = xpu_alloc->freed;
132
725
  i = kh_get(dy_str, freed, node->str);
133
  // If cannot find associated stream, that means this allocation associated
134
  // stream has been freed. I have to do synchronous free of this pointer.
135
725
  if (i == kh_end(freed))
136
48
  {
137
48
    cufree(node->device, node->ptr);
138
48
    ccfree(node);
139
48
    return;
140
48
  }
141
677
  khash_t(dy_dev)* const dev = kh_val(freed, i).dev;
142
677
  int ret;
143
677
  khiter_t j = kh_put(dy_dev, dev, node->device, &ret);
144
677
  assert(ret >= 0);
145
677
  dy_alloc_tree_t* const tree = &kh_val(dev, j);
146
677
  if (ret != 0)
147
42
    dy_alloc_tree_new(tree);
148
677
  dy_alloc_metadata_t* const canon_node = dy_alloc_tree_search(tree, node);
149
677
  if (!canon_node)
150
326
    dy_alloc_tree_insert(tree, node);
151
351
  else { // Insert into the linked list.
152
351
    node->next = canon_node->next;
153
351
    canon_node->next = node;
154
351
  }
155
677
}
156
157
void ccv_nnc_xpu_alloc_destroy(ccv_nnc_xpu_alloc_t* const xpu_alloc)
158
2.31k
{
159
2.31k
  khash_t(dy_alloc)* const allocd = xpu_alloc->allocd;
160
2.31k
  khiter_t k;
161
2.68k
  for (k = 
kh_begin2.31k
(allocd); k != kh_end(allocd);
++k368
)
162
368
  {
163
368
    if (!kh_exist(allocd, k))
164
368
      continue;
165
0
    _ccv_nnc_xpu_metadata_free(kh_val(allocd, k), 0);
166
0
  }
167
2.31k
  kh_destroy(dy_alloc, allocd);
168
2.31k
  khash_t(dy_str)* const freed = xpu_alloc->freed;
169
2.36k
  for (k = 
kh_begin2.31k
(freed); k != kh_end(freed);
++k48
)
170
48
  {
171
48
    if (!kh_exist(freed, k))
172
31
      continue;
173
17
    khash_t(dy_dev)* const dev = kh_val(freed, k).dev;
174
17
    ccv_nnc_stream_context_t* const stream = (ccv_nnc_stream_context_t*)(intptr_t)kh_key(freed, k);
175
17
    _ccv_nnc_xpu_alloc_drain(dev, stream);
176
17
    if (stream)
177
6
    {
178
6
      const int hook_id = kh_val(freed, k).hook_id;
179
6
      ccv_nnc_stream_context_remove_destructor_hook(stream, hook_id);
180
6
    }
181
17
    kh_destroy(dy_dev, dev);
182
17
  }
183
2.31k
  kh_destroy(dy_str, freed);
184
2.31k
  if (xpu_alloc->mp_hdr >= 0)
185
12
    cuunregmp(xpu_alloc->mp_hdr);
186
2.31k
}
187
188
void ccv_nnc_xpu_gc(ccv_nnc_xpu_alloc_t* const xpu_alloc)
189
0
{
190
0
  khash_t(dy_str)* const freed = xpu_alloc->freed;
191
0
  khiter_t k;
192
0
  for (k = kh_begin(freed); k != kh_end(freed); ++k)
193
0
  {
194
0
    if (!kh_exist(freed, k))
195
0
      continue;
196
0
    khash_t(dy_dev)* const dev = kh_val(freed, k).dev;
197
0
    ccv_nnc_stream_context_t* const stream = (ccv_nnc_stream_context_t*)(intptr_t)kh_key(freed, k);
198
0
    _ccv_nnc_xpu_alloc_drain(dev, stream);
199
0
  }
200
0
}
201
#else
202
void* ccv_nnc_xpu_alloc(ccv_nnc_xpu_alloc_t* const xpu_alloc, const int device, ccv_nnc_stream_context_t* const stream, const size_t size)
203
{
204
  return 0;
205
}
206
207
void ccv_nnc_xpu_free(ccv_nnc_xpu_alloc_t* const xpu_alloc, void* const ptr)
208
{
209
}
210
211
void ccv_nnc_xpu_alloc_destroy(ccv_nnc_xpu_alloc_t* const xpu_alloc)
212
{
213
}
214
215
void ccv_nnc_xpu_gc(ccv_nnc_xpu_alloc_t* const dynamic_graph)
216
{
217
}
218
#endif