Coverage Report

Created: 2024-08-18 16:21

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_tensor_io.c
Line | Count | Source
   1 |       | #include "ccv_nnc.h"
   2 |       | #include "ccv_nnc_easy.h"
   3 |       | #include "ccv_nnc_internal.h"
   4 |       | #include "ccv_internal.h"
   5 |       | #include "_ccv_nnc_symbolic_graph.h"
   6 |       | #include "3rdparty/sqlite3/sqlite3.h"
   7 |       | #ifdef HAVE_CUDA
   8 |       | #include "gpu/ccv_nnc_compat.h"
   9 |       | #elif HAVE_MPS
  10 |       | #include "mps/ccv_nnc_mps.h"
  11 |       | #endif
  12 |       | 
  13 |       | #ifdef NDEBUG
  14 |       | #define SQLITE_ENFORCE(stmt) (void)(stmt)
  15 |       | #else
  16 |    60 | #define SQLITE_ENFORCE assert
  17 |       | #endif
  18 |       | 
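
A note on the macro above: unlike a bare assert, SQLITE_ENFORCE(stmt) keeps evaluating stmt in NDEBUG builds, so the sqlite3 calls wrapped in it still execute when assertions are compiled out; only the result check disappears. A minimal, self-contained sketch of the same pattern (helper names here are illustrative, not from this file):

    #include <assert.h>
    #include <stdio.h>

    #ifdef NDEBUG
    #define ENFORCE(stmt) (void)(stmt) /* still evaluates stmt for its side effect */
    #else
    #define ENFORCE assert /* evaluates stmt and aborts if the result is false */
    #endif

    static int do_work(void) { puts("runs in debug and release builds"); return 0; }

    int main(void)
    {
      ENFORCE(0 == do_work()); /* do_work() executes either way */
      return 0;
    }
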
  19 |       | // MARK - Level-1 API
  20 |       | 
  21 |       | int ccv_nnc_tensor_write(const ccv_nnc_tensor_t* const tensor, void* const handle, const char* const name, const ccv_nnc_tensor_io_option_t* const options)
  22 |    30 | {
  23 |    30 |   assert(CCV_IS_TENSOR_CONTIGUOUS(tensor));
  24 |    30 |   assert(name);
  25 |    30 |   sqlite3* conn = (sqlite3*)handle;
  26 |    30 |   if (!conn)
  27 |     0 |     return CCV_IO_ERROR;
  28 |    30 |   const char tensor_create_table_qs[] = "CREATE TABLE IF NOT EXISTS tensors "
  29 |    30 |     "(name TEXT, type INTEGER, format INTEGER, datatype INTEGER, "
  30 |    30 |     "dim BLOB, data BLOB, PRIMARY KEY (name))";
  31 |    30 |   SQLITE_ENFORCE(SQLITE_OK == sqlite3_exec(conn, tensor_create_table_qs, 0, 0, 0));
  32 |    30 |   const char tensor_insert_qs[] =
  33 |    30 |     "REPLACE INTO tensors "
  34 |    30 |     "(name, type, format, datatype, dim, data) VALUES ("
  35 |    30 |     "$name, $type, $format, $datatype, $dim, $data)";
  36 |    30 |   sqlite3_stmt* tensor_insert_stmt = 0;
  37 |    30 |   SQLITE_ENFORCE(SQLITE_OK == sqlite3_prepare_v2(conn, tensor_insert_qs, sizeof(tensor_insert_qs), &tensor_insert_stmt, 0));
  38 |    30 |   sqlite3_bind_text(tensor_insert_stmt, 1, name, -1, 0);
  39 |    30 |   ccv_nnc_tensor_param_t params = tensor->info;
  40 |    30 |   const size_t data_size = ccv_nnc_tensor_data_size_without_padding(tensor->info);
  41 |    30 |   unsigned char* workspace = 0;
  42 |    30 |   unsigned int identifier = 0;
  43 |    30 | #ifdef HAVE_CUDA
  44 |    30 |   if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY)
  45 |     9 |   {
  46 |     9 |     if (!options || !options->encode)
  47 |     3 |     {
  48 |     3 |       workspace = ccmalloc(data_size);
  49 |     3 |       cumemcpy(workspace, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->info.type, data_size);
  50 |     3 |       sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0);
  51 |     6 |     } else {
  52 |     6 |       workspace = ccmalloc(data_size * 2 + 4);
  53 |     6 |       cumemcpy(workspace, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->info.type, data_size);
  54 |     6 |       size_t encoded_size = data_size + 4;
  55 |     6 |       if (options->encode(workspace, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace + data_size, &encoded_size, &params, &identifier))
  56 |     3 |         sqlite3_bind_blob(tensor_insert_stmt, 6, workspace + data_size, encoded_size, 0);
  57 |     3 |       else
  58 |     3 |         sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0);
  59 |     6 |     }
  60 |    21 |   } else {
  61 |    21 |     if (!options || !options->encode)
  62 |    15 |       sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0);
  63 |     6 |     else {
  64 |     6 |       workspace = ccmalloc(data_size + 4);
  65 |     6 |       size_t encoded_size = data_size + 4;
  66 |     6 |       if (options->encode(tensor->data.u8, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace, &encoded_size, &params, &identifier))
  67 |     3 |         sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, encoded_size, 0);
  68 |     3 |       else
  69 |     3 |         sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0);
  70 |     6 |     }
  71 |    21 |   }
  72 |       | #elif defined(HAVE_MPS)
  73 |       |   if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY)
  74 |       |   {
  75 |       |     if (!options || !options->encode)
  76 |       |     {
  77 |       |       workspace = ccmalloc(data_size);
  78 |       |       mpmemcpy(workspace, 0, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->dataof, tensor->info.type, data_size);
  79 |       |       sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0);
  80 |       |     } else {
  81 |       |       workspace = ccmalloc(data_size * 2 + 4);
  82 |       |       mpmemcpy(workspace, 0, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->dataof, tensor->info.type, data_size);
  83 |       |       size_t encoded_size = data_size + 4;
  84 |       |       if (options->encode(workspace, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace + data_size, &encoded_size, &params, &identifier))
  85 |       |         sqlite3_bind_blob(tensor_insert_stmt, 6, workspace + data_size, encoded_size, 0);
  86 |       |       else
  87 |       |         sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0);
  88 |       |     }
  89 |       |   } else {
  90 |       |     if (!options || !options->encode)
  91 |       |       sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0);
  92 |       |     else {
  93 |       |       workspace = ccmalloc(data_size + 4); // Allocate extra 4 bytes in case we need to copy the QX tensor out.
  94 |       |       size_t encoded_size = data_size + 4;
  95 |       |       if (options->encode(tensor->data.u8, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace, &encoded_size, &params, &identifier))
  96 |       |         sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, encoded_size, 0);
  97 |       |       else
  98 |       |         sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0);
  99 |       |     }
 100 |       |   }
 101 |       | #else
 102 |       |   if (!options || !options->encode)
 103 |       |     sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0);
 104 |       |   else {
 105 |       |     workspace = ccmalloc(data_size + 4);
 106 |       |     size_t encoded_size = data_size + 4;
 107 |       |     if (options->encode(tensor->data.u8, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace, &encoded_size, &params, &identifier))
 108 |       |       sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, encoded_size, 0);
 109 |       |     else
 110 |       |       sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0);
 111 |       |   }
 112 |       | #endif
 113 |    30 |   sqlite3_bind_int64(tensor_insert_stmt, 2, ((sqlite_int64)identifier << 32) | params.type);
 114 |    30 |   sqlite3_bind_int(tensor_insert_stmt, 3, params.format);
 115 |    30 |   sqlite3_bind_int64(tensor_insert_stmt, 4, ((sqlite_int64)params.reserved << 32) | params.datatype);
 116 |    30 |   sqlite3_bind_blob(tensor_insert_stmt, 5, params.dim, sizeof(params.dim), 0);
 117 |    30 |   sqlite3_step(tensor_insert_stmt);
 118 |    30 |   sqlite3_reset(tensor_insert_stmt);
 119 |    30 |   sqlite3_clear_bindings(tensor_insert_stmt);
 120 |    30 |   sqlite3_finalize(tensor_insert_stmt);
 121 |    30 |   if (workspace)
 122 |    15 |     free(workspace);
 123 |    30 |   return CCV_IO_FINAL;
 124 |    30 | }
 125 |       | 
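
For orientation, a sketch of how ccv_nnc_tensor_write is typically driven: open a SQLite database, pass the sqlite3* as the opaque handle, and optionally supply an encode callback through ccv_nnc_tensor_io_option_t. The callback prototype below is inferred from the call sites above (it returns nonzero once it has written an encoded copy into the provided buffer); the authoritative typedef lives in the ccv_nnc headers, so treat the exact signature as an assumption.

    /* Sketch only: a pass-through encoder and a write call, assuming the
     * callback prototype inferred from the call sites above. */
    #include "ccv_nnc.h"
    #include "3rdparty/sqlite3/sqlite3.h"

    static int encode_noop(const void* const data, const size_t data_size, const int datatype, const int* const dim, const int dim_count, void* const context, void* const encoded, size_t* const encoded_size, ccv_nnc_tensor_param_t* const params, unsigned int* const identifier)
    {
      return 0; /* 0 = "not encoded": the writer falls back to the raw bytes */
    }

    static void write_example(ccv_nnc_tensor_t* const tensor)
    {
      sqlite3* conn = 0;
      if (SQLITE_OK != sqlite3_open("weights.sqlite3", &conn))
        return;
      ccv_nnc_tensor_io_option_t options = {
        .encode = encode_noop, /* or pass 0 as options to skip encoding */
        .context = 0,
      };
      ccv_nnc_tensor_write(tensor, conn, "fc1.weight", &options);
      sqlite3_close(conn);
    }

Note how line 113 packs the 32-bit encoder identifier into the high half of the stored type column, and line 115 packs params.reserved over datatype the same way; ccv_nnc_tensor_read recovers both with the matching shifts.
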
 126 |       | int ccv_nnc_tensor_read(void* const handle, const char* const name, const ccv_nnc_tensor_io_option_t* const options, const int flags, const ccv_nnc_tensor_param_t* const tensor_params_optional, ccv_nnc_tensor_t** const tensor_out)
 127 |    36 | {
 128 |    36 |   assert(name);
 129 |    36 |   sqlite3* conn = (sqlite3*)handle;
 130 |    36 |   if (!conn)
 131 |     0 |     return CCV_IO_ERROR;
 132 |    36 |   const char tensor_select_qs[] =
 133 |    36 |     "SELECT data, type, format, datatype, dim FROM tensors WHERE name=$name";
 134 |    36 |   sqlite3_stmt* tensor_select_stmt = 0;
 135 |    36 |   if (SQLITE_OK != sqlite3_prepare_v2(conn, tensor_select_qs, sizeof(tensor_select_qs), &tensor_select_stmt, 0))
 136 |     0 |     return CCV_IO_ERROR;
 137 |    36 |   sqlite3_bind_text(tensor_select_stmt, 1, name, -1, 0);
 138 |    36 |   if (SQLITE_ROW != sqlite3_step(tensor_select_stmt))
 139 |     0 |   {
 140 |     0 |     sqlite3_finalize(tensor_select_stmt);
 141 |     0 |     return CCV_IO_ERROR;
 142 |     0 |   }
 143 |    36 |   ccv_nnc_tensor_t* tensor = *tensor_out;
 144 |    36 |   ccv_nnc_tensor_param_t tensor_params;
 145 |    36 |   int datatype = 0;
 146 |    36 |   unsigned int identifier = 0;
 147 |    36 |   if (!tensor) // If the tensor is not provided, we need to create one.
 148 |    18 |   {
 149 |    18 |     if (tensor_params_optional)
 150 |    10 |     {
 151 |    10 |       identifier = (sqlite3_column_int64(tensor_select_stmt, 1) >> 32) & 0xffffffff;
 152 |    10 |       datatype = sqlite3_column_int64(tensor_select_stmt, 3) & 0xffffffff;
 153 |    10 |       tensor_params = *tensor_params_optional;
 154 |    10 |       assert(!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY));
 155 |    10 |     } else {
 156 |     8 |       const sqlite_int64 type = sqlite3_column_int64(tensor_select_stmt, 1);
 157 |     8 |       identifier = (type >> 32) & 0xffffffff;
 158 |     8 |       tensor_params.type = (type & 0xffffffff);
 159 |     8 |       tensor_params.format = sqlite3_column_int(tensor_select_stmt, 2);
 160 |     8 |       const sqlite_int64 datatype_mix = sqlite3_column_int64(tensor_select_stmt, 3);
 161 |     8 |       datatype = tensor_params.datatype = (datatype_mix & 0xffffffff);
 162 |     8 |       tensor_params.reserved = (datatype_mix >> 32) & 0xffffffff;
 163 |     8 |       const void* const dim = sqlite3_column_blob(tensor_select_stmt, 4);
 164 |     8 |       memcpy(tensor_params.dim, dim, ccv_min(sizeof(tensor_params.dim), sqlite3_column_bytes(tensor_select_stmt, 4)));
 165 |     8 |     }
 166 |    18 |     if (!options || !options->decode)
 167 |    14 |     {
 168 |    14 |       if (flags & CCV_NNC_TENSOR_READ_METADATA_ONLY)
 169 |     2 |       {
 170 |     2 |         *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, CCV_NO_DATA_ALLOC); // Set the data point to 1 so it is allocated without data.
 171 |     2 |         assert(tensor->data.u8 == 0); // Set it back to 0.
 172 |       |         // Already done loading metadata, return.
 173 |     2 |         sqlite3_reset(tensor_select_stmt);
 174 |     2 |         sqlite3_clear_bindings(tensor_select_stmt);
 175 |     2 |         sqlite3_finalize(tensor_select_stmt);
 176 |     2 |         return CCV_IO_FINAL;
 177 |     2 |       } else
 178 |    12 |         *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
 179 |    14 |     } else {
 180 |     4 |       assert(!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY));
 181 |     4 |     }
 182 |    18 |   } else {
 183 |    18 |     identifier = (sqlite3_column_int64(tensor_select_stmt, 1) >> 32) & 0xffffffff;
 184 |    18 |     datatype = sqlite3_column_int(tensor_select_stmt, 3) & 0xffffffff;
 185 |    18 |     tensor_params = tensor->info;
 186 |    18 |     assert(!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY));
 187 |    18 |   }
 188 |    34 |   const void* const data = sqlite3_column_blob(tensor_select_stmt, 0);
 189 |    34 |   int dim[CCV_NNC_MAX_DIM_ALLOC];
 190 |    34 |   memcpy(dim, sqlite3_column_blob(tensor_select_stmt, 4), ccv_min(sizeof(dim), sqlite3_column_bytes(tensor_select_stmt, 4)));
 191 |    34 |   const int nd = ccv_nnc_tensor_nd(dim);
 192 |    34 |   if (datatype != tensor_params.datatype && CCV_GET_DATA_TYPE(tensor_params.datatype) != CCV_QX)
 193 |    12 |   {
 194 |       |     // Only ever works for 16F to 32F or 32F to 16F transparently.
 195 |    12 |     assert((datatype == CCV_16F && tensor_params.datatype == CCV_32F) || (datatype == CCV_32F && tensor_params.datatype == CCV_16F));
 196 |    12 |     const size_t tensor_count = ccv_nnc_tensor_count(tensor_params);
 197 |    12 |     ccv_nnc_tensor_param_t params = tensor_params;
 198 |    12 |     params.datatype = datatype;
 199 |    12 |     const size_t source_data_size = ccv_nnc_tensor_data_size(params);
 200 |    12 | #ifdef HAVE_CUDA
 201 |    12 |     if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY)
 202 |     6 |     {
 203 |     6 |       const size_t data_size = ccv_nnc_tensor_data_size(tensor_params);
 204 |     6 |       unsigned char* workspace;
 205 |     6 |       unsigned char* copying;
 206 |     6 |       size_t decoded_size = data_size;
 207 |     6 |       if (!options || !options->decode)
 208 |     2 |       {
 209 |     2 |         copying = workspace = ccmalloc(data_size);
 210 |     2 |         if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
 211 |     1 |           ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
 212 |     1 |         else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F)
 213 |     1 |           ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
 214 |     0 |         else
 215 |     0 |           { assert(0); }
 216 |     4 |       } else {
 217 |     4 |         copying = workspace = ccmalloc(data_size + source_data_size);
 218 |     4 |         if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
 219 |     2 |         {
 220 |     2 |           decoded_size = source_data_size;
 221 |     2 |           if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size))
 222 |     1 |           {
 223 |       |             // If we loaded quantized tensor, don't do the conversion.
 224 |     1 |             if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
 225 |     0 |               copying = workspace + data_size;
 226 |     1 |             else {
 227 |     1 |               ccv_half_precision_to_float((uint16_t*)(workspace + data_size), (float*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t)));
 228 |     1 |               decoded_size = data_size;
 229 |     1 |             }
 230 |     1 |           } else {
 231 |     1 |             if (!tensor)
 232 |     0 |               *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
 233 |     1 |             ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
 234 |     1 |             decoded_size = data_size;
 235 |     1 |           }
 236 |     2 |         } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) {
 237 |     2 |           decoded_size = source_data_size;
 238 |     2 |           if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size))
 239 |     1 |           {
 240 |     1 |             if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
 241 |     0 |               copying = workspace + data_size;
 242 |     1 |             else {
 243 |     1 |               ccv_float_to_half_precision((float*)(workspace + data_size), (uint16_t*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float)));
 244 |     1 |               decoded_size = data_size;
 245 |     1 |             }
 246 |     1 |           } else {
 247 |     1 |             if (!tensor)
 248 |     0 |               *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
 249 |     1 |             ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
 250 |     1 |             decoded_size = data_size;
 251 |     1 |           }
 252 |     2 |         } else
 253 |     0 |           { assert(0); }
 254 |     4 |       }
 255 |     6 |       cumemcpy(tensor_out[0]->data.u8, tensor_out[0]->info.type, copying, CCV_TENSOR_CPU_MEMORY, decoded_size);
 256 |     6 |       ccfree(workspace);
 257 |     6 |     } else {
 258 |     6 |       if (!options || !options->decode)
 259 |     2 |       {
 260 |     2 |         if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
 261 |     1 |           ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
 262 |     1 |         else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F)
 263 |     1 |           ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
 264 |     0 |         else
 265 |     0 |           { assert(0); }
 266 |     4 |       } else {
 267 |     4 |         void* const workspace = ccmalloc(source_data_size);
 268 |     4 |         if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
 269 |     2 |         {
 270 |     2 |           size_t decoded_size = source_data_size;
 271 |     2 |           if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size))
 272 |     1 |           {
 273 |     1 |             if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
 274 |     0 |             {
 275 |     0 |               if (decoded_size > 0)
 276 |     0 |                 memcpy(tensor_out[0]->data.f32, workspace, ccv_min(source_data_size, decoded_size));
 277 |     0 |             } else
 278 |     1 |               ccv_half_precision_to_float((uint16_t*)workspace, tensor_out[0]->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t)));
 279 |     1 |           } else {
 280 |     1 |             if (!tensor)
 281 |     0 |               *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
 282 |     1 |             ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
 283 |     1 |           }
 284 |     2 |         } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) {
 285 |     2 |           size_t decoded_size = source_data_size;
 286 |     2 |           if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size))
 287 |     1 |           {
 288 |     1 |             if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
 289 |     0 |             {
 290 |     0 |               if (decoded_size > 0)
 291 |     0 |                 memcpy(tensor_out[0]->data.f16, workspace, ccv_min(source_data_size, decoded_size));
 292 |     0 |             } else
 293 |     1 |               ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor_out[0]->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float)));
 294 |     1 |           } else {
 295 |     1 |             if (!tensor)
 296 |     0 |               *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
 297 |     1 |             ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
 298 |     1 |           }
 299 |     2 |         } else
 300 |     0 |           { assert(0); }
 301 |     4 |         ccfree(workspace);
 302 |     4 |       }
 303 |     6 |     }
 304 |       | #elif defined(HAVE_MPS)
 305 |       |     if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY)
 306 |       |     {
 307 |       |       const size_t data_size = ccv_nnc_tensor_data_size(tensor_params);
 308 |       |       unsigned char* workspace;
 309 |       |       unsigned char* copying;
 310 |       |       size_t decoded_size = data_size;
 311 |       |       if (!options || !options->decode)
 312 |       |       {
 313 |       |         copying = workspace = ccmalloc(data_size);
 314 |       |         if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
 315 |       |           ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
 316 |       |         else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F)
 317 |       |           ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
 318 |       |         else
 319 |       |           { assert(0); }
 320 |       |       } else {
 321 |       |         copying = workspace = ccmalloc(data_size + source_data_size);
 322 |       |         if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
 323 |       |         {
 324 |       |           decoded_size = source_data_size;
 325 |       |           if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size))
 326 |       |           {
 327 |       |             if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
 328 |       |               copying = workspace + data_size;
 329 |       |             else {
 330 |       |               ccv_half_precision_to_float((uint16_t*)(workspace + data_size), (float*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t)));
 331 |       |               decoded_size = data_size;
 332 |       |             }
 333 |       |           } else {
 334 |       |             if (!tensor)
 335 |       |               *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
 336 |       |             ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
 337 |       |             decoded_size = data_size;
 338 |       |           }
 339 |       |         } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) {
 340 |       |           decoded_size = source_data_size;
 341 |       |           if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size))
 342 |       |           {
 343 |       |             if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
 344 |       |               copying = workspace + data_size;
 345 |       |             else {
 346 |       |               ccv_float_to_half_precision((float*)(workspace + data_size), (uint16_t*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float)));
 347 |       |               decoded_size = data_size;
 348 |       |             }
 349 |       |           } else {
 350 |       |             if (!tensor)
 351 |       |               *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
 352 |       |             ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
 353 |       |             decoded_size = data_size;
 354 |       |           }
 355 |       |         } else
 356 |       |           { assert(0); }
 357 |       |       }
 358 |       |       assert(tensor_out[0]->dataof == 0);
 359 |       |       mpmemcpy(tensor_out[0]->data.u8, tensor_out[0]->dataof, tensor_out[0]->info.type, copying, 0, CCV_TENSOR_CPU_MEMORY, decoded_size);
 360 |       |       ccfree(workspace);
 361 |       |     } else {
 362 |       |       if (!options || !options->decode)
 363 |       |       {
 364 |       |         if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
 365 |       |           ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
 366 |       |         else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F)
 367 |       |           ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
 368 |       |         else
 369 |       |           { assert(0); }
 370 |       |       } else {
 371 |       |         void* const workspace = ccmalloc(source_data_size);
 372 |       |         if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
 373 |       |         {
 374 |       |           size_t decoded_size = source_data_size;
 375 |       |           if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size))
 376 |       |           {
 377 |       |             if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
 378 |       |             {
 379 |       |               if (decoded_size > 0)
 380 |       |                 memcpy(tensor_out[0]->data.f32, workspace, ccv_min(source_data_size, decoded_size));
 381 |       |             } else
 382 |       |               ccv_half_precision_to_float((uint16_t*)workspace, tensor_out[0]->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t)));
 383 |       |           } else {
 384 |       |             if (!tensor)
 385 |       |               *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
 386 |       |             ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
 387 |       |           }
 388 |       |         } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) {
 389 |       |           size_t decoded_size = source_data_size;
 390 |       |           if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size))
 391 |       |           {
 392 |       |             if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
 393 |       |             {
 394 |       |               if (decoded_size > 0)
 395 |       |                 memcpy(tensor_out[0]->data.f16, workspace, ccv_min(source_data_size, decoded_size));
 396 |       |             } else
 397 |       |               ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor_out[0]->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float)));
 398 |       |           } else {
 399 |       |             if (!tensor)
 400 |       |               *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
 401 |       |             ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
 402 |       |           }
 403 |       |         } else
 404 |       |           { assert(0); }
 405 |       |         ccfree(workspace);
 406 |       |       }
 407 |       |     }
 408 |       | #else
 409 |       |     if (!options || !options->decode)
 410 |       |     {
 411 |       |       if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
 412 |       |         ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
 413 |       |       else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F)
 414 |       |         ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
 415 |       |       else
 416 |       |         { assert(0); }
 417 |       |     } else {
 418 |       |       void* const workspace = ccmalloc(source_data_size);
 419 |       |       if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
 420 |       |       {
 421 |       |         size_t decoded_size = source_data_size;
 422 |       |         if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size))
 423 |       |         {
 424 |       |           if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
 425 |       |           {
 426 |       |             if (decoded_size > 0)
 427 |       |               memcpy(tensor_out[0]->data.f32, workspace, ccv_min(source_data_size, decoded_size));
 428 |       |           } else
 429 |       |             ccv_half_precision_to_float((uint16_t*)workspace, tensor_out[0]->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t)));
 430 |       |         } else {
 431 |       |           if (!tensor)
 432 |       |             *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
 433 |       |           ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
 434 |       |         }
 435 |       |       } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) {
 436 |       |         size_t decoded_size = source_data_size;
 437 |       |         if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size))
 438 |       |         {
 439 |       |           if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
 440 |       |           {
 441 |       |             if (decoded_size > 0)
 442 |       |               memcpy(tensor_out[0]->data.f16, workspace, ccv_min(source_data_size, decoded_size));
 443 |       |           } else
 444 |       |             ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor_out[0]->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float)));
 445 |       |         } else {
 446 |       |           if (!tensor)
 447 |       |             *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
 448 |       |           ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
 449 |       |         }
 450 |       |       } else
 451 |       |         { assert(0); }
 452 |       |       ccfree(workspace);
 453 |       |     }
 454 |       | #endif
 455 |    22 |   } else {
 456 |       |     // If it is QX, we need to have a custom decoder to decode properly.
 457 |    22 |     if (datatype != tensor_params.datatype)
 458 |     0 |       { assert(options && options->decode); }
 459 |    22 |     size_t data_size = ccv_nnc_tensor_data_size(tensor_params);
 460 |    22 | #ifdef HAVE_CUDA
 461 |    22 |     if (!options || !options->decode)
 462 |    14 |     {
 463 |    14 |       if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY)
 464 |     1 |         cumemcpy(tensor->data.u8, tensor->info.type, data, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
 465 |    13 |       else
 466 |    13 |         memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
 467 |    14 |     } else {
 468 |     8 |       if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY)
 469 |     2 |       {
 470 |     2 |         void* const workspace = ccmalloc(data_size);
 471 |     2 |         size_t decoded_size = data_size;
 472 |     2 |         if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size))
 473 |     1 |           cumemcpy(tensor_out[0]->data.u8, tensor_out[0]->info.type, workspace, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, decoded_size));
 474 |     1 |         else {
 475 |     1 |           if (!tensor)
 476 |     1 |             *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
 477 |     1 |           cumemcpy(tensor->data.u8, tensor->info.type, data, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
 478 |     1 |         }
 479 |     2 |         ccfree(workspace);
 480 |     6 |       } else {
 481 |     6 |         size_t decoded_size = data_size;
 482 |     6 |         if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, tensor ? tensor->data.u8 : 0, &decoded_size))
 483 |     3 |         {
 484 |     3 |           if (!tensor)
 485 |     1 |             *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
 486 |     3 |           memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
 487 |     3 |         }
 488 |     6 |       }
 489 |     8 |     }
 490 |       | #elif defined(HAVE_MPS)
 491 |       |     if (!options || !options->decode)
 492 |       |     {
 493 |       |       if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY)
 494 |       |       {
 495 |       |         assert(tensor->dataof == 0);
 496 |       |         mpmemcpy(tensor->data.u8, tensor->dataof, tensor->info.type, data, 0, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
 497 |       |       } else
 498 |       |         memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
 499 |       |     } else {
 500 |       |       if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY)
 501 |       |       {
 502 |       |         if (tensor)
 503 |       |           { assert(tensor->dataof == 0); }
 504 |       |         void* const workspace = ccmalloc(data_size);
 505 |       |         size_t decoded_size = data_size;
 506 |       |         if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) {
 507 |       |           mpmemcpy(tensor_out[0]->data.u8, tensor_out[0]->dataof, tensor_out[0]->info.type, workspace, 0, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, decoded_size));
 508 |       |         } else {
 509 |       |           if (!tensor)
 510 |       |             *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
 511 |       |           mpmemcpy(tensor->data.u8, tensor->dataof, tensor->info.type, data, 0, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
 512 |       |         }
 513 |       |         ccfree(workspace);
 514 |       |       } else {
 515 |       |         size_t decoded_size = data_size;
 516 |       |         if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, tensor ? tensor->data.u8 : 0, &decoded_size))
 517 |       |         {
 518 |       |           if (!tensor)
 519 |       |             *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
 520 |       |           memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
 521 |       |         }
 522 |       |       }
 523 |       |     }
 524 |       | #else
 525 |       |     if (!options || !options->decode)
 526 |       |       memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
 527 |       |     else {
 528 |       |       size_t decoded_size = data_size;
 529 |       |       if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, tensor ? tensor->data.u8 : 0, &decoded_size))
 530 |       |       {
 531 |       |         if (!tensor)
 532 |       |           *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
 533 |       |         memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
 534 |       |       }
 535 |       |     }
 536 |       | #endif
 537 |    22 |   }
 538 |    34 |   tensor_out[0]->type &= ~CCV_GARBAGE; // If it is marked as garbage, remove that mark now.
 539 |    34 |   sqlite3_reset(tensor_select_stmt);
 540 |    34 |   sqlite3_clear_bindings(tensor_select_stmt);
 541 |    34 |   sqlite3_finalize(tensor_select_stmt);
 542 |    34 |   return CCV_IO_FINAL;
 543 |    34 | }
 544 |       | 
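
And the matching read side: with *tensor_out set to 0 the reader allocates a tensor from the stored metadata (or from tensor_params_optional when given), while the CCV_NNC_TENSOR_READ_METADATA_ONLY flag returns an unallocated shell. The decode callback prototype below is likewise inferred from the call sites in ccv_nnc_tensor_read; check the ccv_nnc headers for the authoritative typedef.

    /* Sketch only: reading back the tensor written above, assuming the
     * decode prototype inferred from the call sites in ccv_nnc_tensor_read. */
    #include "ccv_nnc.h"
    #include "3rdparty/sqlite3/sqlite3.h"

    static int decode_noop(const void* const data, const size_t data_size, const int datatype, const int* const dim, const int dim_count, const unsigned int identifier, void* const context, const ccv_nnc_tensor_param_t tensor_params, ccv_nnc_tensor_t** const tensor_out, void* const decoded, size_t* const decoded_size)
    {
      return 0; /* 0 = "not decoded": the reader falls back to the raw bytes */
    }

    static ccv_nnc_tensor_t* read_example(void)
    {
      sqlite3* conn = 0;
      if (SQLITE_OK != sqlite3_open("weights.sqlite3", &conn))
        return 0;
      ccv_nnc_tensor_io_option_t options = {
        .decode = decode_noop, /* or pass 0 as options to skip decoding */
        .context = 0,
      };
      ccv_nnc_tensor_t* tensor = 0; /* 0 lets the reader allocate from stored metadata */
      ccv_nnc_tensor_read(conn, "fc1.weight", &options, 0, 0, &tensor);
      sqlite3_close(conn);
      return tensor; /* caller releases with ccv_nnc_tensor_free */
    }

Note that the 16F/32F paths in the listing convert transparently when the stored datatype differs from the requested one; any other mismatch, such as CCV_QX quantized data, requires a real decoder per the comments at lines 194 and 456.
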