Coverage Report

Created: 2025-05-06 15:32

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_tensor_io.c
Line
Count
Source
1
#include "ccv_nnc.h"
2
#include "ccv_nnc_easy.h"
3
#include "ccv_nnc_internal.h"
4
#include "ccv_internal.h"
5
#include "_ccv_nnc_symbolic_graph.h"
6
#include "3rdparty/sqlite3/sqlite3.h"
7
#ifdef HAVE_CUDA
8
#include "gpu/ccv_nnc_compat.h"
9
#elif HAVE_MPS
10
#include "mps/ccv_nnc_mps.h"
11
#endif
12
13
#ifdef NDEBUG
14
#define SQLITE_ENFORCE(stmt) (void)(stmt)
15
#else
16
60
#define SQLITE_ENFORCE assert
17
#endif
18
19
// MARK - Level-1 API
20
21
int ccv_nnc_tensor_write(const ccv_nnc_tensor_t* const tensor, void* const handle, const char* const name, const ccv_nnc_tensor_io_option_t* const options)
22
30
{
23
30
  assert(CCV_IS_TENSOR_CONTIGUOUS(tensor));
24
30
  assert(name);
25
30
  sqlite3* conn = (sqlite3*)handle;
26
30
  if (!conn)
27
0
    return CCV_IO_ERROR;
28
30
  const char tensor_create_table_qs[] = "CREATE TABLE IF NOT EXISTS tensors "
29
30
    "(name TEXT, type INTEGER, format INTEGER, datatype INTEGER, "
30
30
    "dim BLOB, data BLOB, PRIMARY KEY (name))";
31
30
  SQLITE_ENFORCE(SQLITE_OK == sqlite3_exec(conn, tensor_create_table_qs, 0, 0, 0));
32
30
  const char tensor_insert_qs[] =
33
30
    "REPLACE INTO tensors "
34
30
    "(name, type, format, datatype, dim, data) VALUES ("
35
30
    "$name, $type, $format, $datatype, $dim, $data)";
36
30
  sqlite3_stmt* tensor_insert_stmt = 0;
37
30
  SQLITE_ENFORCE(SQLITE_OK == sqlite3_prepare_v2(conn, tensor_insert_qs, sizeof(tensor_insert_qs), &tensor_insert_stmt, 0));
38
30
  sqlite3_bind_text(tensor_insert_stmt, 1, name, -1, 0);
39
30
  ccv_nnc_tensor_param_t params = tensor->info;
40
30
  const size_t data_size = ccv_nnc_tensor_data_size_without_padding(tensor->info);
41
30
  unsigned char* workspace = 0;
42
30
  unsigned int identifier = 0;
43
30
#ifdef HAVE_CUDA
44
30
  if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY)
45
9
  {
46
9
    if (!options || !options->encode)  [region "!options->encode": count 6]
47
3
    {
48
3
      workspace = ccmalloc(data_size);
49
3
      cumemcpy(workspace, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->info.type, data_size);
50
3
      sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0);
51
6
    } else {
52
6
      workspace = ccmalloc(data_size * 2 + 4);
53
6
      cumemcpy(workspace, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->info.type, data_size);
54
6
      size_t encoded_size = data_size + 4;
55
6
      if (options->encode(workspace, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace + data_size, &encoded_size, &params, &identifier))
56
3
        sqlite3_bind_blob(tensor_insert_stmt, 6, workspace + data_size, encoded_size, 0);
57
3
      else
58
3
        sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0);
59
6
    }
60
21
  } else {
61
21
    if (!options || !options->encode)  [region "!options->encode": count 6]
62
15
      sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0);
63
6
    else {
64
6
      workspace = ccmalloc(data_size + 4);
65
6
      size_t encoded_size = data_size + 4;
66
6
      if (options->encode(tensor->data.u8, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace, &encoded_size, &params, &identifier))
67
3
        sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, encoded_size, 0);
68
3
      else
69
3
        sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0);
70
6
    }
71
21
  }
72
#elif defined(HAVE_MPS)
73
  if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY)
74
  {
75
    if (!options || !options->encode)
76
    {
77
      workspace = ccmalloc(data_size);
78
      mpmemcpy(workspace, 0, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->dataof, tensor->info.type, data_size);
79
      sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0);
80
    } else {
81
      workspace = ccmalloc(data_size * 2 + 4);
82
      mpmemcpy(workspace, 0, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->dataof, tensor->info.type, data_size);
83
      size_t encoded_size = data_size + 4;
84
      if (options->encode(workspace, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace + data_size, &encoded_size, &params, &identifier))
85
        sqlite3_bind_blob(tensor_insert_stmt, 6, workspace + data_size, encoded_size, 0);
86
      else
87
        sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0);
88
    }
89
  } else {
90
    if (!options || !options->encode)
91
      sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0);
92
    else {
93
      workspace = ccmalloc(data_size + 4); // Allocate extra 4 bytes in case we need to copy the QX tensor out.
94
      size_t encoded_size = data_size + 4;
95
      if (options->encode(tensor->data.u8, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace, &encoded_size, &params, &identifier))
96
        sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, encoded_size, 0);
97
      else
98
        sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0);
99
    }
100
  }
101
#else
102
  if (!options || !options->encode)
103
    sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0);
104
  else {
105
    workspace = ccmalloc(data_size + 4);
106
    size_t encoded_size = data_size + 4;
107
    if (options->encode(tensor->data.u8, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace, &encoded_size, &params, &identifier))
108
      sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, encoded_size, 0);
109
    else
110
      sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0);
111
  }
112
#endif
113
30
  sqlite3_bind_int64(tensor_insert_stmt, 2, ((sqlite_int64)identifier << 32) | params.type);
114
30
  sqlite3_bind_int(tensor_insert_stmt, 3, params.format);
115
30
  sqlite3_bind_int64(tensor_insert_stmt, 4, ((sqlite_int64)params.reserved << 32) | params.datatype);
116
30
  sqlite3_bind_blob(tensor_insert_stmt, 5, params.dim, sizeof(params.dim), 0);
117
30
  const int result = sqlite3_step(tensor_insert_stmt);
118
30
  sqlite3_reset(tensor_insert_stmt);
119
30
  sqlite3_clear_bindings(tensor_insert_stmt);
120
30
  sqlite3_finalize(tensor_insert_stmt);
121
30
  if (workspace)
122
15
    free(workspace);
123
30
  return result == SQLITE_DONE ? CCV_IO_FINAL : CCV_IO_ERROR;  [region "CCV_IO_ERROR": count 0]
124
30
}
125
126
int ccv_nnc_tensor_read(void* const handle, const char* const name, const ccv_nnc_tensor_io_option_t* const options, const int flags, const ccv_nnc_tensor_param_t* const tensor_params_optional, ccv_nnc_tensor_t** const tensor_out)
127
36
{
128
36
  assert(name);
129
36
  sqlite3* conn = (sqlite3*)handle;
130
36
  if (!conn)
131
0
    return CCV_IO_ERROR;
132
36
  const char tensor_select_qs[] =
133
36
    "SELECT data, type, format, datatype, dim FROM tensors WHERE name=$name";
134
36
  sqlite3_stmt* tensor_select_stmt = 0;
135
36
  if (SQLITE_OK != sqlite3_prepare_v2(conn, tensor_select_qs, sizeof(tensor_select_qs), &tensor_select_stmt, 0))
136
0
    return CCV_IO_ERROR;
137
36
  sqlite3_bind_text(tensor_select_stmt, 1, name, -1, 0);
138
36
  if (SQLITE_ROW != sqlite3_step(tensor_select_stmt))
139
0
  {
140
0
    sqlite3_finalize(tensor_select_stmt);
141
0
    return CCV_IO_ERROR;
142
0
  }
143
36
  ccv_nnc_tensor_t* tensor = *tensor_out;
144
36
  ccv_nnc_tensor_param_t tensor_params;
145
36
  int datatype = 0;
146
36
  unsigned int identifier = 0;
147
36
  if (!tensor) // If the tensor is not provided, we need to create one.
148
18
  {
149
18
    if (tensor_params_optional)
150
10
    {
151
10
      identifier = (sqlite3_column_int64(tensor_select_stmt, 1) >> 32) & 0xffffffff;
152
10
      datatype = sqlite3_column_int64(tensor_select_stmt, 3) & 0xffffffff;
153
10
      tensor_params = *tensor_params_optional;
154
10
      assert(!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY));
155
10
    } else {
156
8
      const sqlite_int64 type = sqlite3_column_int64(tensor_select_stmt, 1);
157
8
      identifier = (type >> 32) & 0xffffffff;
158
8
      tensor_params.type = (type & 0xffffffff);
159
8
      tensor_params.format = sqlite3_column_int(tensor_select_stmt, 2);
160
8
      const sqlite_int64 datatype_mix = sqlite3_column_int64(tensor_select_stmt, 3);
161
8
      datatype = tensor_params.datatype = (datatype_mix & 0xffffffff);
162
8
      tensor_params.reserved = (datatype_mix >> 32) & 0xffffffff;
163
8
      const void* const dim = sqlite3_column_blob(tensor_select_stmt, 4);
164
8
      memcpy(tensor_params.dim, dim, ccv_min(sizeof(tensor_params.dim), sqlite3_column_bytes(tensor_select_stmt, 4)));
165
8
    }
166
18
    if (flags & CCV_NNC_TENSOR_READ_CPU_MEMORY) // Reset type to CPU memory.
167
0
      tensor_params.type = (tensor_params.type & 0xfff00000) | CCV_TENSOR_CPU_MEMORY;
168
18
    if (!options || !options->decode)  [region "!options->decode": count 4]
169
14
    {
170
14
      if (flags & CCV_NNC_TENSOR_READ_METADATA_ONLY)
171
2
      {
172
2
        *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, CCV_NO_DATA_ALLOC); // Set the data point to 1 so it is allocated without data.
173
2
        assert(tensor->data.u8 == 0); // Set it back to 0.
174
        // Already done loading metadata, return.
175
2
        sqlite3_reset(tensor_select_stmt);
176
2
        sqlite3_clear_bindings(tensor_select_stmt);
177
2
        sqlite3_finalize(tensor_select_stmt);
178
2
        return CCV_IO_FINAL;
179
2
      } else
180
12
        *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
181
14
    } else {
182
4
      assert(!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY));
183
4
    }
184
18
  } else {
185
18
    identifier = (sqlite3_column_int64(tensor_select_stmt, 1) >> 32) & 0xffffffff;
186
18
    datatype = sqlite3_column_int(tensor_select_stmt, 3) & 0xffffffff;
187
18
    tensor_params = tensor->info;
188
18
    assert(!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY));
189
18
  }
190
34
  const void* const data = sqlite3_column_blob(tensor_select_stmt, 0);
191
34
  int dim[CCV_NNC_MAX_DIM_ALLOC];
192
34
  memcpy(dim, sqlite3_column_blob(tensor_select_stmt, 4), ccv_min(sizeof(dim), sqlite3_column_bytes(tensor_select_stmt, 4)));
193
34
  const int nd = ccv_nnc_tensor_nd(dim);
194
34
  if (datatype != tensor_params.datatype && CCV_GET_DATA_TYPE(tensor_params.datatype) != CCV_QX)  [regions "CCV_GET_DATA_TYPE": count 12, "(tensor_params.datatype) != CCV_QX": count 12]
195
12
  {
196
    // Only ever works for 16F to 32F or 32F to 16F transparently.
197
12
    assert((datatype == CCV_16F && tensor_params.datatype == CCV_32F) || (datatype == CCV_32F && tensor_params.datatype == CCV_16F));
198
12
    const size_t tensor_count = ccv_nnc_tensor_count(tensor_params);
199
12
    ccv_nnc_tensor_param_t params = tensor_params;
200
12
    params.datatype = datatype;
201
12
    const size_t source_data_size = ccv_nnc_tensor_data_size(params);
202
12
#ifdef HAVE_CUDA
203
12
    if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY)
204
6
    {
205
6
      const size_t data_size = ccv_nnc_tensor_data_size(tensor_params);
206
6
      unsigned char* workspace;
207
6
      unsigned char* copying;
208
6
      size_t decoded_size = data_size;
209
6
      if (!options || !options->decode)  [region "!options->decode": count 4]
210
2
      {
211
2
        copying = workspace = ccmalloc(data_size);
212
2
        if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)  [region "tensor_params.datatype == CCV_32F": count 1]
213
1
          ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
214
1
        else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F)
215
1
          ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
216
0
        else
217
0
          { assert(0); }
218
4
      } else {
219
4
        copying = workspace = ccmalloc(data_size + source_data_size);
220
4
        if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)  [region "tensor_params.datatype == CCV_32F": count 2]
221
2
        {
222
2
          decoded_size = source_data_size;
223
2
          if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size))
224
1
          {
225
            // If we loaded quantized tensor, don't do the conversion.
226
1
            if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
227
0
              copying = workspace + data_size;
228
1
            else {
229
1
              ccv_half_precision_to_float((uint16_t*)(workspace + data_size), (float*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t)));
230
1
              decoded_size = data_size;
231
1
            }
232
1
          } else {
233
1
            if (!tensor)
234
0
              *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
235
1
            ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
236
1
            decoded_size = data_size;
237
1
          }
238
2
        } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) {
239
2
          decoded_size = source_data_size;
240
2
          if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size))
241
1
          {
242
1
            if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
243
0
              copying = workspace + data_size;
244
1
            else {
245
1
              ccv_float_to_half_precision((float*)(workspace + data_size), (uint16_t*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float)));
246
1
              decoded_size = data_size;
247
1
            }
248
1
          } else {
249
1
            if (!tensor)
250
0
              *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
251
1
            ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
252
1
            decoded_size = data_size;
253
1
          }
254
2
        } else
255
0
          { assert(0); }
256
4
      }
257
6
      cumemcpy(tensor_out[0]->data.u8, tensor_out[0]->info.type, copying, CCV_TENSOR_CPU_MEMORY, decoded_size);
258
6
      ccfree(workspace);
259
6
    } else {
260
6
      if (!options || !options->decode)  [region "!options->decode": count 4]
261
2
      {
262
2
        if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)  [region "tensor_params.datatype == CCV_32F": count 1]
263
1
          ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
264
1
        else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F)
265
1
          ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
266
0
        else
267
0
          { assert(0); }
268
4
      } else {
269
4
        void* const workspace = ccmalloc(source_data_size);
270
4
        if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)  [region "tensor_params.datatype == CCV_32F": count 2]
271
2
        {
272
2
          size_t decoded_size = source_data_size;
273
2
          if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size))
274
1
          {
275
1
            if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
276
0
            {
277
0
              if (decoded_size > 0)
278
0
                memcpy(tensor_out[0]->data.f32, workspace, ccv_min(source_data_size, decoded_size));
279
0
            } else
280
1
              ccv_half_precision_to_float((uint16_t*)workspace, tensor_out[0]->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t)));
281
1
          } else {
282
1
            if (!tensor)
283
0
              *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
284
1
            ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
285
1
          }
286
2
        } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) {
287
2
          size_t decoded_size = source_data_size;
288
2
          if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size))
289
1
          {
290
1
            if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
291
0
            {
292
0
              if (decoded_size > 0)
293
0
                memcpy(tensor_out[0]->data.f16, workspace, ccv_min(source_data_size, decoded_size));
294
0
            } else
295
1
              ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor_out[0]->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float)));
296
1
          } else {
297
1
            if (!tensor)
298
0
              *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
299
1
            ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
300
1
          }
301
2
        } else
302
0
          { assert(0); }
303
4
        ccfree(workspace);
304
4
      }
305
6
    }
306
#elif defined(HAVE_MPS)
307
    if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY)
308
    {
309
      const size_t data_size = ccv_nnc_tensor_data_size(tensor_params);
310
      unsigned char* workspace;
311
      unsigned char* copying;
312
      size_t decoded_size = data_size;
313
      if (!options || !options->decode)
314
      {
315
        copying = workspace = ccmalloc(data_size);
316
        if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
317
          ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
318
        else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F)
319
          ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
320
        else
321
          { assert(0); }
322
      } else {
323
        copying = workspace = ccmalloc(data_size + source_data_size);
324
        if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
325
        {
326
          decoded_size = source_data_size;
327
          if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size))
328
          {
329
            if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
330
              copying = workspace + data_size;
331
            else {
332
              ccv_half_precision_to_float((uint16_t*)(workspace + data_size), (float*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t)));
333
              decoded_size = data_size;
334
            }
335
          } else {
336
            if (!tensor)
337
              *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
338
            ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
339
            decoded_size = data_size;
340
          }
341
        } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) {
342
          decoded_size = source_data_size;
343
          if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size))
344
          {
345
            if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
346
              copying = workspace + data_size;
347
            else {
348
              ccv_float_to_half_precision((float*)(workspace + data_size), (uint16_t*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float)));
349
              decoded_size = data_size;
350
            }
351
          } else {
352
            if (!tensor)
353
              *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
354
            ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
355
            decoded_size = data_size;
356
          }
357
        } else
358
          { assert(0); }
359
      }
360
      assert(tensor_out[0]->dataof == 0);
361
      mpmemcpy(tensor_out[0]->data.u8, tensor_out[0]->dataof, tensor_out[0]->info.type, copying, 0, CCV_TENSOR_CPU_MEMORY, decoded_size);
362
      ccfree(workspace);
363
    } else {
364
      if (!options || !options->decode)
365
      {
366
        if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
367
          ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
368
        else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F)
369
          ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
370
        else
371
          { assert(0); }
372
      } else {
373
        void* const workspace = ccmalloc(source_data_size);
374
        if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
375
        {
376
          size_t decoded_size = source_data_size;
377
          if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size))
378
          {
379
            if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
380
            {
381
              if (decoded_size > 0)
382
                memcpy(tensor_out[0]->data.f32, workspace, ccv_min(source_data_size, decoded_size));
383
            } else
384
              ccv_half_precision_to_float((uint16_t*)workspace, tensor_out[0]->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t)));
385
          } else {
386
            if (!tensor)
387
              *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
388
            ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
389
          }
390
        } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) {
391
          size_t decoded_size = source_data_size;
392
          if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size))
393
          {
394
            if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
395
            {
396
              if (decoded_size > 0)
397
                memcpy(tensor_out[0]->data.f16, workspace, ccv_min(source_data_size, decoded_size));
398
            } else
399
              ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor_out[0]->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float)));
400
          } else {
401
            if (!tensor)
402
              *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
403
            ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
404
          }
405
        } else
406
          { assert(0); }
407
        ccfree(workspace);
408
      }
409
    }
410
#else
411
    if (!options || !options->decode)
412
    {
413
      if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
414
        ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
415
      else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F)
416
        ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
417
      else
418
        { assert(0); }
419
    } else {
420
      void* const workspace = ccmalloc(source_data_size);
421
      if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
422
      {
423
        size_t decoded_size = source_data_size;
424
        if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size))
425
        {
426
          if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
427
          {
428
            if (decoded_size > 0)
429
              memcpy(tensor_out[0]->data.f32, workspace, ccv_min(source_data_size, decoded_size));
430
          } else
431
            ccv_half_precision_to_float((uint16_t*)workspace, tensor_out[0]->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t)));
432
        } else {
433
          if (!tensor)
434
            *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
435
          ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t)));
436
        }
437
      } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) {
438
        size_t decoded_size = source_data_size;
439
        if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size))
440
        {
441
          if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX)
442
          {
443
            if (decoded_size > 0)
444
              memcpy(tensor_out[0]->data.f16, workspace, ccv_min(source_data_size, decoded_size));
445
          } else
446
            ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor_out[0]->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float)));
447
        } else {
448
          if (!tensor)
449
            *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
450
          ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float)));
451
        }
452
      } else
453
        { assert(0); }
454
      ccfree(workspace);
455
    }
456
#endif
457
22
  } else {
458
    // If it is QX, we need to have a custom decoder to decode properly.
459
22
    if (datatype != tensor_params.datatype)
460
0
      { assert(options && options->decode); }
461
22
    size_t data_size = ccv_nnc_tensor_data_size(tensor_params);
462
22
#ifdef HAVE_CUDA
463
22
    if (!options || !options->decode)  [region "!options->decode": count 8]
464
14
    {
465
14
      if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY)
466
1
        cumemcpy(tensor->data.u8, tensor->info.type, data, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
467
13
      else
468
13
        memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
469
14
    } else {
470
8
      if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY)
471
2
      {
472
2
        void* const workspace = ccmalloc(data_size);
473
2
        size_t decoded_size = data_size;
474
2
        if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size))
475
1
          cumemcpy(tensor_out[0]->data.u8, tensor_out[0]->info.type, workspace, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, decoded_size));
476
1
        else {
477
1
          if (!tensor)
478
1
            *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
479
1
          cumemcpy(tensor->data.u8, tensor->info.type, data, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
480
1
        }
481
2
        ccfree(workspace);
482
6
      } else {
483
6
        size_t decoded_size = data_size;
484
6
        if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, tensor ? tensor->data.u8 : 0, &decoded_size))  [regions "tensor->data.u8": count 4, "0": count 2]
485
3
        {
486
3
          if (!tensor)
487
1
            *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
488
3
          memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
489
3
        }
490
6
      }
491
8
    }
492
#elif defined(HAVE_MPS)
493
    if (!options || !options->decode)
494
    {
495
      if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY)
496
      {
497
        assert(tensor->dataof == 0);
498
        mpmemcpy(tensor->data.u8, tensor->dataof, tensor->info.type, data, 0, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
499
      } else
500
        memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
501
    } else {
502
      if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY)
503
      {
504
        if (tensor)
505
          { assert(tensor->dataof == 0); }
506
        void* const workspace = ccmalloc(data_size);
507
        size_t decoded_size = data_size;
508
        if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) {
509
          mpmemcpy(tensor_out[0]->data.u8, tensor_out[0]->dataof, tensor_out[0]->info.type, workspace, 0, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, decoded_size));
510
        } else {
511
          if (!tensor)
512
            *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
513
          mpmemcpy(tensor->data.u8, tensor->dataof, tensor->info.type, data, 0, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
514
        }
515
        ccfree(workspace);
516
      } else {
517
        size_t decoded_size = data_size;
518
        if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, tensor ? tensor->data.u8 : 0, &decoded_size))
519
        {
520
          if (!tensor)
521
            *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
522
          memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
523
        }
524
      }
525
    }
526
#else
527
    if (!options || !options->decode)
528
      memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
529
    else {
530
      size_t decoded_size = data_size;
531
      if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, tensor ? tensor->data.u8 : 0, &decoded_size))
532
      {
533
        if (!tensor)
534
          *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0);
535
        memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0)));
536
      }
537
    }
538
#endif
539
22
  }
540
34
  tensor_out[0]->type &= ~CCV_GARBAGE; // If it is marked as garbage, remove that mark now.
541
34
  sqlite3_reset(tensor_select_stmt);
542
34
  sqlite3_clear_bindings(tensor_select_stmt);
543
34
  sqlite3_finalize(tensor_select_stmt);
544
34
  return CCV_IO_FINAL;
545
34
}
546