/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_tensor_io.c
Line | Count | Source |
1 | | #include "ccv_nnc.h" |
2 | | #include "ccv_nnc_easy.h" |
3 | | #include "ccv_nnc_internal.h" |
4 | | #include "ccv_internal.h" |
5 | | #include "_ccv_nnc_symbolic_graph.h" |
6 | | #include "3rdparty/sqlite3/sqlite3.h" |
7 | | #ifdef HAVE_CUDA |
8 | | #include "gpu/ccv_nnc_compat.h" |
9 | | #elif HAVE_MPS |
10 | | #include "mps/ccv_nnc_mps.h" |
11 | | #endif |
12 | | |
13 | | #ifdef NDEBUG |
14 | | #define SQLITE_ENFORCE(stmt) (void)(stmt) |
15 | | #else |
16 | 60 | #define SQLITE_ENFORCE assert |
17 | | #endif |
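// Note: the release (NDEBUG) form still evaluates its argument, it only
// casts the result to void, so a side-effecting call wrapped below, e.g.
//   SQLITE_ENFORCE(SQLITE_OK == sqlite3_exec(conn, tensor_create_table_qs, 0, 0, 0));
// keeps executing in release builds; only the failed-status assert is compiled out.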
18 | | |
19 | | // MARK - Level-1 API |
20 | | |
21 | | int ccv_nnc_tensor_write(const ccv_nnc_tensor_t* const tensor, void* const handle, const char* const name, const ccv_nnc_tensor_io_option_t* const options) |
22 | 30 | { |
23 | 30 | assert(CCV_IS_TENSOR_CONTIGUOUS(tensor)); |
24 | 30 | assert(name); |
25 | 30 | sqlite3* conn = (sqlite3*)handle; |
26 | 30 | if (!conn) |
27 | 0 | return CCV_IO_ERROR; |
28 | 30 | const char tensor_create_table_qs[] = "CREATE TABLE IF NOT EXISTS tensors " |
29 | 30 | "(name TEXT, type INTEGER, format INTEGER, datatype INTEGER, " |
30 | 30 | "dim BLOB, data BLOB, PRIMARY KEY (name))"; |
31 | 30 | SQLITE_ENFORCE(SQLITE_OK == sqlite3_exec(conn, tensor_create_table_qs, 0, 0, 0)); |
32 | 30 | const char tensor_insert_qs[] = |
33 | 30 | "REPLACE INTO tensors " |
34 | 30 | "(name, type, format, datatype, dim, data) VALUES (" |
35 | 30 | "$name, $type, $format, $datatype, $dim, $data)"; |
36 | 30 | sqlite3_stmt* tensor_insert_stmt = 0; |
37 | 30 | SQLITE_ENFORCE(SQLITE_OK == sqlite3_prepare_v2(conn, tensor_insert_qs, sizeof(tensor_insert_qs), &tensor_insert_stmt, 0)); |
38 | 30 | sqlite3_bind_text(tensor_insert_stmt, 1, name, -1, 0); |
39 | 30 | ccv_nnc_tensor_param_t params = tensor->info; |
40 | 30 | const size_t data_size = ccv_nnc_tensor_data_size_without_padding(tensor->info); |
41 | 30 | unsigned char* workspace = 0; |
42 | 30 | unsigned int identifier = 0; |
43 | 30 | #ifdef HAVE_CUDA |
44 | 30 | if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY) |
45 | 9 | { |
46 | 9 | if (!options || !options->encode)
47 | 3 | { |
48 | 3 | workspace = ccmalloc(data_size); |
49 | 3 | cumemcpy(workspace, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->info.type, data_size); |
50 | 3 | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0); |
51 | 6 | } else { |
52 | 6 | workspace = ccmalloc(data_size * 2 + 4); |
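// Layout: the first data_size bytes stage the copy from GPU memory; the
// remaining data_size + 4 bytes give the encoder room for output that does
// not shrink (the extra 4 bytes match the QX note in the MPS branch below).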
53 | 6 | cumemcpy(workspace, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->info.type, data_size); |
54 | 6 | size_t encoded_size = data_size + 4; |
55 | 6 | if (options->encode(workspace, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace + data_size, &encoded_size, ¶ms, &identifier)) |
56 | 3 | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace + data_size, encoded_size, 0); |
57 | 3 | else |
58 | 3 | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0); |
59 | 6 | } |
60 | 21 | } else { |
61 | 21 | if (!options || !options->encode)
62 | 15 | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
63 | 6 | else { |
64 | 6 | workspace = ccmalloc(data_size + 4); |
65 | 6 | size_t encoded_size = data_size + 4; |
66 | 6 | if (options->encode(tensor->data.u8, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace, &encoded_size, ¶ms, &identifier)) |
67 | 3 | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, encoded_size, 0); |
68 | 3 | else |
69 | 3 | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
70 | 6 | } |
71 | 21 | } |
72 | | #elif defined(HAVE_MPS) |
73 | | if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY) |
74 | | { |
75 | | if (!options || !options->encode) |
76 | | { |
77 | | workspace = ccmalloc(data_size); |
78 | | mpmemcpy(workspace, 0, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->dataof, tensor->info.type, data_size); |
79 | | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0); |
80 | | } else { |
81 | | workspace = ccmalloc(data_size * 2 + 4); |
82 | | mpmemcpy(workspace, 0, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->dataof, tensor->info.type, data_size); |
83 | | size_t encoded_size = data_size + 4; |
84 | | if (options->encode(workspace, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace + data_size, &encoded_size, ¶ms, &identifier)) |
85 | | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace + data_size, encoded_size, 0); |
86 | | else |
87 | | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0); |
88 | | } |
89 | | } else { |
90 | | if (!options || !options->encode) |
91 | | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
92 | | else { |
93 | | workspace = ccmalloc(data_size + 4); // Allocate extra 4 bytes in case we need to copy the QX tensor out. |
94 | | size_t encoded_size = data_size + 4; |
95 | | if (options->encode(tensor->data.u8, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace, &encoded_size, ¶ms, &identifier)) |
96 | | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, encoded_size, 0); |
97 | | else |
98 | | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
99 | | } |
100 | | } |
101 | | #else |
102 | | if (!options || !options->encode) |
103 | | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
104 | | else { |
105 | | workspace = ccmalloc(data_size + 4); |
106 | | size_t encoded_size = data_size + 4; |
107 | | if (options->encode(tensor->data.u8, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace, &encoded_size, ¶ms, &identifier)) |
108 | | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, encoded_size, 0); |
109 | | else |
110 | | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
111 | | } |
112 | | #endif |
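// The scalar columns pack two 32-bit values into one 64-bit integer: the
// "type" column holds (identifier << 32) | params.type and the "datatype"
// column holds (params.reserved << 32) | params.datatype;
// ccv_nnc_tensor_read undoes this with the matching shifts and masks.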
113 | 30 | sqlite3_bind_int64(tensor_insert_stmt, 2, ((sqlite_int64)identifier << 32) | params.type); |
114 | 30 | sqlite3_bind_int(tensor_insert_stmt, 3, params.format); |
115 | 30 | sqlite3_bind_int64(tensor_insert_stmt, 4, ((sqlite_int64)params.reserved << 32) | params.datatype); |
116 | 30 | sqlite3_bind_blob(tensor_insert_stmt, 5, params.dim, sizeof(params.dim), 0); |
117 | 30 | sqlite3_step(tensor_insert_stmt); |
118 | 30 | sqlite3_reset(tensor_insert_stmt); |
119 | 30 | sqlite3_clear_bindings(tensor_insert_stmt); |
120 | 30 | sqlite3_finalize(tensor_insert_stmt); |
121 | 30 | if (workspace) |
122 | 15 | ccfree(workspace);
123 | 30 | return CCV_IO_FINAL; |
124 | 30 | } |
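// --- Usage sketch (illustrative; not part of the measured source) ---
// Persist one FP32 CPU tensor under a hypothetical name "w", assuming the
// public ccv_nnc headers and a writable path. Passing 0 for options stores
// the raw bytes; a non-zero options->encode (signature inferred from the
// call sites above) may shrink the payload and record an identifier.
sqlite3* conn = 0;
if (SQLITE_OK == sqlite3_open("tensors.sqlite3", &conn))
{
	ccv_nnc_tensor_t* const w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	// ... fill w->data.f32 with 6 floats ...
	ccv_nnc_tensor_write(w, conn, "w", 0); // 0: no encoder, store raw bytes
	ccv_nnc_tensor_free(w);
	sqlite3_close(conn);
}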
125 | | |
126 | | int ccv_nnc_tensor_read(void* const handle, const char* const name, const ccv_nnc_tensor_io_option_t* const options, const int flags, const ccv_nnc_tensor_param_t* const tensor_params_optional, ccv_nnc_tensor_t** const tensor_out) |
127 | 36 | { |
128 | 36 | assert(name); |
129 | 36 | sqlite3* conn = (sqlite3*)handle; |
130 | 36 | if (!conn) |
131 | 0 | return CCV_IO_ERROR; |
132 | 36 | const char tensor_select_qs[] = |
133 | 36 | "SELECT data, type, format, datatype, dim FROM tensors WHERE name=$name"; |
134 | 36 | sqlite3_stmt* tensor_select_stmt = 0; |
135 | 36 | if (SQLITE_OK != sqlite3_prepare_v2(conn, tensor_select_qs, sizeof(tensor_select_qs), &tensor_select_stmt, 0)) |
136 | 0 | return CCV_IO_ERROR; |
137 | 36 | sqlite3_bind_text(tensor_select_stmt, 1, name, -1, 0); |
138 | 36 | if (SQLITE_ROW != sqlite3_step(tensor_select_stmt)) |
139 | 0 | { |
140 | 0 | sqlite3_finalize(tensor_select_stmt); |
141 | 0 | return CCV_IO_ERROR; |
142 | 0 | } |
143 | 36 | ccv_nnc_tensor_t* tensor = *tensor_out; |
144 | 36 | ccv_nnc_tensor_param_t tensor_params; |
145 | 36 | int datatype = 0; |
146 | 36 | unsigned int identifier = 0; |
147 | 36 | if (!tensor) // If the tensor is not provided, we need to create one. |
148 | 18 | { |
149 | 18 | if (tensor_params_optional) |
150 | 10 | { |
151 | 10 | identifier = (sqlite3_column_int64(tensor_select_stmt, 1) >> 32) & 0xffffffff; |
152 | 10 | datatype = sqlite3_column_int64(tensor_select_stmt, 3) & 0xffffffff; |
153 | 10 | tensor_params = *tensor_params_optional; |
154 | 10 | assert(!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY)); |
155 | 10 | } else { |
156 | 8 | const sqlite_int64 type = sqlite3_column_int64(tensor_select_stmt, 1); |
157 | 8 | identifier = (type >> 32) & 0xffffffff; |
158 | 8 | tensor_params.type = (type & 0xffffffff); |
159 | 8 | tensor_params.format = sqlite3_column_int(tensor_select_stmt, 2); |
160 | 8 | const sqlite_int64 datatype_mix = sqlite3_column_int64(tensor_select_stmt, 3); |
161 | 8 | datatype = tensor_params.datatype = (datatype_mix & 0xffffffff); |
162 | 8 | tensor_params.reserved = (datatype_mix >> 32) & 0xffffffff; |
163 | 8 | const void* const dim = sqlite3_column_blob(tensor_select_stmt, 4); |
164 | 8 | memcpy(tensor_params.dim, dim, ccv_min(sizeof(tensor_params.dim), sqlite3_column_bytes(tensor_select_stmt, 4))); |
165 | 8 | } |
166 | 18 | if (!options || !options->decode)
167 | 14 | { |
168 | 14 | if (flags & CCV_NNC_TENSOR_READ_METADATA_ONLY) |
169 | 2 | { |
170 | 2 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, CCV_NO_DATA_ALLOC); // CCV_NO_DATA_ALLOC allocates the tensor struct without its data buffer.
171 | 2 | assert(tensor->data.u8 == 0); // The data pointer comes back 0.
172 | | // Already done loading metadata, return. |
173 | 2 | sqlite3_reset(tensor_select_stmt); |
174 | 2 | sqlite3_clear_bindings(tensor_select_stmt); |
175 | 2 | sqlite3_finalize(tensor_select_stmt); |
176 | 2 | return CCV_IO_FINAL; |
177 | 2 | } else |
178 | 12 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
179 | 14 | } else { |
180 | 4 | assert(!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY)); |
181 | 4 | } |
182 | 18 | } else { |
183 | 18 | identifier = (sqlite3_column_int64(tensor_select_stmt, 1) >> 32) & 0xffffffff; |
184 | 18 | datatype = sqlite3_column_int(tensor_select_stmt, 3) & 0xffffffff; |
185 | 18 | tensor_params = tensor->info; |
186 | 18 | assert(!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY)); |
187 | 18 | } |
188 | 34 | const void* const data = sqlite3_column_blob(tensor_select_stmt, 0); |
189 | 34 | int dim[CCV_NNC_MAX_DIM_ALLOC]; |
190 | 34 | memcpy(dim, sqlite3_column_blob(tensor_select_stmt, 4), ccv_min(sizeof(dim), sqlite3_column_bytes(tensor_select_stmt, 4))); |
191 | 34 | const int nd = ccv_nnc_tensor_nd(dim); |
192 | 34 | if (datatype != tensor_params.datatype && CCV_GET_DATA_TYPE(tensor_params.datatype) != CCV_QX)
193 | 12 | { |
194 | | // Transparent conversion only ever works between 16F and 32F.
195 | 12 | assert((datatype == CCV_16F && tensor_params.datatype == CCV_32F) || (datatype == CCV_32F && tensor_params.datatype == CCV_16F)); |
196 | 12 | const size_t tensor_count = ccv_nnc_tensor_count(tensor_params); |
197 | 12 | ccv_nnc_tensor_param_t params = tensor_params; |
198 | 12 | params.datatype = datatype; |
199 | 12 | const size_t source_data_size = ccv_nnc_tensor_data_size(params); |
200 | 12 | #ifdef HAVE_CUDA |
201 | 12 | if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY) |
202 | 6 | { |
203 | 6 | const size_t data_size = ccv_nnc_tensor_data_size(tensor_params); |
204 | 6 | unsigned char* workspace; |
205 | 6 | unsigned char* copying; |
206 | 6 | size_t decoded_size = data_size; |
207 | 6 | if (!options || !options->decode)
208 | 2 | { |
209 | 2 | copying = workspace = ccmalloc(data_size); |
210 | 2 | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
211 | 1 | ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
212 | 1 | else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) |
213 | 1 | ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
214 | 0 | else |
215 | 0 | { assert(0); } |
216 | 4 | } else { |
217 | 4 | copying = workspace = ccmalloc(data_size + source_data_size); |
218 | 4 | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
219 | 2 | { |
220 | 2 | decoded_size = source_data_size; |
221 | 2 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size)) |
222 | 1 | { |
223 | | // If we loaded a quantized tensor, don't do the conversion.
224 | 1 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
225 | 0 | copying = workspace + data_size; |
226 | 1 | else { |
227 | 1 | ccv_half_precision_to_float((uint16_t*)(workspace + data_size), (float*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))); |
228 | 1 | decoded_size = data_size; |
229 | 1 | } |
230 | 1 | } else { |
231 | 1 | if (!tensor) |
232 | 0 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
233 | 1 | ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
234 | 1 | decoded_size = data_size; |
235 | 1 | } |
236 | 2 | } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) { |
237 | 2 | decoded_size = source_data_size; |
238 | 2 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size)) |
239 | 1 | { |
240 | 1 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
241 | 0 | copying = workspace + data_size; |
242 | 1 | else { |
243 | 1 | ccv_float_to_half_precision((float*)(workspace + data_size), (uint16_t*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))); |
244 | 1 | decoded_size = data_size; |
245 | 1 | } |
246 | 1 | } else { |
247 | 1 | if (!tensor) |
248 | 0 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
249 | 1 | ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
250 | 1 | decoded_size = data_size; |
251 | 1 | } |
252 | 2 | } else |
253 | 0 | { assert(0); } |
254 | 4 | } |
255 | 6 | cumemcpy(tensor_out[0]->data.u8, tensor_out[0]->info.type, copying, CCV_TENSOR_CPU_MEMORY, decoded_size); |
256 | 6 | ccfree(workspace); |
257 | 6 | } else { |
258 | 6 | if (!options || !options->decode)
259 | 2 | { |
260 | 2 | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
261 | 1 | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
262 | 1 | else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) |
263 | 1 | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
264 | 0 | else |
265 | 0 | { assert(0); } |
266 | 4 | } else { |
267 | 4 | void* const workspace = ccmalloc(source_data_size); |
268 | 4 | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F)
269 | 2 | { |
270 | 2 | size_t decoded_size = source_data_size; |
271 | 2 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
272 | 1 | { |
273 | 1 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
274 | 0 | { |
275 | 0 | if (decoded_size > 0) |
276 | 0 | memcpy(tensor_out[0]->data.f32, workspace, ccv_min(source_data_size, decoded_size)); |
277 | 0 | } else |
278 | 1 | ccv_half_precision_to_float((uint16_t*)workspace, tensor_out[0]->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))); |
279 | 1 | } else { |
280 | 1 | if (!tensor) |
281 | 0 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
282 | 1 | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
283 | 1 | } |
284 | 2 | } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) { |
285 | 2 | size_t decoded_size = source_data_size; |
286 | 2 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
287 | 1 | { |
288 | 1 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
289 | 0 | { |
290 | 0 | if (decoded_size > 0) |
291 | 0 | memcpy(tensor_out[0]->data.f16, workspace, ccv_min(source_data_size, decoded_size)); |
292 | 0 | } else |
293 | 1 | ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor_out[0]->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))); |
294 | 1 | } else { |
295 | 1 | if (!tensor) |
296 | 0 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
297 | 1 | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
298 | 1 | } |
299 | 2 | } else |
300 | 0 | { assert(0); } |
301 | 4 | ccfree(workspace); |
302 | 4 | } |
303 | 6 | } |
304 | | #elif defined(HAVE_MPS) |
305 | | if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY) |
306 | | { |
307 | | const size_t data_size = ccv_nnc_tensor_data_size(tensor_params); |
308 | | unsigned char* workspace; |
309 | | unsigned char* copying; |
310 | | size_t decoded_size = data_size; |
311 | | if (!options || !options->decode) |
312 | | { |
313 | | copying = workspace = ccmalloc(data_size); |
314 | | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
315 | | ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
316 | | else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) |
317 | | ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
318 | | else |
319 | | { assert(0); } |
320 | | } else { |
321 | | copying = workspace = ccmalloc(data_size + source_data_size); |
322 | | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
323 | | { |
324 | | decoded_size = source_data_size; |
325 | | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size)) |
326 | | { |
327 | | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
328 | | copying = workspace + data_size; |
329 | | else { |
330 | | ccv_half_precision_to_float((uint16_t*)(workspace + data_size), (float*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))); |
331 | | decoded_size = data_size; |
332 | | } |
333 | | } else { |
334 | | if (!tensor) |
335 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
336 | | ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
337 | | decoded_size = data_size; |
338 | | } |
339 | | } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) { |
340 | | decoded_size = source_data_size; |
341 | | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size)) |
342 | | { |
343 | | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
344 | | copying = workspace + data_size; |
345 | | else { |
346 | | ccv_float_to_half_precision((float*)(workspace + data_size), (uint16_t*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))); |
347 | | decoded_size = data_size; |
348 | | } |
349 | | } else { |
350 | | if (!tensor) |
351 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
352 | | ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
353 | | decoded_size = data_size; |
354 | | } |
355 | | } else |
356 | | { assert(0); } |
357 | | } |
358 | | assert(tensor_out[0]->dataof == 0); |
359 | | mpmemcpy(tensor_out[0]->data.u8, tensor_out[0]->dataof, tensor_out[0]->info.type, copying, 0, CCV_TENSOR_CPU_MEMORY, decoded_size); |
360 | | ccfree(workspace); |
361 | | } else { |
362 | | if (!options || !options->decode) |
363 | | { |
364 | | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
365 | | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
366 | | else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) |
367 | | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
368 | | else |
369 | | { assert(0); } |
370 | | } else { |
371 | | void* const workspace = ccmalloc(source_data_size); |
372 | | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
373 | | { |
374 | | size_t decoded_size = source_data_size; |
375 | | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
376 | | { |
377 | | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
378 | | { |
379 | | if (decoded_size > 0) |
380 | | memcpy(tensor_out[0]->data.f32, workspace, ccv_min(source_data_size, decoded_size)); |
381 | | } else |
382 | | ccv_half_precision_to_float((uint16_t*)workspace, tensor_out[0]->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))); |
383 | | } else { |
384 | | if (!tensor) |
385 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
386 | | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
387 | | } |
388 | | } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) { |
389 | | size_t decoded_size = source_data_size; |
390 | | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
391 | | { |
392 | | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
393 | | { |
394 | | if (decoded_size > 0) |
395 | | memcpy(tensor_out[0]->data.f16, workspace, ccv_min(source_data_size, decoded_size)); |
396 | | } else |
397 | | ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor_out[0]->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))); |
398 | | } else { |
399 | | if (!tensor) |
400 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
401 | | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
402 | | } |
403 | | } else |
404 | | { assert(0); } |
405 | | ccfree(workspace); |
406 | | } |
407 | | } |
408 | | #else |
409 | | if (!options || !options->decode) |
410 | | { |
411 | | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
412 | | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
413 | | else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) |
414 | | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
415 | | else |
416 | | { assert(0); } |
417 | | } else { |
418 | | void* const workspace = ccmalloc(source_data_size); |
419 | | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
420 | | { |
421 | | size_t decoded_size = source_data_size; |
422 | | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
423 | | { |
424 | | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
425 | | { |
426 | | if (decoded_size > 0) |
427 | | memcpy(tensor_out[0]->data.f32, workspace, ccv_min(source_data_size, decoded_size)); |
428 | | } else |
429 | | ccv_half_precision_to_float((uint16_t*)workspace, tensor_out[0]->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))); |
430 | | } else { |
431 | | if (!tensor) |
432 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
433 | | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
434 | | } |
435 | | } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) { |
436 | | size_t decoded_size = source_data_size; |
437 | | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
438 | | { |
439 | | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
440 | | { |
441 | | if (decoded_size > 0) |
442 | | memcpy(tensor_out[0]->data.f16, workspace, ccv_min(source_data_size, decoded_size)); |
443 | | } else |
444 | | ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor_out[0]->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))); |
445 | | } else { |
446 | | if (!tensor) |
447 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
448 | | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
449 | | } |
450 | | } else |
451 | | { assert(0); } |
452 | | ccfree(workspace); |
453 | | } |
454 | | #endif |
455 | 22 | } else { |
456 | | // If it is QX, we need a custom decoder to decode properly.
457 | 22 | if (datatype != tensor_params.datatype) |
458 | 0 | { assert(options && options->decode); } |
459 | 22 | size_t data_size = ccv_nnc_tensor_data_size(tensor_params); |
460 | 22 | #ifdef HAVE_CUDA |
461 | 22 | if (!options || !options->decode)
462 | 14 | { |
463 | 14 | if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY) |
464 | 1 | cumemcpy(tensor->data.u8, tensor->info.type, data, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
465 | 13 | else |
466 | 13 | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
467 | 14 | } else { |
468 | 8 | if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY) |
469 | 2 | { |
470 | 2 | void* const workspace = ccmalloc(data_size); |
471 | 2 | size_t decoded_size = data_size; |
472 | 2 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
473 | 1 | cumemcpy(tensor_out[0]->data.u8, tensor_out[0]->info.type, workspace, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, decoded_size)); |
474 | 1 | else { |
475 | 1 | if (!tensor) |
476 | 1 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
477 | 1 | cumemcpy(tensor->data.u8, tensor->info.type, data, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
478 | 1 | } |
479 | 2 | ccfree(workspace); |
480 | 6 | } else { |
481 | 6 | size_t decoded_size = data_size; |
482 | 6 | if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, tensor ? tensor->data.u8 : 0, &decoded_size))
483 | 3 | { |
484 | 3 | if (!tensor) |
485 | 1 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
486 | 3 | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
487 | 3 | } |
488 | 6 | } |
489 | 8 | } |
490 | | #elif defined(HAVE_MPS) |
491 | | if (!options || !options->decode) |
492 | | { |
493 | | if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY) |
494 | | { |
495 | | assert(tensor->dataof == 0); |
496 | | mpmemcpy(tensor->data.u8, tensor->dataof, tensor->info.type, data, 0, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
497 | | } else |
498 | | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
499 | | } else { |
500 | | if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY) |
501 | | { |
502 | | if (tensor) |
503 | | { assert(tensor->dataof == 0); } |
504 | | void* const workspace = ccmalloc(data_size); |
505 | | size_t decoded_size = data_size; |
506 | | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) { |
507 | | mpmemcpy(tensor_out[0]->data.u8, tensor_out[0]->dataof, tensor_out[0]->info.type, workspace, 0, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, decoded_size)); |
508 | | } else { |
509 | | if (!tensor) |
510 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
511 | | mpmemcpy(tensor->data.u8, tensor->dataof, tensor->info.type, data, 0, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
512 | | } |
513 | | ccfree(workspace); |
514 | | } else { |
515 | | size_t decoded_size = data_size; |
516 | | if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, tensor ? tensor->data.u8 : 0, &decoded_size)) |
517 | | { |
518 | | if (!tensor) |
519 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
520 | | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
521 | | } |
522 | | } |
523 | | } |
524 | | #else |
525 | | if (!options || !options->decode) |
526 | | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
527 | | else { |
528 | | size_t decoded_size = data_size; |
529 | | if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, tensor ? tensor->data.u8 : 0, &decoded_size)) |
530 | | { |
531 | | if (!tensor) |
532 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
533 | | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
534 | | } |
535 | | } |
536 | | #endif |
537 | 22 | } |
538 | 34 | tensor_out[0]->type &= ~CCV_GARBAGE; // If it is marked as garbage, remove that mark now. |
539 | 34 | sqlite3_reset(tensor_select_stmt); |
540 | 34 | sqlite3_clear_bindings(tensor_select_stmt); |
541 | 34 | sqlite3_finalize(tensor_select_stmt); |
542 | 34 | return CCV_IO_FINAL; |
543 | 34 | } |
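// --- Matching read sketch (illustrative; same assumptions as above) ---
// A NULL tensor pointer makes ccv_nnc_tensor_read allocate from the stored
// metadata; an existing tensor reuses its buffer, with transparent
// 16F <-> 32F conversion when the stored datatype differs. With a custom
// options->decode (signature inferred from the call sites above: it gets
// the stored identifier and returns nonzero once it has produced output),
// the raw bytes go through the decoder first.
ccv_nnc_tensor_t* w = 0; // NULL: let the call allocate from stored metadata
if (CCV_IO_FINAL == ccv_nnc_tensor_read(conn, "w", 0, 0, 0, &w))
{
	// w->info now carries the stored type / format / datatype / dim;
	// pass CCV_NNC_TENSOR_READ_METADATA_ONLY in flags to skip the data.
}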
544 | | |