/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_tensor_io.c
Line | Count | Source |
1 | | #include "ccv_nnc.h" |
2 | | #include "ccv_nnc_easy.h" |
3 | | #include "ccv_nnc_internal.h" |
4 | | #include "ccv_internal.h" |
5 | | #include "_ccv_nnc_symbolic_graph.h" |
6 | | #include "3rdparty/sqlite3/sqlite3.h" |
7 | | #ifdef HAVE_CUDA |
8 | | #include "gpu/ccv_nnc_compat.h" |
9 | | #elif HAVE_MPS |
10 | | #include "mps/ccv_nnc_mps.h" |
11 | | #endif |
12 | | |
13 | | #ifdef NDEBUG |
14 | | #define SQLITE_ENFORCE(stmt) (void)(stmt) |
15 | | #else |
16 | 60 | #define SQLITE_ENFORCE assert |
17 | | #endif |
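Note: in release builds (NDEBUG) SQLITE_ENFORCE expands to (void)(stmt) rather than to nothing, so the wrapped sqlite3 call still executes; only the check is dropped. A minimal sketch of the two expansions (qs stands in for any of the query strings below):

    SQLITE_ENFORCE(SQLITE_OK == sqlite3_exec(conn, qs, 0, 0, 0));
    /* NDEBUG defined:   (void)(SQLITE_OK == sqlite3_exec(conn, qs, 0, 0, 0));  exec runs, result discarded */
    /* NDEBUG undefined: assert(SQLITE_OK == sqlite3_exec(conn, qs, 0, 0, 0));  exec runs, result checked   */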
18 | | |
19 | | // MARK - Level-1 API |
20 | | |
21 | | int ccv_nnc_tensor_write(const ccv_nnc_tensor_t* const tensor, void* const handle, const char* const name, const ccv_nnc_tensor_io_option_t* const options) |
22 | 30 | { |
23 | 30 | assert(CCV_IS_TENSOR_CONTIGUOUS(tensor)); |
24 | 30 | assert(name); |
25 | 30 | sqlite3* conn = (sqlite3*)handle; |
26 | 30 | if (!conn) |
27 | 0 | return CCV_IO_ERROR; |
28 | 30 | const char tensor_create_table_qs[] = "CREATE TABLE IF NOT EXISTS tensors " |
29 | 30 | "(name TEXT, type INTEGER, format INTEGER, datatype INTEGER, " |
30 | 30 | "dim BLOB, data BLOB, PRIMARY KEY (name))"; |
31 | 30 | SQLITE_ENFORCE(SQLITE_OK == sqlite3_exec(conn, tensor_create_table_qs, 0, 0, 0)); |
32 | 30 | const char tensor_insert_qs[] = |
33 | 30 | "REPLACE INTO tensors " |
34 | 30 | "(name, type, format, datatype, dim, data) VALUES (" |
35 | 30 | "$name, $type, $format, $datatype, $dim, $data)"; |
36 | 30 | sqlite3_stmt* tensor_insert_stmt = 0; |
37 | 30 | SQLITE_ENFORCE(SQLITE_OK == sqlite3_prepare_v2(conn, tensor_insert_qs, sizeof(tensor_insert_qs), &tensor_insert_stmt, 0)); |
38 | 30 | sqlite3_bind_text(tensor_insert_stmt, 1, name, -1, 0); |
39 | 30 | ccv_nnc_tensor_param_t params = tensor->info; |
40 | 30 | const size_t data_size = ccv_nnc_tensor_data_size_without_padding(tensor->info); |
41 | 30 | unsigned char* workspace = 0; |
42 | 30 | unsigned int identifier = 0; |
43 | 30 | #ifdef HAVE_CUDA |
44 | 30 | if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY) |
45 | 9 | { |
46 | 9 |     if (!options || !options->encode) |
47 | 3 | { |
48 | 3 | workspace = ccmalloc(data_size); |
49 | 3 | cumemcpy(workspace, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->info.type, data_size); |
50 | 3 | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0); |
51 | 6 | } else { |
52 | 6 |       workspace = ccmalloc(data_size * 2 + 4); // First data_size bytes stage the copy from GPU memory; the remaining data_size + 4 hold the encoder output. |
53 | 6 | cumemcpy(workspace, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->info.type, data_size); |
54 | 6 | size_t encoded_size = data_size + 4; |
55 | 6 | if (options->encode(workspace, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace + data_size, &encoded_size, ¶ms, &identifier)) |
56 | 3 | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace + data_size, encoded_size, 0); |
57 | 3 | else |
58 | 3 | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0); |
59 | 6 | } |
60 | 21 | } else { |
61 | 21 |     if (!options || !options->encode) |
62 | 15 | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
63 | 6 | else { |
64 | 6 |       workspace = ccmalloc(data_size + 4); // Allocate extra 4 bytes in case we need to copy the QX tensor out. |
65 | 6 | size_t encoded_size = data_size + 4; |
66 | 6 | if (options->encode(tensor->data.u8, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace, &encoded_size, ¶ms, &identifier)) |
67 | 3 | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, encoded_size, 0); |
68 | 3 | else |
69 | 3 | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
70 | 6 | } |
71 | 21 | } |
72 | | #elif defined(HAVE_MPS) |
73 | | if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY) |
74 | | { |
75 | | if (!options || !options->encode) |
76 | | { |
77 | | workspace = ccmalloc(data_size); |
78 | | mpmemcpy(workspace, 0, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->dataof, tensor->info.type, data_size); |
79 | | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0); |
80 | | } else { |
81 | |       workspace = ccmalloc(data_size * 2 + 4); // First data_size bytes stage the copy from GPU memory; the remaining data_size + 4 hold the encoder output. |
82 | | mpmemcpy(workspace, 0, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->dataof, tensor->info.type, data_size); |
83 | | size_t encoded_size = data_size + 4; |
84 | | if (options->encode(workspace, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace + data_size, &encoded_size, ¶ms, &identifier)) |
85 | | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace + data_size, encoded_size, 0); |
86 | | else |
87 | | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0); |
88 | | } |
89 | | } else { |
90 | | if (!options || !options->encode) |
91 | | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
92 | | else { |
93 | | workspace = ccmalloc(data_size + 4); // Allocate extra 4 bytes in case we need to copy the QX tensor out. |
94 | | size_t encoded_size = data_size + 4; |
95 | | if (options->encode(tensor->data.u8, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace, &encoded_size, ¶ms, &identifier)) |
96 | | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, encoded_size, 0); |
97 | | else |
98 | | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
99 | | } |
100 | | } |
101 | | #else |
102 | | if (!options || !options->encode) |
103 | | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
104 | | else { |
105 | |     workspace = ccmalloc(data_size + 4); // Allocate extra 4 bytes in case we need to copy the QX tensor out. |
106 | | size_t encoded_size = data_size + 4; |
107 | | if (options->encode(tensor->data.u8, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace, &encoded_size, ¶ms, &identifier)) |
108 | | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, encoded_size, 0); |
109 | | else |
110 | | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
111 | | } |
112 | | #endif |
113 | 30 |   sqlite3_bind_int64(tensor_insert_stmt, 2, ((sqlite_int64)identifier << 32) | params.type); // Encoder identifier lives in the high 32 bits of the type column. |
114 | 30 | sqlite3_bind_int(tensor_insert_stmt, 3, params.format); |
115 | 30 |   sqlite3_bind_int64(tensor_insert_stmt, 4, ((sqlite_int64)params.reserved << 32) | params.datatype); // params.reserved lives in the high 32 bits of the datatype column. |
116 | 30 | sqlite3_bind_blob(tensor_insert_stmt, 5, params.dim, sizeof(params.dim), 0); |
117 | 30 | const int result = sqlite3_step(tensor_insert_stmt); |
118 | 30 | sqlite3_reset(tensor_insert_stmt); |
119 | 30 | sqlite3_clear_bindings(tensor_insert_stmt); |
120 | 30 | sqlite3_finalize(tensor_insert_stmt); |
121 | 30 | if (workspace) |
122 | 15 |     ccfree(workspace); |
123 | 30 |   return result == SQLITE_DONE ? CCV_IO_FINAL : CCV_IO_ERROR; |
124 | 30 | } |
125 | | |
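For reference, a minimal write-side sketch against the API above (the database path and tensor shape are hypothetical; passing 0 for options stores the raw blob with no encoder). Each call upserts one row of (name, type, format, datatype, dim, data) into the tensors table:

    sqlite3* conn = 0;
    if (SQLITE_OK == sqlite3_open("weights.sqlite3", &conn))
    {
        ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2), 0);
        a->data.f32[0] = 1, a->data.f32[1] = 2, a->data.f32[2] = 3, a->data.f32[3] = 4;
        ccv_nnc_tensor_write(a, conn, "a", 0); // Returns CCV_IO_FINAL on success.
        ccv_nnc_tensor_free(a);
        sqlite3_close(conn);
    }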
126 | | int ccv_nnc_tensor_read(void* const handle, const char* const name, const ccv_nnc_tensor_io_option_t* const options, const int flags, const ccv_nnc_tensor_param_t* const tensor_params_optional, ccv_nnc_tensor_t** const tensor_out) |
127 | 36 | { |
128 | 36 | assert(name); |
129 | 36 | sqlite3* conn = (sqlite3*)handle; |
130 | 36 | if (!conn) |
131 | 0 | return CCV_IO_ERROR; |
132 | 36 | const char tensor_select_qs[] = |
133 | 36 | "SELECT data, type, format, datatype, dim FROM tensors WHERE name=$name"; |
134 | 36 | sqlite3_stmt* tensor_select_stmt = 0; |
135 | 36 | if (SQLITE_OK != sqlite3_prepare_v2(conn, tensor_select_qs, sizeof(tensor_select_qs), &tensor_select_stmt, 0)) |
136 | 0 | return CCV_IO_ERROR; |
137 | 36 | sqlite3_bind_text(tensor_select_stmt, 1, name, -1, 0); |
138 | 36 | if (SQLITE_ROW != sqlite3_step(tensor_select_stmt)) |
139 | 0 | { |
140 | 0 | sqlite3_finalize(tensor_select_stmt); |
141 | 0 | return CCV_IO_ERROR; |
142 | 0 | } |
143 | 36 | ccv_nnc_tensor_t* tensor = *tensor_out; |
144 | 36 | ccv_nnc_tensor_param_t tensor_params; |
145 | 36 | int datatype = 0; |
146 | 36 | unsigned int identifier = 0; |
147 | 36 | if (!tensor) // If the tensor is not provided, we need to create one. |
148 | 18 | { |
149 | 18 | if (tensor_params_optional) |
150 | 10 | { |
151 | 10 |       identifier = (sqlite3_column_int64(tensor_select_stmt, 1) >> 32) & 0xffffffff; // Unpack the encoder identifier from the high 32 bits of the type column. |
152 | 10 | datatype = sqlite3_column_int64(tensor_select_stmt, 3) & 0xffffffff; |
153 | 10 | tensor_params = *tensor_params_optional; |
154 | 10 | assert(!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY)); |
155 | 10 | } else { |
156 | 8 | const sqlite_int64 type = sqlite3_column_int64(tensor_select_stmt, 1); |
157 | 8 | identifier = (type >> 32) & 0xffffffff; |
158 | 8 | tensor_params.type = (type & 0xffffffff); |
159 | 8 | tensor_params.format = sqlite3_column_int(tensor_select_stmt, 2); |
160 | 8 | const sqlite_int64 datatype_mix = sqlite3_column_int64(tensor_select_stmt, 3); |
161 | 8 | datatype = tensor_params.datatype = (datatype_mix & 0xffffffff); |
162 | 8 | tensor_params.reserved = (datatype_mix >> 32) & 0xffffffff; |
163 | 8 | const void* const dim = sqlite3_column_blob(tensor_select_stmt, 4); |
164 | 8 | memcpy(tensor_params.dim, dim, ccv_min(sizeof(tensor_params.dim), sqlite3_column_bytes(tensor_select_stmt, 4))); |
165 | 8 | } |
166 | 18 | if (flags & CCV_NNC_TENSOR_READ_CPU_MEMORY) // Reset type to CPU memory. |
167 | 0 | tensor_params.type = (tensor_params.type & 0xfff00000) | CCV_TENSOR_CPU_MEMORY; |
168 | 18 |     if (!options || !options->decode) |
169 | 14 | { |
170 | 14 | if (flags & CCV_NNC_TENSOR_READ_METADATA_ONLY) |
171 | 2 | { |
172 | 2 |         *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, CCV_NO_DATA_ALLOC); // Allocate the tensor header only, without a data buffer. |
173 | 2 |         assert(tensor->data.u8 == 0); // With CCV_NO_DATA_ALLOC, the data pointer is expected to be 0. |
174 | | // Already done loading metadata, return. |
175 | 2 | sqlite3_reset(tensor_select_stmt); |
176 | 2 | sqlite3_clear_bindings(tensor_select_stmt); |
177 | 2 | sqlite3_finalize(tensor_select_stmt); |
178 | 2 | return CCV_IO_FINAL; |
179 | 2 | } else |
180 | 12 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
181 | 14 | } else { |
182 | 4 | assert(!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY)); |
183 | 4 | } |
184 | 18 | } else { |
185 | 18 | identifier = (sqlite3_column_int64(tensor_select_stmt, 1) >> 32) & 0xffffffff; |
186 | 18 | datatype = sqlite3_column_int(tensor_select_stmt, 3) & 0xffffffff; |
187 | 18 | tensor_params = tensor->info; |
188 | 18 | assert(!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY)); |
189 | 18 | } |
190 | 34 | const void* const data = sqlite3_column_blob(tensor_select_stmt, 0); |
191 | 34 | int dim[CCV_NNC_MAX_DIM_ALLOC]; |
192 | 34 | memcpy(dim, sqlite3_column_blob(tensor_select_stmt, 4), ccv_min(sizeof(dim), sqlite3_column_bytes(tensor_select_stmt, 4))); |
193 | 34 | const int nd = ccv_nnc_tensor_nd(dim); |
194 | 34 |   if (datatype != tensor_params.datatype && CCV_GET_DATA_TYPE(tensor_params.datatype) != CCV_QX) |
195 | 12 | { |
196 | | // Only ever works for 16F to 32F or 32F to 16F transparently. |
197 | 12 | assert((datatype == CCV_16F && tensor_params.datatype == CCV_32F) || (datatype == CCV_32F && tensor_params.datatype == CCV_16F)); |
198 | 12 | const size_t tensor_count = ccv_nnc_tensor_count(tensor_params); |
199 | 12 | ccv_nnc_tensor_param_t params = tensor_params; |
200 | 12 | params.datatype = datatype; |
201 | 12 | const size_t source_data_size = ccv_nnc_tensor_data_size(params); |
202 | 12 | #ifdef HAVE_CUDA |
203 | 12 | if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY) |
204 | 6 | { |
205 | 6 | const size_t data_size = ccv_nnc_tensor_data_size(tensor_params); |
206 | 6 | unsigned char* workspace; |
207 | 6 | unsigned char* copying; |
208 | 6 | size_t decoded_size = data_size; |
209 | 6 |       if (!options || !options->decode) |
210 | 2 | { |
211 | 2 | copying = workspace = ccmalloc(data_size); |
212 | 2 |         if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
213 | 1 | ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
214 | 1 | else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) |
215 | 1 | ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
216 | 0 | else |
217 | 0 | { assert(0); } |
218 | 4 | } else { |
219 | 4 | copying = workspace = ccmalloc(data_size + source_data_size); |
220 | 4 |         if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
221 | 2 | { |
222 | 2 | decoded_size = source_data_size; |
223 | 2 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size)) |
224 | 1 | { |
225 | | // If we loaded quantized tensor, don't do the conversion. |
226 | 1 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
227 | 0 | copying = workspace + data_size; |
228 | 1 | else { |
229 | 1 | ccv_half_precision_to_float((uint16_t*)(workspace + data_size), (float*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))); |
230 | 1 | decoded_size = data_size; |
231 | 1 | } |
232 | 1 | } else { |
233 | 1 | if (!tensor) |
234 | 0 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
235 | 1 | ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
236 | 1 | decoded_size = data_size; |
237 | 1 | } |
238 | 2 | } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) { |
239 | 2 | decoded_size = source_data_size; |
240 | 2 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size)) |
241 | 1 | { |
242 | 1 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
243 | 0 | copying = workspace + data_size; |
244 | 1 | else { |
245 | 1 | ccv_float_to_half_precision((float*)(workspace + data_size), (uint16_t*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))); |
246 | 1 | decoded_size = data_size; |
247 | 1 | } |
248 | 1 | } else { |
249 | 1 | if (!tensor) |
250 | 0 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
251 | 1 | ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
252 | 1 | decoded_size = data_size; |
253 | 1 | } |
254 | 2 | } else |
255 | 0 | { assert(0); } |
256 | 4 | } |
257 | 6 | cumemcpy(tensor_out[0]->data.u8, tensor_out[0]->info.type, copying, CCV_TENSOR_CPU_MEMORY, decoded_size); |
258 | 6 | ccfree(workspace); |
259 | 6 | } else { |
260 | 6 |       if (!options || !options->decode) |
261 | 2 | { |
262 | 2 |         if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
263 | 1 | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
264 | 1 | else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) |
265 | 1 | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
266 | 0 | else |
267 | 0 | { assert(0); } |
268 | 4 | } else { |
269 | 4 | void* const workspace = ccmalloc(source_data_size); |
270 | 4 |         if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
271 | 2 | { |
272 | 2 | size_t decoded_size = source_data_size; |
273 | 2 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
274 | 1 | { |
275 | 1 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
276 | 0 | { |
277 | 0 | if (decoded_size > 0) |
278 | 0 | memcpy(tensor_out[0]->data.f32, workspace, ccv_min(source_data_size, decoded_size)); |
279 | 0 | } else |
280 | 1 | ccv_half_precision_to_float((uint16_t*)workspace, tensor_out[0]->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))); |
281 | 1 | } else { |
282 | 1 | if (!tensor) |
283 | 0 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
284 | 1 | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
285 | 1 | } |
286 | 2 | } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) { |
287 | 2 | size_t decoded_size = source_data_size; |
288 | 2 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
289 | 1 | { |
290 | 1 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
291 | 0 | { |
292 | 0 | if (decoded_size > 0) |
293 | 0 | memcpy(tensor_out[0]->data.f16, workspace, ccv_min(source_data_size, decoded_size)); |
294 | 0 | } else |
295 | 1 | ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor_out[0]->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))); |
296 | 1 | } else { |
297 | 1 | if (!tensor) |
298 | 0 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
299 | 1 | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
300 | 1 | } |
301 | 2 | } else |
302 | 0 | { assert(0); } |
303 | 4 | ccfree(workspace); |
304 | 4 | } |
305 | 6 | } |
306 | | #elif defined(HAVE_MPS) |
307 | | if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY) |
308 | | { |
309 | | const size_t data_size = ccv_nnc_tensor_data_size(tensor_params); |
310 | | unsigned char* workspace; |
311 | | unsigned char* copying; |
312 | | size_t decoded_size = data_size; |
313 | | if (!options || !options->decode) |
314 | | { |
315 | | copying = workspace = ccmalloc(data_size); |
316 | | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
317 | | ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
318 | | else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) |
319 | | ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
320 | | else |
321 | | { assert(0); } |
322 | | } else { |
323 | | copying = workspace = ccmalloc(data_size + source_data_size); |
324 | | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
325 | | { |
326 | | decoded_size = source_data_size; |
327 | | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size)) |
328 | | { |
329 | | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
330 | | copying = workspace + data_size; |
331 | | else { |
332 | | ccv_half_precision_to_float((uint16_t*)(workspace + data_size), (float*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))); |
333 | | decoded_size = data_size; |
334 | | } |
335 | | } else { |
336 | | if (!tensor) |
337 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
338 | | ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
339 | | decoded_size = data_size; |
340 | | } |
341 | | } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) { |
342 | | decoded_size = source_data_size; |
343 | | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size)) |
344 | | { |
345 | | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
346 | | copying = workspace + data_size; |
347 | | else { |
348 | | ccv_float_to_half_precision((float*)(workspace + data_size), (uint16_t*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))); |
349 | | decoded_size = data_size; |
350 | | } |
351 | | } else { |
352 | | if (!tensor) |
353 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
354 | | ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
355 | | decoded_size = data_size; |
356 | | } |
357 | | } else |
358 | | { assert(0); } |
359 | | } |
360 | | assert(tensor_out[0]->dataof == 0); |
361 | | mpmemcpy(tensor_out[0]->data.u8, tensor_out[0]->dataof, tensor_out[0]->info.type, copying, 0, CCV_TENSOR_CPU_MEMORY, decoded_size); |
362 | | ccfree(workspace); |
363 | | } else { |
364 | | if (!options || !options->decode) |
365 | | { |
366 | | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
367 | | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
368 | | else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) |
369 | | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
370 | | else |
371 | | { assert(0); } |
372 | | } else { |
373 | | void* const workspace = ccmalloc(source_data_size); |
374 | | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
375 | | { |
376 | | size_t decoded_size = source_data_size; |
377 | | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
378 | | { |
379 | | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
380 | | { |
381 | | if (decoded_size > 0) |
382 | | memcpy(tensor_out[0]->data.f32, workspace, ccv_min(source_data_size, decoded_size)); |
383 | | } else |
384 | | ccv_half_precision_to_float((uint16_t*)workspace, tensor_out[0]->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))); |
385 | | } else { |
386 | | if (!tensor) |
387 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
388 | | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
389 | | } |
390 | | } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) { |
391 | | size_t decoded_size = source_data_size; |
392 | | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
393 | | { |
394 | | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
395 | | { |
396 | | if (decoded_size > 0) |
397 | | memcpy(tensor_out[0]->data.f16, workspace, ccv_min(source_data_size, decoded_size)); |
398 | | } else |
399 | | ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor_out[0]->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))); |
400 | | } else { |
401 | | if (!tensor) |
402 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
403 | | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
404 | | } |
405 | | } else |
406 | | { assert(0); } |
407 | | ccfree(workspace); |
408 | | } |
409 | | } |
410 | | #else |
411 | | if (!options || !options->decode) |
412 | | { |
413 | | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
414 | | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
415 | | else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) |
416 | | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
417 | | else |
418 | | { assert(0); } |
419 | | } else { |
420 | | void* const workspace = ccmalloc(source_data_size); |
421 | | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
422 | | { |
423 | | size_t decoded_size = source_data_size; |
424 | | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
425 | | { |
426 | | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
427 | | { |
428 | | if (decoded_size > 0) |
429 | | memcpy(tensor_out[0]->data.f32, workspace, ccv_min(source_data_size, decoded_size)); |
430 | | } else |
431 | | ccv_half_precision_to_float((uint16_t*)workspace, tensor_out[0]->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))); |
432 | | } else { |
433 | | if (!tensor) |
434 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
435 | | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))); |
436 | | } |
437 | | } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) { |
438 | | size_t decoded_size = source_data_size; |
439 | | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
440 | | { |
441 | | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype) == CCV_QX) |
442 | | { |
443 | | if (decoded_size > 0) |
444 | | memcpy(tensor_out[0]->data.f16, workspace, ccv_min(source_data_size, decoded_size)); |
445 | | } else |
446 | | ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor_out[0]->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))); |
447 | | } else { |
448 | | if (!tensor) |
449 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
450 | | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))); |
451 | | } |
452 | | } else |
453 | | { assert(0); } |
454 | | ccfree(workspace); |
455 | | } |
456 | | #endif |
457 | 22 | } else { |
458 | | // If it is QX, we need to have a custom decoder to decode properly. |
459 | 22 | if (datatype != tensor_params.datatype) |
460 | 0 | { assert(options && options->decode); } |
461 | 22 | size_t data_size = ccv_nnc_tensor_data_size(tensor_params); |
462 | 22 | #ifdef HAVE_CUDA |
463 | 22 |     if (!options || !options->decode) |
464 | 14 | { |
465 | 14 | if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY) |
466 | 1 | cumemcpy(tensor->data.u8, tensor->info.type, data, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
467 | 13 | else |
468 | 13 | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
469 | 14 | } else { |
470 | 8 | if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY) |
471 | 2 | { |
472 | 2 | void* const workspace = ccmalloc(data_size); |
473 | 2 | size_t decoded_size = data_size; |
474 | 2 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
475 | 1 | cumemcpy(tensor_out[0]->data.u8, tensor_out[0]->info.type, workspace, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, decoded_size)); |
476 | 1 | else { |
477 | 1 | if (!tensor) |
478 | 1 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
479 | 1 | cumemcpy(tensor->data.u8, tensor->info.type, data, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
480 | 1 | } |
481 | 2 | ccfree(workspace); |
482 | 6 | } else { |
483 | 6 | size_t decoded_size = data_size; |
484 | 6 |         if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, tensor ? tensor->data.u8 : 0, &decoded_size)) |
485 | 3 | { |
486 | 3 | if (!tensor) |
487 | 1 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
488 | 3 | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
489 | 3 | } |
490 | 6 | } |
491 | 8 | } |
492 | | #elif defined(HAVE_MPS) |
493 | | if (!options || !options->decode) |
494 | | { |
495 | | if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY) |
496 | | { |
497 | | assert(tensor->dataof == 0); |
498 | | mpmemcpy(tensor->data.u8, tensor->dataof, tensor->info.type, data, 0, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
499 | | } else |
500 | | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
501 | | } else { |
502 | | if (CCV_TENSOR_GET_MEMORY(tensor_params.type) == CCV_TENSOR_GPU_MEMORY) |
503 | | { |
504 | | if (tensor) |
505 | | { assert(tensor->dataof == 0); } |
506 | | void* const workspace = ccmalloc(data_size); |
507 | | size_t decoded_size = data_size; |
508 | | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) { |
509 | | mpmemcpy(tensor_out[0]->data.u8, tensor_out[0]->dataof, tensor_out[0]->info.type, workspace, 0, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, decoded_size)); |
510 | | } else { |
511 | | if (!tensor) |
512 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
513 | | mpmemcpy(tensor->data.u8, tensor->dataof, tensor->info.type, data, 0, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
514 | | } |
515 | | ccfree(workspace); |
516 | | } else { |
517 | | size_t decoded_size = data_size; |
518 | | if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, tensor ? tensor->data.u8 : 0, &decoded_size)) |
519 | | { |
520 | | if (!tensor) |
521 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
522 | | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
523 | | } |
524 | | } |
525 | | } |
526 | | #else |
527 | | if (!options || !options->decode) |
528 | | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
529 | | else { |
530 | | size_t decoded_size = data_size; |
531 | | if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, tensor ? tensor->data.u8 : 0, &decoded_size)) |
532 | | { |
533 | | if (!tensor) |
534 | | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
535 | | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))); |
536 | | } |
537 | | } |
538 | | #endif |
539 | 22 | } |
540 | 34 | tensor_out[0]->type &= ~CCV_GARBAGE; // If it is marked as garbage, remove that mark now. |
541 | 34 | sqlite3_reset(tensor_select_stmt); |
542 | 34 | sqlite3_clear_bindings(tensor_select_stmt); |
543 | 34 | sqlite3_finalize(tensor_select_stmt); |
544 | 34 | return CCV_IO_FINAL; |
545 | 34 | } |
546 | | |
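And the matching read-back sketch (same hypothetical database; with *tensor_out null and no options, flags, or optional params, the tensor is recreated from the stored metadata):

    sqlite3* conn = 0;
    if (SQLITE_OK == sqlite3_open("weights.sqlite3", &conn))
    {
        ccv_nnc_tensor_t* b = 0; // Pass a null tensor so ccv_nnc_tensor_read allocates it.
        if (CCV_IO_FINAL == ccv_nnc_tensor_read(conn, "a", 0, 0, 0, &b))
        {
            // ... use b->data.f32 ...
            ccv_nnc_tensor_free(b);
        }
        sqlite3_close(conn);
    }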