| File: | nnc/ccv_nnc_tensor_io.c |
| Warning: | line 234, column 22 Although the value stored to 'tensor' is used in the enclosing expression, the value is never actually read from 'tensor' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | #include "ccv_nnc.h" |
| 2 | #include "ccv_nnc_easy.h" |
| 3 | #include "ccv_nnc_internal.h" |
| 4 | #include "ccv_internal.h" |
| 5 | #include "_ccv_nnc_symbolic_graph.h" |
| 6 | #include "3rdparty/sqlite3/sqlite3.h" |
| 7 | #ifdef HAVE_CUDA1 |
| 8 | #include "gpu/ccv_nnc_compat.h" |
| 9 | #elif HAVE_MPS |
| 10 | #include "mps/ccv_nnc_mps.h" |
| 11 | #endif |
| 12 | |
#ifdef NDEBUG
// In release builds assert() compiles away, but the sqlite3 calls wrapped in
// SQLITE_ENFORCE must still execute for their side effects (exec / prepare).
// Evaluate the statement and discard the result instead of dropping it.
#define SQLITE_ENFORCE(stmt) (void)(stmt)
#else
// In debug builds, enforce the sqlite3 return code with a hard assert.
#define SQLITE_ENFORCE assert
#endif
| 18 | |
| 19 | // MARK - Level-1 API |
| 20 | |
| 21 | int ccv_nnc_tensor_write(const ccv_nnc_tensor_t* const tensor, void* const handle, const char* const name, const ccv_nnc_tensor_io_option_t* const options) |
| 22 | { |
| 23 | assert(CCV_IS_TENSOR_CONTIGUOUS(tensor))((void) sizeof (((!((*(int*)(tensor)) & CCV_TENSOR_VIEW) || (((ccv_nnc_tensor_view_t*)tensor)->contiguous == 1))) ? 1 : 0), __extension__ ({ if ((!((*(int*)(tensor)) & CCV_TENSOR_VIEW ) || (((ccv_nnc_tensor_view_t*)tensor)->contiguous == 1))) ; else __assert_fail ("CCV_IS_TENSOR_CONTIGUOUS(tensor)", "ccv_nnc_tensor_io.c" , 23, __extension__ __PRETTY_FUNCTION__); })); |
| 24 | assert(name)((void) sizeof ((name) ? 1 : 0), __extension__ ({ if (name) ; else __assert_fail ("name", "ccv_nnc_tensor_io.c", 24, __extension__ __PRETTY_FUNCTION__); })); |
| 25 | sqlite3* conn = (sqlite3*)handle; |
| 26 | if (!conn) |
| 27 | return CCV_IO_ERROR; |
| 28 | const char tensor_create_table_qs[] = "CREATE TABLE IF NOT EXISTS tensors " |
| 29 | "(name TEXT, type INTEGER, format INTEGER, datatype INTEGER, " |
| 30 | "dim BLOB, data BLOB, PRIMARY KEY (name))"; |
| 31 | SQLITE_ENFORCE(SQLITE_OK == sqlite3_exec(conn, tensor_create_table_qs, 0, 0, 0))((void) sizeof ((0 == sqlite3_exec(conn, tensor_create_table_qs , 0, 0, 0)) ? 1 : 0), __extension__ ({ if (0 == sqlite3_exec( conn, tensor_create_table_qs, 0, 0, 0)) ; else __assert_fail ( "SQLITE_OK == sqlite3_exec(conn, tensor_create_table_qs, 0, 0, 0)" , "ccv_nnc_tensor_io.c", 31, __extension__ __PRETTY_FUNCTION__ ); })); |
| 32 | const char tensor_insert_qs[] = |
| 33 | "REPLACE INTO tensors " |
| 34 | "(name, type, format, datatype, dim, data) VALUES (" |
| 35 | "$name, $type, $format, $datatype, $dim, $data)"; |
| 36 | sqlite3_stmt* tensor_insert_stmt = 0; |
| 37 | SQLITE_ENFORCE(SQLITE_OK == sqlite3_prepare_v2(conn, tensor_insert_qs, sizeof(tensor_insert_qs), &tensor_insert_stmt, 0))((void) sizeof ((0 == sqlite3_prepare_v2(conn, tensor_insert_qs , sizeof(tensor_insert_qs), &tensor_insert_stmt, 0)) ? 1 : 0), __extension__ ({ if (0 == sqlite3_prepare_v2(conn, tensor_insert_qs , sizeof(tensor_insert_qs), &tensor_insert_stmt, 0)) ; else __assert_fail ("SQLITE_OK == sqlite3_prepare_v2(conn, tensor_insert_qs, sizeof(tensor_insert_qs), &tensor_insert_stmt, 0)" , "ccv_nnc_tensor_io.c", 37, __extension__ __PRETTY_FUNCTION__ ); })); |
| 38 | sqlite3_bind_text(tensor_insert_stmt, 1, name, -1, 0); |
| 39 | ccv_nnc_tensor_param_t params = tensor->info; |
| 40 | const size_t data_size = ccv_nnc_tensor_data_size_without_padding(tensor->info); |
| 41 | unsigned char* workspace = 0; |
| 42 | unsigned int identifier = 0; |
| 43 | #ifdef HAVE_CUDA1 |
| 44 | if (CCV_TENSOR_GET_MEMORY(tensor->info.type)((tensor->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY) |
| 45 | { |
| 46 | if (!options || !options->encode) |
| 47 | { |
| 48 | workspace = ccmallocmalloc(data_size); |
| 49 | cumemcpy(workspace, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->info.type, data_size); |
| 50 | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0); |
| 51 | } else { |
| 52 | workspace = ccmallocmalloc(data_size * 2 + 4); |
| 53 | cumemcpy(workspace, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->info.type, data_size); |
| 54 | size_t encoded_size = data_size + 4; |
| 55 | if (options->encode(workspace, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace + data_size, &encoded_size, ¶ms, &identifier)) |
| 56 | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace + data_size, encoded_size, 0); |
| 57 | else |
| 58 | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0); |
| 59 | } |
| 60 | } else { |
| 61 | if (!options || !options->encode) |
| 62 | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
| 63 | else { |
| 64 | workspace = ccmallocmalloc(data_size + 4); |
| 65 | size_t encoded_size = data_size + 4; |
| 66 | if (options->encode(tensor->data.u8, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace, &encoded_size, ¶ms, &identifier)) |
| 67 | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, encoded_size, 0); |
| 68 | else |
| 69 | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
| 70 | } |
| 71 | } |
| 72 | #elif defined(HAVE_MPS) |
| 73 | if (CCV_TENSOR_GET_MEMORY(tensor->info.type)((tensor->info.type) & 0x3) == CCV_TENSOR_GPU_MEMORY) |
| 74 | { |
| 75 | if (!options || !options->encode) |
| 76 | { |
| 77 | workspace = ccmallocmalloc(data_size); |
| 78 | mpmemcpy(workspace, 0, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->dataof, tensor->info.type, data_size); |
| 79 | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0); |
| 80 | } else { |
| 81 | workspace = ccmallocmalloc(data_size * 2 + 4); |
| 82 | mpmemcpy(workspace, 0, CCV_TENSOR_CPU_MEMORY, tensor->data.u8, tensor->dataof, tensor->info.type, data_size); |
| 83 | size_t encoded_size = data_size + 4; |
| 84 | if (options->encode(workspace, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace + data_size, &encoded_size, ¶ms, &identifier)) |
| 85 | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace + data_size, encoded_size, 0); |
| 86 | else |
| 87 | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, data_size, 0); |
| 88 | } |
| 89 | } else { |
| 90 | if (!options || !options->encode) |
| 91 | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
| 92 | else { |
| 93 | workspace = ccmallocmalloc(data_size + 4); // Allocate extra 4 bytes in case we need to copy the QX tensor out. |
| 94 | size_t encoded_size = data_size + 4; |
| 95 | if (options->encode(tensor->data.u8, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace, &encoded_size, ¶ms, &identifier)) |
| 96 | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, encoded_size, 0); |
| 97 | else |
| 98 | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
| 99 | } |
| 100 | } |
| 101 | #else |
| 102 | if (!options || !options->encode) |
| 103 | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
| 104 | else { |
| 105 | workspace = ccmallocmalloc(data_size + 4); |
| 106 | size_t encoded_size = data_size + 4; |
| 107 | if (options->encode(tensor->data.u8, data_size, tensor->info.datatype, tensor->info.dim, ccv_nnc_tensor_nd(tensor->info.dim), options->context, workspace, &encoded_size, ¶ms, &identifier)) |
| 108 | sqlite3_bind_blob(tensor_insert_stmt, 6, workspace, encoded_size, 0); |
| 109 | else |
| 110 | sqlite3_bind_blob(tensor_insert_stmt, 6, tensor->data.u8, data_size, 0); |
| 111 | } |
| 112 | #endif |
| 113 | sqlite3_bind_int64(tensor_insert_stmt, 2, ((sqlite_int64)identifier << 32) | params.type); |
| 114 | sqlite3_bind_int(tensor_insert_stmt, 3, params.format); |
| 115 | sqlite3_bind_int64(tensor_insert_stmt, 4, ((sqlite_int64)params.reserved << 32) | params.datatype); |
| 116 | sqlite3_bind_blob(tensor_insert_stmt, 5, params.dim, sizeof(params.dim), 0); |
| 117 | const int result = sqlite3_step(tensor_insert_stmt); |
| 118 | sqlite3_reset(tensor_insert_stmt); |
| 119 | sqlite3_clear_bindings(tensor_insert_stmt); |
| 120 | sqlite3_finalize(tensor_insert_stmt); |
| 121 | if (workspace) |
| 122 | free(workspace); |
| 123 | return result == SQLITE_DONE101 ? CCV_IO_FINAL : CCV_IO_ERROR; |
| 124 | } |
| 125 | |
| 126 | int ccv_nnc_tensor_read(void* const handle, const char* const name, const ccv_nnc_tensor_io_option_t* const options, const int flags, const ccv_nnc_tensor_param_t* const tensor_params_optional, ccv_nnc_tensor_t** const tensor_out) |
| 127 | { |
| 128 | assert(name)((void) sizeof ((name) ? 1 : 0), __extension__ ({ if (name) ; else __assert_fail ("name", "ccv_nnc_tensor_io.c", 128, __extension__ __PRETTY_FUNCTION__); })); |
| 129 | sqlite3* conn = (sqlite3*)handle; |
| 130 | if (!conn) |
| 131 | return CCV_IO_ERROR; |
| 132 | const char tensor_select_qs[] = |
| 133 | "SELECT data, type, format, datatype, dim FROM tensors WHERE name=$name"; |
| 134 | sqlite3_stmt* tensor_select_stmt = 0; |
| 135 | if (SQLITE_OK0 != sqlite3_prepare_v2(conn, tensor_select_qs, sizeof(tensor_select_qs), &tensor_select_stmt, 0)) |
| 136 | return CCV_IO_ERROR; |
| 137 | sqlite3_bind_text(tensor_select_stmt, 1, name, -1, 0); |
| 138 | if (SQLITE_ROW100 != sqlite3_step(tensor_select_stmt)) |
| 139 | { |
| 140 | sqlite3_finalize(tensor_select_stmt); |
| 141 | return CCV_IO_ERROR; |
| 142 | } |
| 143 | ccv_nnc_tensor_t* tensor = *tensor_out; |
| 144 | ccv_nnc_tensor_param_t tensor_params; |
| 145 | int datatype = 0; |
| 146 | unsigned int identifier = 0; |
| 147 | if (!tensor) // If the tensor is not provided, we need to create one. |
| 148 | { |
| 149 | if (tensor_params_optional) |
| 150 | { |
| 151 | identifier = (sqlite3_column_int64(tensor_select_stmt, 1) >> 32) & 0xffffffff; |
| 152 | datatype = sqlite3_column_int64(tensor_select_stmt, 3) & 0xffffffff; |
| 153 | tensor_params = *tensor_params_optional; |
| 154 | assert(!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY))((void) sizeof ((!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY )) ? 1 : 0), __extension__ ({ if (!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY )) ; else __assert_fail ("!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY)" , "ccv_nnc_tensor_io.c", 154, __extension__ __PRETTY_FUNCTION__ ); })); |
| 155 | } else { |
| 156 | const sqlite_int64 type = sqlite3_column_int64(tensor_select_stmt, 1); |
| 157 | identifier = (type >> 32) & 0xffffffff; |
| 158 | tensor_params.type = (type & 0xffffffff); |
| 159 | tensor_params.format = sqlite3_column_int(tensor_select_stmt, 2); |
| 160 | const sqlite_int64 datatype_mix = sqlite3_column_int64(tensor_select_stmt, 3); |
| 161 | datatype = tensor_params.datatype = (datatype_mix & 0xffffffff); |
| 162 | tensor_params.reserved = (datatype_mix >> 32) & 0xffffffff; |
| 163 | const void* const dim = sqlite3_column_blob(tensor_select_stmt, 4); |
| 164 | memcpy(tensor_params.dim, dim, ccv_min(sizeof(tensor_params.dim), sqlite3_column_bytes(tensor_select_stmt, 4))({ typeof (sizeof(tensor_params.dim)) _a = (sizeof(tensor_params .dim)); typeof (sqlite3_column_bytes(tensor_select_stmt, 4)) _b = (sqlite3_column_bytes(tensor_select_stmt, 4)); (_a < _b ) ? _a : _b; })); |
| 165 | } |
| 166 | if (flags & CCV_NNC_TENSOR_READ_CPU_MEMORY) // Reset type to CPU memory. |
| 167 | tensor_params.type = (tensor_params.type & 0xfff00000) | CCV_TENSOR_CPU_MEMORY; |
| 168 | if (!options || !options->decode) |
| 169 | { |
| 170 | if (flags & CCV_NNC_TENSOR_READ_METADATA_ONLY) |
| 171 | { |
| 172 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, CCV_NO_DATA_ALLOC); // Set the data point to 1 so it is allocated without data. |
| 173 | assert(tensor->data.u8 == 0)((void) sizeof ((tensor->data.u8 == 0) ? 1 : 0), __extension__ ({ if (tensor->data.u8 == 0) ; else __assert_fail ("tensor->data.u8 == 0" , "ccv_nnc_tensor_io.c", 173, __extension__ __PRETTY_FUNCTION__ ); })); // Set it back to 0. |
| 174 | // Already done loading metadata, return. |
| 175 | sqlite3_reset(tensor_select_stmt); |
| 176 | sqlite3_clear_bindings(tensor_select_stmt); |
| 177 | sqlite3_finalize(tensor_select_stmt); |
| 178 | return CCV_IO_FINAL; |
| 179 | } else |
| 180 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
| 181 | } else { |
| 182 | assert(!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY))((void) sizeof ((!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY )) ? 1 : 0), __extension__ ({ if (!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY )) ; else __assert_fail ("!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY)" , "ccv_nnc_tensor_io.c", 182, __extension__ __PRETTY_FUNCTION__ ); })); |
| 183 | } |
| 184 | } else { |
| 185 | identifier = (sqlite3_column_int64(tensor_select_stmt, 1) >> 32) & 0xffffffff; |
| 186 | datatype = sqlite3_column_int(tensor_select_stmt, 3) & 0xffffffff; |
| 187 | tensor_params = tensor->info; |
| 188 | assert(!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY))((void) sizeof ((!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY )) ? 1 : 0), __extension__ ({ if (!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY )) ; else __assert_fail ("!(flags & CCV_NNC_TENSOR_READ_METADATA_ONLY)" , "ccv_nnc_tensor_io.c", 188, __extension__ __PRETTY_FUNCTION__ ); })); |
| 189 | } |
| 190 | const void* const data = sqlite3_column_blob(tensor_select_stmt, 0); |
| 191 | int dim[CCV_NNC_MAX_DIM_ALLOC(12)]; |
| 192 | memcpy(dim, sqlite3_column_blob(tensor_select_stmt, 4), ccv_min(sizeof(dim), sqlite3_column_bytes(tensor_select_stmt, 4))({ typeof (sizeof(dim)) _a = (sizeof(dim)); typeof (sqlite3_column_bytes (tensor_select_stmt, 4)) _b = (sqlite3_column_bytes(tensor_select_stmt , 4)); (_a < _b) ? _a : _b; })); |
| 193 | const int nd = ccv_nnc_tensor_nd(dim); |
| 194 | if (datatype != tensor_params.datatype && CCV_GET_DATA_TYPE(tensor_params.datatype)((tensor_params.datatype) & 0xFF000) != CCV_QX) |
| 195 | { |
| 196 | // Only ever works for 16F to 32F or 32F to 16F transparently. |
| 197 | assert((datatype == CCV_16F && tensor_params.datatype == CCV_32F) || (datatype == CCV_32F && tensor_params.datatype == CCV_16F))((void) sizeof (((datatype == CCV_16F && tensor_params .datatype == CCV_32F) || (datatype == CCV_32F && tensor_params .datatype == CCV_16F)) ? 1 : 0), __extension__ ({ if ((datatype == CCV_16F && tensor_params.datatype == CCV_32F) || ( datatype == CCV_32F && tensor_params.datatype == CCV_16F )) ; else __assert_fail ("(datatype == CCV_16F && tensor_params.datatype == CCV_32F) || (datatype == CCV_32F && tensor_params.datatype == CCV_16F)" , "ccv_nnc_tensor_io.c", 197, __extension__ __PRETTY_FUNCTION__ ); })); |
| 198 | const size_t tensor_count = ccv_nnc_tensor_count(tensor_params); |
| 199 | ccv_nnc_tensor_param_t params = tensor_params; |
| 200 | params.datatype = datatype; |
| 201 | const size_t source_data_size = ccv_nnc_tensor_data_size(params); |
| 202 | #ifdef HAVE_CUDA1 |
| 203 | if (CCV_TENSOR_GET_MEMORY(tensor_params.type)((tensor_params.type) & 0x3) == CCV_TENSOR_GPU_MEMORY) |
| 204 | { |
| 205 | const size_t data_size = ccv_nnc_tensor_data_size(tensor_params); |
| 206 | unsigned char* workspace; |
| 207 | unsigned char* copying; |
| 208 | size_t decoded_size = data_size; |
| 209 | if (!options || !options->decode) |
| 210 | { |
| 211 | copying = workspace = ccmallocmalloc(data_size); |
| 212 | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
| 213 | ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)); (_a < _b) ? _a : _b; })); |
| 214 | else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) |
| 215 | ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)); (_a < _b) ? _a : _b; })); |
| 216 | else |
| 217 | { assert(0)((void) sizeof ((0) ? 1 : 0), __extension__ ({ if (0) ; else __assert_fail ("0", "ccv_nnc_tensor_io.c", 217, __extension__ __PRETTY_FUNCTION__ ); })); } |
| 218 | } else { |
| 219 | copying = workspace = ccmallocmalloc(data_size + source_data_size); |
| 220 | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
| 221 | { |
| 222 | decoded_size = source_data_size; |
| 223 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size)) |
| 224 | { |
| 225 | // If we loaded quantized tensor, don't do the conversion. |
| 226 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype)((tensor_out[0]->info.datatype) & 0xFF000) == CCV_QX) |
| 227 | copying = workspace + data_size; |
| 228 | else { |
| 229 | ccv_half_precision_to_float((uint16_t*)(workspace + data_size), (float*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))({ typeof (tensor_count) _a = (tensor_count); typeof (({ typeof (source_data_size) _a = (source_data_size); typeof (decoded_size ) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(uint16_t )) _b = (({ typeof (source_data_size) _a = (source_data_size) ; typeof (decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(uint16_t)); (_a < _b) ? _a : _b; })); |
| 230 | decoded_size = data_size; |
| 231 | } |
| 232 | } else { |
| 233 | if (!tensor) |
| 234 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
Although the value stored to 'tensor' is used in the enclosing expression, the value is never actually read from 'tensor' | |
| 235 | ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)); (_a < _b) ? _a : _b; })); |
| 236 | decoded_size = data_size; |
| 237 | } |
| 238 | } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) { |
| 239 | decoded_size = source_data_size; |
| 240 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size)) |
| 241 | { |
| 242 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype)((tensor_out[0]->info.datatype) & 0xFF000) == CCV_QX) |
| 243 | copying = workspace + data_size; |
| 244 | else { |
| 245 | ccv_float_to_half_precision((float*)(workspace + data_size), (uint16_t*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))({ typeof (tensor_count) _a = (tensor_count); typeof (({ typeof (source_data_size) _a = (source_data_size); typeof (decoded_size ) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(float )) _b = (({ typeof (source_data_size) _a = (source_data_size) ; typeof (decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(float)); (_a < _b) ? _a : _b; })); |
| 246 | decoded_size = data_size; |
| 247 | } |
| 248 | } else { |
| 249 | if (!tensor) |
| 250 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
| 251 | ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)); (_a < _b) ? _a : _b; })); |
| 252 | decoded_size = data_size; |
| 253 | } |
| 254 | } else |
| 255 | { assert(0)((void) sizeof ((0) ? 1 : 0), __extension__ ({ if (0) ; else __assert_fail ("0", "ccv_nnc_tensor_io.c", 255, __extension__ __PRETTY_FUNCTION__ ); })); } |
| 256 | } |
| 257 | cumemcpy(tensor_out[0]->data.u8, tensor_out[0]->info.type, copying, CCV_TENSOR_CPU_MEMORY, decoded_size); |
| 258 | ccfreefree(workspace); |
| 259 | } else { |
| 260 | if (!options || !options->decode) |
| 261 | { |
| 262 | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
| 263 | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)); (_a < _b) ? _a : _b; })); |
| 264 | else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) |
| 265 | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)); (_a < _b) ? _a : _b; })); |
| 266 | else |
| 267 | { assert(0)((void) sizeof ((0) ? 1 : 0), __extension__ ({ if (0) ; else __assert_fail ("0", "ccv_nnc_tensor_io.c", 267, __extension__ __PRETTY_FUNCTION__ ); })); } |
| 268 | } else { |
| 269 | void* const workspace = ccmallocmalloc(source_data_size); |
| 270 | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
| 271 | { |
| 272 | size_t decoded_size = source_data_size; |
| 273 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
| 274 | { |
| 275 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype)((tensor_out[0]->info.datatype) & 0xFF000) == CCV_QX) |
| 276 | { |
| 277 | if (decoded_size > 0) |
| 278 | memcpy(tensor_out[0]->data.f32, workspace, ccv_min(source_data_size, decoded_size)({ typeof (source_data_size) _a = (source_data_size); typeof ( decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; })); |
| 279 | } else |
| 280 | ccv_half_precision_to_float((uint16_t*)workspace, tensor_out[0]->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))({ typeof (tensor_count) _a = (tensor_count); typeof (({ typeof (source_data_size) _a = (source_data_size); typeof (decoded_size ) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(uint16_t )) _b = (({ typeof (source_data_size) _a = (source_data_size) ; typeof (decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(uint16_t)); (_a < _b) ? _a : _b; })); |
| 281 | } else { |
| 282 | if (!tensor) |
| 283 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
| 284 | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)); (_a < _b) ? _a : _b; })); |
| 285 | } |
| 286 | } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) { |
| 287 | size_t decoded_size = source_data_size; |
| 288 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
| 289 | { |
| 290 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype)((tensor_out[0]->info.datatype) & 0xFF000) == CCV_QX) |
| 291 | { |
| 292 | if (decoded_size > 0) |
| 293 | memcpy(tensor_out[0]->data.f16, workspace, ccv_min(source_data_size, decoded_size)({ typeof (source_data_size) _a = (source_data_size); typeof ( decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; })); |
| 294 | } else |
| 295 | ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor_out[0]->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))({ typeof (tensor_count) _a = (tensor_count); typeof (({ typeof (source_data_size) _a = (source_data_size); typeof (decoded_size ) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(float )) _b = (({ typeof (source_data_size) _a = (source_data_size) ; typeof (decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(float)); (_a < _b) ? _a : _b; })); |
| 296 | } else { |
| 297 | if (!tensor) |
| 298 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
| 299 | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)); (_a < _b) ? _a : _b; })); |
| 300 | } |
| 301 | } else |
| 302 | { assert(0)((void) sizeof ((0) ? 1 : 0), __extension__ ({ if (0) ; else __assert_fail ("0", "ccv_nnc_tensor_io.c", 302, __extension__ __PRETTY_FUNCTION__ ); })); } |
| 303 | ccfreefree(workspace); |
| 304 | } |
| 305 | } |
| 306 | #elif defined(HAVE_MPS) |
| 307 | if (CCV_TENSOR_GET_MEMORY(tensor_params.type)((tensor_params.type) & 0x3) == CCV_TENSOR_GPU_MEMORY) |
| 308 | { |
| 309 | const size_t data_size = ccv_nnc_tensor_data_size(tensor_params); |
| 310 | unsigned char* workspace; |
| 311 | unsigned char* copying; |
| 312 | size_t decoded_size = data_size; |
| 313 | if (!options || !options->decode) |
| 314 | { |
| 315 | copying = workspace = ccmallocmalloc(data_size); |
| 316 | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
| 317 | ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)); (_a < _b) ? _a : _b; })); |
| 318 | else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) |
| 319 | ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)); (_a < _b) ? _a : _b; })); |
| 320 | else |
| 321 | { assert(0)((void) sizeof ((0) ? 1 : 0), __extension__ ({ if (0) ; else __assert_fail ("0", "ccv_nnc_tensor_io.c", 321, __extension__ __PRETTY_FUNCTION__ ); })); } |
| 322 | } else { |
| 323 | copying = workspace = ccmallocmalloc(data_size + source_data_size); |
| 324 | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
| 325 | { |
| 326 | decoded_size = source_data_size; |
| 327 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size)) |
| 328 | { |
| 329 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype)((tensor_out[0]->info.datatype) & 0xFF000) == CCV_QX) |
| 330 | copying = workspace + data_size; |
| 331 | else { |
| 332 | ccv_half_precision_to_float((uint16_t*)(workspace + data_size), (float*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))({ typeof (tensor_count) _a = (tensor_count); typeof (({ typeof (source_data_size) _a = (source_data_size); typeof (decoded_size ) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(uint16_t )) _b = (({ typeof (source_data_size) _a = (source_data_size) ; typeof (decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(uint16_t)); (_a < _b) ? _a : _b; })); |
| 333 | decoded_size = data_size; |
| 334 | } |
| 335 | } else { |
| 336 | if (!tensor) |
| 337 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
| 338 | ccv_half_precision_to_float((uint16_t*)data, (float*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)); (_a < _b) ? _a : _b; })); |
| 339 | decoded_size = data_size; |
| 340 | } |
| 341 | } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) { |
| 342 | decoded_size = source_data_size; |
| 343 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace + data_size, &decoded_size)) |
| 344 | { |
| 345 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype)((tensor_out[0]->info.datatype) & 0xFF000) == CCV_QX) |
| 346 | copying = workspace + data_size; |
| 347 | else { |
| 348 | ccv_float_to_half_precision((float*)(workspace + data_size), (uint16_t*)workspace, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))({ typeof (tensor_count) _a = (tensor_count); typeof (({ typeof (source_data_size) _a = (source_data_size); typeof (decoded_size ) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(float )) _b = (({ typeof (source_data_size) _a = (source_data_size) ; typeof (decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(float)); (_a < _b) ? _a : _b; })); |
| 349 | decoded_size = data_size; |
| 350 | } |
| 351 | } else { |
| 352 | if (!tensor) |
| 353 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
| 354 | ccv_float_to_half_precision((float*)data, (uint16_t*)workspace, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)); (_a < _b) ? _a : _b; })); |
| 355 | decoded_size = data_size; |
| 356 | } |
| 357 | } else |
| 358 | { assert(0)((void) sizeof ((0) ? 1 : 0), __extension__ ({ if (0) ; else __assert_fail ("0", "ccv_nnc_tensor_io.c", 358, __extension__ __PRETTY_FUNCTION__ ); })); } |
| 359 | } |
| 360 | assert(tensor_out[0]->dataof == 0)((void) sizeof ((tensor_out[0]->dataof == 0) ? 1 : 0), __extension__ ({ if (tensor_out[0]->dataof == 0) ; else __assert_fail ( "tensor_out[0]->dataof == 0", "ccv_nnc_tensor_io.c", 360, __extension__ __PRETTY_FUNCTION__); })); |
| 361 | mpmemcpy(tensor_out[0]->data.u8, tensor_out[0]->dataof, tensor_out[0]->info.type, copying, 0, CCV_TENSOR_CPU_MEMORY, decoded_size); |
| 362 | ccfreefree(workspace); |
| 363 | } else { |
| 364 | if (!options || !options->decode) |
| 365 | { |
| 366 | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
| 367 | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)); (_a < _b) ? _a : _b; })); |
| 368 | else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) |
| 369 | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)); (_a < _b) ? _a : _b; })); |
| 370 | else |
| 371 | { assert(0)((void) sizeof ((0) ? 1 : 0), __extension__ ({ if (0) ; else __assert_fail ("0", "ccv_nnc_tensor_io.c", 371, __extension__ __PRETTY_FUNCTION__ ); })); } |
| 372 | } else { |
| 373 | void* const workspace = ccmallocmalloc(source_data_size); |
| 374 | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
| 375 | { |
| 376 | size_t decoded_size = source_data_size; |
| 377 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
| 378 | { |
| 379 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype)((tensor_out[0]->info.datatype) & 0xFF000) == CCV_QX) |
| 380 | { |
| 381 | if (decoded_size > 0) |
| 382 | memcpy(tensor_out[0]->data.f32, workspace, ccv_min(source_data_size, decoded_size)({ typeof (source_data_size) _a = (source_data_size); typeof ( decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; })); |
| 383 | } else |
| 384 | ccv_half_precision_to_float((uint16_t*)workspace, tensor_out[0]->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))({ typeof (tensor_count) _a = (tensor_count); typeof (({ typeof (source_data_size) _a = (source_data_size); typeof (decoded_size ) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(uint16_t )) _b = (({ typeof (source_data_size) _a = (source_data_size) ; typeof (decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(uint16_t)); (_a < _b) ? _a : _b; })); |
| 385 | } else { |
| 386 | if (!tensor) |
| 387 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
| 388 | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)); (_a < _b) ? _a : _b; })); |
| 389 | } |
| 390 | } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) { |
| 391 | size_t decoded_size = source_data_size; |
| 392 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
| 393 | { |
| 394 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype)((tensor_out[0]->info.datatype) & 0xFF000) == CCV_QX) |
| 395 | { |
| 396 | if (decoded_size > 0) |
| 397 | memcpy(tensor_out[0]->data.f16, workspace, ccv_min(source_data_size, decoded_size)({ typeof (source_data_size) _a = (source_data_size); typeof ( decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; })); |
| 398 | } else |
| 399 | ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor_out[0]->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))({ typeof (tensor_count) _a = (tensor_count); typeof (({ typeof (source_data_size) _a = (source_data_size); typeof (decoded_size ) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(float )) _b = (({ typeof (source_data_size) _a = (source_data_size) ; typeof (decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(float)); (_a < _b) ? _a : _b; })); |
| 400 | } else { |
| 401 | if (!tensor) |
| 402 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
| 403 | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)); (_a < _b) ? _a : _b; })); |
| 404 | } |
| 405 | } else |
| 406 | { assert(0)((void) sizeof ((0) ? 1 : 0), __extension__ ({ if (0) ; else __assert_fail ("0", "ccv_nnc_tensor_io.c", 406, __extension__ __PRETTY_FUNCTION__ ); })); } |
| 407 | ccfreefree(workspace); |
| 408 | } |
| 409 | } |
| 410 | #else |
| 411 | if (!options || !options->decode) |
| 412 | { |
| 413 | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
| 414 | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)); (_a < _b) ? _a : _b; })); |
| 415 | else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) |
| 416 | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)); (_a < _b) ? _a : _b; })); |
| 417 | else |
| 418 | { assert(0)((void) sizeof ((0) ? 1 : 0), __extension__ ({ if (0) ; else __assert_fail ("0", "ccv_nnc_tensor_io.c", 418, __extension__ __PRETTY_FUNCTION__ ); })); } |
| 419 | } else { |
| 420 | void* const workspace = ccmallocmalloc(source_data_size); |
| 421 | if (datatype == CCV_16F && tensor_params.datatype == CCV_32F) |
| 422 | { |
| 423 | size_t decoded_size = source_data_size; |
| 424 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
| 425 | { |
| 426 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype)((tensor_out[0]->info.datatype) & 0xFF000) == CCV_QX) |
| 427 | { |
| 428 | if (decoded_size > 0) |
| 429 | memcpy(tensor_out[0]->data.f32, workspace, ccv_min(source_data_size, decoded_size)({ typeof (source_data_size) _a = (source_data_size); typeof ( decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; })); |
| 430 | } else |
| 431 | ccv_half_precision_to_float((uint16_t*)workspace, tensor_out[0]->data.f32, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(uint16_t))({ typeof (tensor_count) _a = (tensor_count); typeof (({ typeof (source_data_size) _a = (source_data_size); typeof (decoded_size ) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(uint16_t )) _b = (({ typeof (source_data_size) _a = (source_data_size) ; typeof (decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(uint16_t)); (_a < _b) ? _a : _b; })); |
| 432 | } else { |
| 433 | if (!tensor) |
| 434 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
| 435 | ccv_half_precision_to_float((uint16_t*)data, tensor->data.f32, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(uint16_t))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(uint16_t)); (_a < _b) ? _a : _b; })); |
| 436 | } |
| 437 | } else if (datatype == CCV_32F && tensor_params.datatype == CCV_16F) { |
| 438 | size_t decoded_size = source_data_size; |
| 439 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
| 440 | { |
| 441 | if (CCV_GET_DATA_TYPE(tensor_out[0]->info.datatype)((tensor_out[0]->info.datatype) & 0xFF000) == CCV_QX) |
| 442 | { |
| 443 | if (decoded_size > 0) |
| 444 | memcpy(tensor_out[0]->data.f16, workspace, ccv_min(source_data_size, decoded_size)({ typeof (source_data_size) _a = (source_data_size); typeof ( decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; })); |
| 445 | } else |
| 446 | ccv_float_to_half_precision((float*)workspace, (uint16_t*)tensor_out[0]->data.f16, ccv_min(tensor_count, ccv_min(source_data_size, decoded_size) / sizeof(float))({ typeof (tensor_count) _a = (tensor_count); typeof (({ typeof (source_data_size) _a = (source_data_size); typeof (decoded_size ) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(float )) _b = (({ typeof (source_data_size) _a = (source_data_size) ; typeof (decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; }) / sizeof(float)); (_a < _b) ? _a : _b; })); |
| 447 | } else { |
| 448 | if (!tensor) |
| 449 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
| 450 | ccv_float_to_half_precision((float*)data, (uint16_t*)tensor->data.f16, ccv_min(tensor_count, sqlite3_column_bytes(tensor_select_stmt, 0) / sizeof(float))({ typeof (tensor_count) _a = (tensor_count); typeof (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)) _b = (sqlite3_column_bytes (tensor_select_stmt, 0) / sizeof(float)); (_a < _b) ? _a : _b; })); |
| 451 | } |
| 452 | } else |
| 453 | { assert(0)((void) sizeof ((0) ? 1 : 0), __extension__ ({ if (0) ; else __assert_fail ("0", "ccv_nnc_tensor_io.c", 453, __extension__ __PRETTY_FUNCTION__ ); })); } |
| 454 | ccfreefree(workspace); |
| 455 | } |
| 456 | #endif |
| 457 | } else { |
| 458 | // If it is QX, we need to have a custom decoder to decode properly. |
| 459 | if (datatype != tensor_params.datatype) |
| 460 | { assert(options && options->decode)((void) sizeof ((options && options->decode) ? 1 : 0), __extension__ ({ if (options && options->decode ) ; else __assert_fail ("options && options->decode" , "ccv_nnc_tensor_io.c", 460, __extension__ __PRETTY_FUNCTION__ ); })); } |
| 461 | size_t data_size = ccv_nnc_tensor_data_size(tensor_params); |
| 462 | #ifdef HAVE_CUDA1 |
| 463 | if (!options || !options->decode) |
| 464 | { |
| 465 | if (CCV_TENSOR_GET_MEMORY(tensor_params.type)((tensor_params.type) & 0x3) == CCV_TENSOR_GPU_MEMORY) |
| 466 | cumemcpy(tensor->data.u8, tensor->info.type, data, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))({ typeof (data_size) _a = (data_size); typeof (sqlite3_column_bytes (tensor_select_stmt, 0)) _b = (sqlite3_column_bytes(tensor_select_stmt , 0)); (_a < _b) ? _a : _b; })); |
| 467 | else |
| 468 | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))({ typeof (data_size) _a = (data_size); typeof (sqlite3_column_bytes (tensor_select_stmt, 0)) _b = (sqlite3_column_bytes(tensor_select_stmt , 0)); (_a < _b) ? _a : _b; })); |
| 469 | } else { |
| 470 | if (CCV_TENSOR_GET_MEMORY(tensor_params.type)((tensor_params.type) & 0x3) == CCV_TENSOR_GPU_MEMORY) |
| 471 | { |
| 472 | void* const workspace = ccmallocmalloc(data_size); |
| 473 | size_t decoded_size = data_size; |
| 474 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) |
| 475 | cumemcpy(tensor_out[0]->data.u8, tensor_out[0]->info.type, workspace, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, decoded_size)({ typeof (data_size) _a = (data_size); typeof (decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; })); |
| 476 | else { |
| 477 | if (!tensor) |
| 478 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
| 479 | cumemcpy(tensor->data.u8, tensor->info.type, data, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))({ typeof (data_size) _a = (data_size); typeof (sqlite3_column_bytes (tensor_select_stmt, 0)) _b = (sqlite3_column_bytes(tensor_select_stmt , 0)); (_a < _b) ? _a : _b; })); |
| 480 | } |
| 481 | ccfreefree(workspace); |
| 482 | } else { |
| 483 | size_t decoded_size = data_size; |
| 484 | if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, tensor ? tensor->data.u8 : 0, &decoded_size)) |
| 485 | { |
| 486 | if (!tensor) |
| 487 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
| 488 | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))({ typeof (data_size) _a = (data_size); typeof (sqlite3_column_bytes (tensor_select_stmt, 0)) _b = (sqlite3_column_bytes(tensor_select_stmt , 0)); (_a < _b) ? _a : _b; })); |
| 489 | } |
| 490 | } |
| 491 | } |
| 492 | #elif defined(HAVE_MPS) |
| 493 | if (!options || !options->decode) |
| 494 | { |
| 495 | if (CCV_TENSOR_GET_MEMORY(tensor_params.type)((tensor_params.type) & 0x3) == CCV_TENSOR_GPU_MEMORY) |
| 496 | { |
| 497 | assert(tensor->dataof == 0)((void) sizeof ((tensor->dataof == 0) ? 1 : 0), __extension__ ({ if (tensor->dataof == 0) ; else __assert_fail ("tensor->dataof == 0" , "ccv_nnc_tensor_io.c", 497, __extension__ __PRETTY_FUNCTION__ ); })); |
| 498 | mpmemcpy(tensor->data.u8, tensor->dataof, tensor->info.type, data, 0, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))({ typeof (data_size) _a = (data_size); typeof (sqlite3_column_bytes (tensor_select_stmt, 0)) _b = (sqlite3_column_bytes(tensor_select_stmt , 0)); (_a < _b) ? _a : _b; })); |
| 499 | } else |
| 500 | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))({ typeof (data_size) _a = (data_size); typeof (sqlite3_column_bytes (tensor_select_stmt, 0)) _b = (sqlite3_column_bytes(tensor_select_stmt , 0)); (_a < _b) ? _a : _b; })); |
| 501 | } else { |
| 502 | if (CCV_TENSOR_GET_MEMORY(tensor_params.type)((tensor_params.type) & 0x3) == CCV_TENSOR_GPU_MEMORY) |
| 503 | { |
| 504 | if (tensor) |
| 505 | { assert(tensor->dataof == 0)((void) sizeof ((tensor->dataof == 0) ? 1 : 0), __extension__ ({ if (tensor->dataof == 0) ; else __assert_fail ("tensor->dataof == 0" , "ccv_nnc_tensor_io.c", 505, __extension__ __PRETTY_FUNCTION__ ); })); } |
| 506 | void* const workspace = ccmallocmalloc(data_size); |
| 507 | size_t decoded_size = data_size; |
| 508 | if (options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, workspace, &decoded_size)) { |
| 509 | mpmemcpy(tensor_out[0]->data.u8, tensor_out[0]->dataof, tensor_out[0]->info.type, workspace, 0, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, decoded_size)({ typeof (data_size) _a = (data_size); typeof (decoded_size) _b = (decoded_size); (_a < _b) ? _a : _b; })); |
| 510 | } else { |
| 511 | if (!tensor) |
| 512 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
| 513 | mpmemcpy(tensor->data.u8, tensor->dataof, tensor->info.type, data, 0, CCV_TENSOR_CPU_MEMORY, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))({ typeof (data_size) _a = (data_size); typeof (sqlite3_column_bytes (tensor_select_stmt, 0)) _b = (sqlite3_column_bytes(tensor_select_stmt , 0)); (_a < _b) ? _a : _b; })); |
| 514 | } |
| 515 | ccfreefree(workspace); |
| 516 | } else { |
| 517 | size_t decoded_size = data_size; |
| 518 | if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, tensor ? tensor->data.u8 : 0, &decoded_size)) |
| 519 | { |
| 520 | if (!tensor) |
| 521 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
| 522 | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))({ typeof (data_size) _a = (data_size); typeof (sqlite3_column_bytes (tensor_select_stmt, 0)) _b = (sqlite3_column_bytes(tensor_select_stmt , 0)); (_a < _b) ? _a : _b; })); |
| 523 | } |
| 524 | } |
| 525 | } |
| 526 | #else |
| 527 | if (!options || !options->decode) |
| 528 | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))({ typeof (data_size) _a = (data_size); typeof (sqlite3_column_bytes (tensor_select_stmt, 0)) _b = (sqlite3_column_bytes(tensor_select_stmt , 0)); (_a < _b) ? _a : _b; })); |
| 529 | else { |
| 530 | size_t decoded_size = data_size; |
| 531 | if (!options->decode(data, sqlite3_column_bytes(tensor_select_stmt, 0), datatype, dim, nd, identifier, options->context, tensor_params, tensor_out, tensor ? tensor->data.u8 : 0, &decoded_size)) |
| 532 | { |
| 533 | if (!tensor) |
| 534 | *tensor_out = tensor = ccv_nnc_tensor_new(0, tensor_params, 0); |
| 535 | memcpy(tensor->data.u8, data, ccv_min(data_size, sqlite3_column_bytes(tensor_select_stmt, 0))({ typeof (data_size) _a = (data_size); typeof (sqlite3_column_bytes (tensor_select_stmt, 0)) _b = (sqlite3_column_bytes(tensor_select_stmt , 0)); (_a < _b) ? _a : _b; })); |
| 536 | } |
| 537 | } |
| 538 | #endif |
| 539 | } |
| 540 | tensor_out[0]->type &= ~CCV_GARBAGE; // If it is marked as garbage, remove that mark now. |
| 541 | sqlite3_reset(tensor_select_stmt); |
| 542 | sqlite3_clear_bindings(tensor_select_stmt); |
| 543 | sqlite3_finalize(tensor_select_stmt); |
| 544 | return CCV_IO_FINAL; |
| 545 | } |
| 546 |