Coverage Report

Created: 2025-02-24 17:43

/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/tensor.tests.c

 Count| Source
      | #include "ccv.h"
      | #include "case.h"
      | #include "ccv_case.h"
      | #include "ccv_nnc_case.h"
      | #include "nnc/ccv_nnc.h"
      | #include "nnc/ccv_nnc_easy.h"
      | #include "3rdparty/sqlite3/sqlite3.h"
      | #include "3rdparty/dsfmt/dSFMT.h"
      |
      | TEST_SETUP()
      | {
      |   ccv_nnc_init();
      | }
      |
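      | /* Round-trips a 10x20x30 32F tensor through SQLite from GPU memory: the
      |  * tensor is written from the GPU, then read back twice, once into a NULL
      |  * pointer, which makes the reader allocate a tensor with the stored shape,
      |  * and once into a preallocated 10-element CPU tensor, which receives only
      |  * the first 10 values and keeps its own shape. */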
      | TEST_CASE("tensor persistence, to / from GPU")
      | {
      |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_DATA_TRANSFER_FORWARD, CCV_NNC_BACKEND_MPS) || ccv_nnc_cmd_ok(CCV_NNC_DATA_TRANSFER_FORWARD, CCV_NNC_BACKEND_GPU_REF));
      |   sqlite3* handle;
      |   sqlite3_open("tensors_g.sqlite3", &handle);
      |   ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 20, 30), 0);
      |   ccv_nnc_tensor_t* const tensorG = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 20, 30), 0);
      |   int i;
      |   dsfmt_t dsfmt;
      |   dsfmt_init_gen_rand(&dsfmt, 1);
      |   for (i = 0; i < 10 * 20 * 30; i++)
      |     tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
      |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor), TENSOR_LIST(tensorG), 0);
      |   ccv_nnc_tensor_write(tensorG, handle, "x", 0);
      |   sqlite3_close(handle);
      |   handle = 0;
      |   sqlite3_open("tensors_g.sqlite3", &handle);
      |   ccv_nnc_tensor_t* tensor1 = 0;
      |   ccv_nnc_tensor_read(handle, "x", 0, 0, 0, &tensor1);
      |   ccv_nnc_tensor_t* tensor2 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
      |   ccv_nnc_tensor_read(handle, "x", 0, 0, 0, &tensor2);
      |   sqlite3_close(handle);
      |   ccv_nnc_tensor_t* const tensor1c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 20, 30), 0);
      |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor1), TENSOR_LIST(tensor1c), 0);
      |   REQUIRE_TENSOR_EQ(tensor1c, tensor, "the first tensor should equal to the second");
      |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, tensor2->data.f32, tensor->data.f32, 10, 1e-5, "the first 10 element should be equal");
      |   REQUIRE(ccv_nnc_tensor_nd(tensor2->info.dim) == 1, "should be 1-d tensor");
      |   REQUIRE_EQ(tensor2->info.dim[0], 10, "should be 1-d tensor with 10-element");
      |   ccv_nnc_tensor_free(tensor1);
      |   ccv_nnc_tensor_free(tensor1c);
      |   ccv_nnc_tensor_free(tensor2);
      |   ccv_nnc_tensor_free(tensor);
      |   ccv_nnc_tensor_free(tensorG);
      | }
      |
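      | /* Creates a raw binary file of 4096 * 5 floats, then maps tensors straight
      |  * from it: ccv_nnc_tensor_new_from_file() with offset 0 views the first 5
      |  * floats, and with byte offset 4096 * 4 * 4 views 4 floats from the middle
      |  * of the file. Both views are fed through a GPU add to check the values. */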
      | TEST_CASE("tensor mapped from file")
     1| {
     1|   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_DATA_TRANSFER_FORWARD, CCV_NNC_BACKEND_MPS) || ccv_nnc_cmd_ok(CCV_NNC_DATA_TRANSFER_FORWARD, CCV_NNC_BACKEND_GPU_REF));
     1|   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_ADD_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_ADD_FORWARD, CCV_NNC_BACKEND_MPS));
     1|   FILE* w = fopen("tensor.bin", "w+");
     1|   float* w_a = (float*)ccmalloc(sizeof(float) * 4096 * 5);
     1|   int i;
 20.4k|   for (i = 0; i < 4096 * 5; i++)
 20.4k|     w_a[i] = (float)(i + 1);
     1|   fwrite(w_a, 1, sizeof(float) * 4096 * 5, w);
     1|   fclose(w);
     1|   ccfree(w_a);
     1|   ccv_nnc_tensor_t* one = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
     1|   one->data.f32[0] = 1;
     1|   ccv_nnc_tensor_t* one_gpu = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 1), 0);
     1|   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(one), TENSOR_LIST(one_gpu), 0);
     1|   ccv_nnc_tensor_t* tensor_a = ccv_nnc_tensor_new_from_file(GPU_TENSOR_NHWC(000, 32F, 5), "tensor.bin", 0, 0);
     1|   ccv_nnc_tensor_t* a_result = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5), 0);
     1|   ccv_nnc_cmd_exec(CMD_ADD_FORWARD(0.5, 0.2), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor_a, one_gpu), TENSOR_LIST(a_result), 0);
     1|   float a[] = {1 * 0.5 + 0.2, 2 * 0.5 + 0.2, 3 * 0.5 + 0.2, 4 * 0.5 + 0.2, 5 * 0.5 + 0.2};
     1|   ccv_nnc_tensor_t* tensor_b = ccv_nnc_tensor_new_from_file(GPU_TENSOR_NHWC(000, 32F, 4), "tensor.bin", (4096 * 4 * 4), 0);
     1|   ccv_nnc_tensor_t* b_result = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 4), 0);
     1|   ccv_nnc_cmd_exec(CMD_ADD_FORWARD(1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor_b, one_gpu), TENSOR_LIST(b_result), 0);
     1|   float b[] = {4096 * 4 + 1 + 1, 4096 * 4 + 2 + 1, 4096 * 4 + 3 + 1, 4096 * 4 + 4 + 1};
     1|   ccv_nnc_tensor_t* at = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5), 0);
     1|   ccv_nnc_tensor_t* bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
     1|   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a_result, b_result), TENSOR_LIST(at, bt), 0);
     1|   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, at->data.f32, a, 5, 1e-5, "the first 5 element should be equal");
     1|   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, bt->data.f32, b, 4, 1e-5, "the first 4 element should be equal");
     1|   ccv_nnc_tensor_free(tensor_a);
     1|   ccv_nnc_tensor_free(tensor_b);
     1|   ccv_nnc_tensor_free(one);
     1|   ccv_nnc_tensor_free(one_gpu);
     1|   ccv_nnc_tensor_free(a_result);
     1|   ccv_nnc_tensor_free(at);
     1|   ccv_nnc_tensor_free(b_result);
     1|   ccv_nnc_tensor_free(bt);
     1| }
      |
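      | /* Encoder / decoder pair used by the tests below. An encoder gets the raw
      |  * tensor bytes, writes the encoded copy, reports the encoded size and a
      |  * nonzero identifier, and returns 1 to signal that it handled the data;
      |  * returning 0 leaves the default unencoded path in effect (that fallback
      |  * is what the noop pair further down exercises). A decoder checks the
      |  * identifier, allocates the output tensor if the caller passed none, and
      |  * reports how many bytes it decoded. This pair XORs every byte with 0x13. */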
      | static int _tensor_xor_encode(const void* const data, const size_t data_size, const int datatype, const int* const dimensions, const int dimension_count, void* const context, void* const encoded, size_t* const encoded_size, ccv_nnc_tensor_param_t* const params, unsigned int* const identifier)
      | {
      |   unsigned char* const u8 = (unsigned char*)data;
      |   unsigned char* const u8enc = (unsigned char*)encoded;
      |   int i;
      |   for (i = 0; i < data_size; i++)
      |     u8enc[i] = u8[i] ^ 0x13;
      |   *encoded_size = data_size;
      |   *identifier = 1;
      |   return 1;
      | }
      |
      | static int _tensor_xor_decode(const void* const data, const size_t data_size, const int datatype, const int* const dimensions, const int dimension_count, const unsigned int identifier, void* const context, const ccv_nnc_tensor_param_t tensor_params, ccv_nnc_tensor_t** const tensor_out, void* decoded, size_t* const decoded_size)
      | {
      |   if (identifier != 1)
      |     return 0;
      |   if (!tensor_out[0])
      |   {
      |     tensor_out[0] = ccv_nnc_tensor_new(0, tensor_params, 0);
      |     if (!decoded)
      |       decoded = tensor_out[0]->data.u8;
      |   }
      |   unsigned char* const u8 = (unsigned char*)data;
      |   unsigned char* const u8dec = (unsigned char*)decoded;
      |   const size_t expected_size = *decoded_size;
      |   int i;
      |   for (i = 0; i < ccv_min(expected_size, data_size); i++)
      |     u8dec[i] = u8[i] ^ 0x13;
      |   *decoded_size = ccv_min(expected_size, data_size);
      |   return 1;
      | }
      |
      | static int _tensor_noop_encode(const void* const data, const size_t data_size, const int datatype, const int* const dimensions, const int dimension_count, void* const context, void* const encoded, size_t* const encoded_size, ccv_nnc_tensor_param_t* const params, unsigned int* const identifier)
      | {
      |   return 0;
      | }
      |
      | static int _tensor_noop_decode(const void* const data, const size_t data_size, const int datatype, const int* const dimensions, const int dimension_count, const unsigned int identifier, void* const context, const ccv_nnc_tensor_param_t tensor_params, ccv_nnc_tensor_t** const tensor_out, void* const decoded, size_t* const decoded_size)
      | {
      |   return 0;
      | }
      |
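      | /* Same GPU round trip as the first test case, but with the XOR encoder /
      |  * decoder supplied through ccv_nnc_tensor_io_option_t, so the bytes stored
      |  * in SQLite are transformed on write and restored on read. */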
      | TEST_CASE("tensor persistence with encoder / decoder, to / from GPU")
     1| {
     1|   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_DATA_TRANSFER_FORWARD, CCV_NNC_BACKEND_MPS) || ccv_nnc_cmd_ok(CCV_NNC_DATA_TRANSFER_FORWARD, CCV_NNC_BACKEND_GPU_REF));
     1|   sqlite3* handle;
     1|   sqlite3_open("tensors_de_g.sqlite3", &handle);
     1|   ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 20, 30), 0);
     1|   int i;
     1|   dsfmt_t dsfmt;
     1|   dsfmt_init_gen_rand(&dsfmt, 1);
 6.00k|   for (i = 0; i < 10 * 20 * 30; i++)
 6.00k|     tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
     1|   ccv_nnc_tensor_t* const tensorG = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 20, 30), 0);
     1|   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor), TENSOR_LIST(tensorG), 0);
     1|   ccv_nnc_tensor_io_option_t options = {
     1|     .encode = _tensor_xor_encode,
     1|     .decode = _tensor_xor_decode
     1|   };
     1|   ccv_nnc_tensor_write(tensorG, handle, "y", &options);
     1|   sqlite3_close(handle);
     1|   handle = 0;
     1|   sqlite3_open("tensors_de_g.sqlite3", &handle);
     1|   ccv_nnc_tensor_t* tensor1 = 0;
     1|   ccv_nnc_tensor_read(handle, "y", &options, 0, 0, &tensor1);
     1|   ccv_nnc_tensor_t* tensor2 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
     1|   ccv_nnc_tensor_read(handle, "y", &options, 0, 0, &tensor2);
     1|   sqlite3_close(handle);
     1|   ccv_nnc_tensor_t* tensor1c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 20, 30), 0);
     1|   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor1), TENSOR_LIST(tensor1c), 0);
     1|   REQUIRE_TENSOR_EQ(tensor1c, tensor, "the first tensor should equal to the second");
     1|   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, tensor2->data.f32, tensor->data.f32, 10, 1e-5, "the first 10 element should be equal");
     1|   REQUIRE(ccv_nnc_tensor_nd(tensor2->info.dim) == 1, "should be 1-d tensor");
     1|   REQUIRE_EQ(tensor2->info.dim[0], 10, "should be 1-d tensor with 10-element");
     1|   ccv_nnc_tensor_free(tensor1);
     1|   ccv_nnc_tensor_free(tensor1c);
     1|   ccv_nnc_tensor_free(tensor2);
     1|   ccv_nnc_tensor_free(tensor);
     1|   ccv_nnc_tensor_free(tensorG);
     1| }
      |
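      | /* Same round trip with the noop encoder / decoder: both callbacks return 0,
      |  * so the write and read must behave exactly as if no options were given. */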
      | TEST_CASE("tensor persistence with noop encoder / decoder, to / from GPU")
     1| {
     1|   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_DATA_TRANSFER_FORWARD, CCV_NNC_BACKEND_MPS) || ccv_nnc_cmd_ok(CCV_NNC_DATA_TRANSFER_FORWARD, CCV_NNC_BACKEND_GPU_REF));
     1|   sqlite3* handle;
     1|   sqlite3_open("tensors_noop_de_g.sqlite3", &handle);
     1|   ccv_nnc_tensor_t* const tensor = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 20, 30), 0);
     1|   int i;
     1|   dsfmt_t dsfmt;
     1|   dsfmt_init_gen_rand(&dsfmt, 1);
 6.00k|   for (i = 0; i < 10 * 20 * 30; i++)
 6.00k|     tensor->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
     1|   ccv_nnc_tensor_t* const tensorG = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 20, 30), 0);
     1|   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor), TENSOR_LIST(tensorG), 0);
     1|   ccv_nnc_tensor_io_option_t options = {
     1|     .encode = _tensor_noop_encode,
     1|     .decode = _tensor_noop_decode
     1|   };
     1|   ccv_nnc_tensor_write(tensorG, handle, "y", &options);
     1|   sqlite3_close(handle);
     1|   handle = 0;
     1|   sqlite3_open("tensors_noop_de_g.sqlite3", &handle);
     1|   ccv_nnc_tensor_t* tensor1 = 0;
     1|   ccv_nnc_tensor_read(handle, "y", &options, 0, 0, &tensor1);
     1|   ccv_nnc_tensor_t* tensor2 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
     1|   ccv_nnc_tensor_read(handle, "y", &options, 0, 0, &tensor2);
     1|   sqlite3_close(handle);
     1|   ccv_nnc_tensor_t* tensor1c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 20, 30), 0);
     1|   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor1), TENSOR_LIST(tensor1c), 0);
     1|   REQUIRE_TENSOR_EQ(tensor1c, tensor, "the first tensor should equal to the second");
     1|   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, tensor2->data.f32, tensor->data.f32, 10, 1e-5, "the first 10 element should be equal");
     1|   REQUIRE(ccv_nnc_tensor_nd(tensor2->info.dim) == 1, "should be 1-d tensor");
     1|   REQUIRE_EQ(tensor2->info.dim[0], 10, "should be 1-d tensor with 10-element");
     1|   ccv_nnc_tensor_free(tensor1);
     1|   ccv_nnc_tensor_free(tensor1c);
     1|   ccv_nnc_tensor_free(tensor2);
     1|   ccv_nnc_tensor_free(tensor);
     1|   ccv_nnc_tensor_free(tensorG);
     1| }
      |
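      | /* Type coercion on read: a 16F tensor stored under "x" is read into a
      |  * preallocated 32F tensor, and a 32F tensor stored under "y" into a
      |  * preallocated 16F tensor; the values must survive both conversions
      |  * within half-precision tolerance (1e-3). */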
      | TEST_CASE("tensor persistence with type coercion, to / from GPU")
     1| {
     1|   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_DATA_TRANSFER_FORWARD, CCV_NNC_BACKEND_MPS) || ccv_nnc_cmd_ok(CCV_NNC_DATA_TRANSFER_FORWARD, CCV_NNC_BACKEND_GPU_REF));
     1|   sqlite3* handle;
     1|   sqlite3_open("tensors_tc_g.sqlite3", &handle);
     1|   ccv_nnc_tensor_t* const tensorf32 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 20, 30), 0);
     1|   int i;
     1|   dsfmt_t dsfmt;
     1|   dsfmt_init_gen_rand(&dsfmt, 1);
 6.00k|   for (i = 0; i < 10 * 20 * 30; i++)
 6.00k|     tensorf32->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
     1|   ccv_nnc_tensor_t* const tensorf16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 20, 30), 0);
     1|   ccv_nnc_tensor_t* const tensorf16G = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10, 20, 30), 0);
     1|   ccv_float_to_half_precision(tensorf32->data.f32, (uint16_t*)tensorf16->data.f16, 10 * 20 * 30);
 6.00k|   for (i = 0; i < 10 * 20 * 30; i++)
 6.00k|     tensorf32->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
     1|   ccv_nnc_tensor_t* const tensorf32G = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 20, 30), 0);
     1|   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensorf16, tensorf32), TENSOR_LIST(tensorf16G, tensorf32G), 0);
     1|   ccv_nnc_tensor_write(tensorf16G, handle, "x", 0);
     1|   ccv_nnc_tensor_write(tensorf32G, handle, "y", 0);
     1|   sqlite3_close(handle);
     1|   handle = 0;
     1|   sqlite3_open("tensors_tc_g.sqlite3", &handle);
     1|   ccv_nnc_tensor_t* tensor1 = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
     1|   ccv_nnc_tensor_t* tensor1c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
     1|   ccv_nnc_tensor_read(handle, "x", 0, 0, 0, &tensor1);
     1|   ccv_nnc_tensor_t* tensor2 = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
     1|   ccv_nnc_tensor_t* tensor2c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
     1|   ccv_nnc_tensor_read(handle, "y", 0, 0, 0, &tensor2);
     1|   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor1, tensor2), TENSOR_LIST(tensor1c, tensor2c), 0);
     1|   sqlite3_close(handle);
     1|   float* tensor1_ref = (float*)ccmalloc(sizeof(float) * 10);
     1|   ccv_half_precision_to_float((uint16_t*)tensorf16->data.f16, tensor1_ref, 10);
     1|   float* tensor2_ret = (float*)ccmalloc(sizeof(float) * 10);
     1|   ccv_half_precision_to_float((uint16_t*)tensor2c->data.f16, tensor2_ret, 10);
     1|   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, tensor1c->data.f32, tensor1_ref, 10, 1e-3, "the first 10 element should be equal");
     1|   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, tensor2_ret, tensorf32->data.f32, 10, 1e-3, "the first 10 element should be equal");
     1|   REQUIRE(ccv_nnc_tensor_nd(tensor1->info.dim) == 1, "should be 1-d tensor");
     1|   REQUIRE(ccv_nnc_tensor_nd(tensor2->info.dim) == 1, "should be 1-d tensor");
     1|   REQUIRE_EQ(tensor1->info.dim[0], 10, "should be 1-d tensor with 10-element");
     1|   REQUIRE_EQ(tensor2->info.dim[0], 10, "should be 1-d tensor with 10-element");
     1|   ccv_nnc_tensor_free(tensor1);
     1|   ccv_nnc_tensor_free(tensor2);
     1|   ccv_nnc_tensor_free(tensor1c);
     1|   ccv_nnc_tensor_free(tensor2c);
     1|   ccv_nnc_tensor_free(tensorf16);
     1|   ccv_nnc_tensor_free(tensorf32);
     1|   ccv_nnc_tensor_free(tensorf16G);
     1|   ccv_nnc_tensor_free(tensorf32G);
     1|   ccfree(tensor1_ref);
     1|   ccfree(tensor2_ret);
     1| }
      |
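      | /* Combines type coercion with the XOR encoder / decoder: XOR decoding and
      |  * half / float conversion are applied together on the read path. */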
      | TEST_CASE("tensor persistence with type coercion and encoder / decoder, to / from GPU")
     1| {
     1|   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_DATA_TRANSFER_FORWARD, CCV_NNC_BACKEND_MPS) || ccv_nnc_cmd_ok(CCV_NNC_DATA_TRANSFER_FORWARD, CCV_NNC_BACKEND_GPU_REF));
     1|   sqlite3* handle;
     1|   sqlite3_open("tensors_tc_de_g.sqlite3", &handle);
     1|   ccv_nnc_tensor_t* const tensorf32 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 20, 30), 0);
     1|   int i;
     1|   dsfmt_t dsfmt;
     1|   dsfmt_init_gen_rand(&dsfmt, 1);
 6.00k|   for (i = 0; i < 10 * 20 * 30; i++)
 6.00k|     tensorf32->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
     1|   ccv_nnc_tensor_t* const tensorf16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 20, 30), 0);
     1|   ccv_float_to_half_precision(tensorf32->data.f32, (uint16_t*)tensorf16->data.f16, 10 * 20 * 30);
 6.00k|   for (i = 0; i < 10 * 20 * 30; i++)
 6.00k|     tensorf32->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
     1|   ccv_nnc_tensor_t* const tensorf32G = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 20, 30), 0);
     1|   ccv_nnc_tensor_t* const tensorf16G = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10, 20, 30), 0);
     1|   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensorf16, tensorf32), TENSOR_LIST(tensorf16G, tensorf32G), 0);
     1|   ccv_nnc_tensor_io_option_t options = {
     1|     .encode = _tensor_xor_encode,
     1|     .decode = _tensor_xor_decode
     1|   };
     1|   ccv_nnc_tensor_write(tensorf16G, handle, "x", &options);
     1|   ccv_nnc_tensor_write(tensorf32G, handle, "y", &options);
     1|   sqlite3_close(handle);
     1|   handle = 0;
     1|   sqlite3_open("tensors_tc_de_g.sqlite3", &handle);
     1|   ccv_nnc_tensor_t* tensor1 = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
     1|   ccv_nnc_tensor_t* tensor1c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
     1|   ccv_nnc_tensor_read(handle, "x", &options, 0, 0, &tensor1);
     1|   ccv_nnc_tensor_t* tensor2 = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
     1|   ccv_nnc_tensor_t* tensor2c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
     1|   ccv_nnc_tensor_read(handle, "y", &options, 0, 0, &tensor2);
     1|   sqlite3_close(handle);
     1|   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor1, tensor2), TENSOR_LIST(tensor1c, tensor2c), 0);
     1|   float* tensor1_ref = (float*)ccmalloc(sizeof(float) * 10);
     1|   ccv_half_precision_to_float((uint16_t*)tensorf16->data.f16, tensor1_ref, 10);
     1|   float* tensor2_ret = (float*)ccmalloc(sizeof(float) * 10);
     1|   ccv_half_precision_to_float((uint16_t*)tensor2c->data.f16, tensor2_ret, 10);
     1|   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, tensor1c->data.f32, tensor1_ref, 10, 1e-3, "the first 10 element should be equal");
     1|   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, tensor2_ret, tensorf32->data.f32, 10, 1e-3, "the first 10 element should be equal");
     1|   REQUIRE(ccv_nnc_tensor_nd(tensor1->info.dim) == 1, "should be 1-d tensor");
     1|   REQUIRE(ccv_nnc_tensor_nd(tensor2->info.dim) == 1, "should be 1-d tensor");
     1|   REQUIRE_EQ(tensor1->info.dim[0], 10, "should be 1-d tensor with 10-element");
     1|   REQUIRE_EQ(tensor2->info.dim[0], 10, "should be 1-d tensor with 10-element");
     1|   ccv_nnc_tensor_free(tensor1);
     1|   ccv_nnc_tensor_free(tensor2);
     1|   ccv_nnc_tensor_free(tensor1c);
     1|   ccv_nnc_tensor_free(tensor2c);
     1|   ccv_nnc_tensor_free(tensorf16);
     1|   ccv_nnc_tensor_free(tensorf32);
     1|   ccv_nnc_tensor_free(tensorf16G);
     1|   ccv_nnc_tensor_free(tensorf32G);
     1|   ccfree(tensor1_ref);
     1|   ccfree(tensor2_ret);
     1| }
      |
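      | /* Combines type coercion with the noop encoder / decoder; since both
      |  * callbacks decline, this must match the plain type-coercion behavior. */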
      | TEST_CASE("tensor persistence with type coercion and noop encoder / decoder, to / from GPU")
      | {
      |   GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_DATA_TRANSFER_FORWARD, CCV_NNC_BACKEND_MPS) || ccv_nnc_cmd_ok(CCV_NNC_DATA_TRANSFER_FORWARD, CCV_NNC_BACKEND_GPU_REF));
      |   sqlite3* handle;
      |   sqlite3_open("tensors_tc_noop_de_g.sqlite3", &handle);
      |   ccv_nnc_tensor_t* const tensorf32 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 20, 30), 0);
      |   int i;
      |   dsfmt_t dsfmt;
      |   dsfmt_init_gen_rand(&dsfmt, 1);
      |   for (i = 0; i < 10 * 20 * 30; i++)
      |     tensorf32->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
      |   ccv_nnc_tensor_t* const tensorf16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10, 20, 30), 0);
      |   ccv_float_to_half_precision(tensorf32->data.f32, (uint16_t*)tensorf16->data.f16, 10 * 20 * 30);
      |   for (i = 0; i < 10 * 20 * 30; i++)
      |     tensorf32->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
      |   ccv_nnc_tensor_t* const tensorf32G = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 20, 30), 0);
      |   ccv_nnc_tensor_t* const tensorf16G = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10, 20, 30), 0);
      |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensorf16, tensorf32), TENSOR_LIST(tensorf16G, tensorf32G), 0);
      |   ccv_nnc_tensor_io_option_t options = {
      |     .encode = _tensor_noop_encode,
      |     .decode = _tensor_noop_decode
      |   };
      |   ccv_nnc_tensor_write(tensorf16G, handle, "x", &options);
      |   ccv_nnc_tensor_write(tensorf32G, handle, "y", &options);
      |   sqlite3_close(handle);
      |   handle = 0;
      |   sqlite3_open("tensors_tc_noop_de_g.sqlite3", &handle);
      |   ccv_nnc_tensor_t* tensor1 = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
      |   ccv_nnc_tensor_t* tensor1c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
      |   ccv_nnc_tensor_read(handle, "x", &options, 0, 0, &tensor1);
      |   ccv_nnc_tensor_t* tensor2 = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 10), 0);
      |   ccv_nnc_tensor_t* tensor2c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 10), 0);
      |   ccv_nnc_tensor_read(handle, "y", &options, 0, 0, &tensor2);
      |   sqlite3_close(handle);
      |   ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(tensor1, tensor2), TENSOR_LIST(tensor1c, tensor2c), 0);
      |   float* tensor1_ref = (float*)ccmalloc(sizeof(float) * 10);
      |   ccv_half_precision_to_float((uint16_t*)tensorf16->data.f16, tensor1_ref, 10);
      |   float* tensor2_ret = (float*)ccmalloc(sizeof(float) * 10);
      |   ccv_half_precision_to_float((uint16_t*)tensor2c->data.f16, tensor2_ret, 10);
      |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, tensor1c->data.f32, tensor1_ref, 10, 1e-3, "the first 10 element should be equal");
      |   REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, tensor2_ret, tensorf32->data.f32, 10, 1e-3, "the first 10 element should be equal");
      |   REQUIRE(ccv_nnc_tensor_nd(tensor1->info.dim) == 1, "should be 1-d tensor");
      |   REQUIRE(ccv_nnc_tensor_nd(tensor2->info.dim) == 1, "should be 1-d tensor");
      |   REQUIRE_EQ(tensor1->info.dim[0], 10, "should be 1-d tensor with 10-element");
      |   REQUIRE_EQ(tensor2->info.dim[0], 10, "should be 1-d tensor with 10-element");
      |   ccv_nnc_tensor_free(tensor1);
      |   ccv_nnc_tensor_free(tensor2);
      |   ccv_nnc_tensor_free(tensor1c);
      |   ccv_nnc_tensor_free(tensor2c);
      |   ccv_nnc_tensor_free(tensorf16);
      |   ccv_nnc_tensor_free(tensorf32);
      |   ccv_nnc_tensor_free(tensorf16G);
      |   ccv_nnc_tensor_free(tensorf32G);
      |   ccfree(tensor1_ref);
      |   ccfree(tensor2_ret);
      | }
      |
      | #include "case_main.h"
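
For reference, here is a minimal sketch of the persistence round trip that every test case above builds on, reduced to the CPU side. It is not part of the file under coverage; it only reuses the calls and signatures that appear in the listing (ccv_nnc_init, ccv_nnc_tensor_new, ccv_nnc_tensor_write, ccv_nnc_tensor_read, ccv_nnc_tensor_free), with error handling omitted:

    #include "nnc/ccv_nnc.h"
    #include "nnc/ccv_nnc_easy.h"
    #include "3rdparty/sqlite3/sqlite3.h"

    int main(void)
    {
      ccv_nnc_init();
      /* Write a small CPU tensor under the name "x". */
      sqlite3* handle;
      sqlite3_open("example.sqlite3", &handle);
      ccv_nnc_tensor_t* const x = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
      x->data.f32[0] = 1;
      x->data.f32[1] = 2;
      x->data.f32[2] = 3;
      ccv_nnc_tensor_write(x, handle, "x", 0); /* 0 = no encoder / decoder options */
      sqlite3_close(handle);
      /* Read it back; a NULL tensor pointer asks the reader to allocate a
       * tensor with the stored parameters, as in the test cases above. */
      sqlite3_open("example.sqlite3", &handle);
      ccv_nnc_tensor_t* y = 0;
      ccv_nnc_tensor_read(handle, "x", 0, 0, 0, &y);
      sqlite3_close(handle);
      ccv_nnc_tensor_free(x);
      ccv_nnc_tensor_free(y);
      return 0;
    }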