Coverage Report

Created: 2025-02-24 17:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/liu/actions-runner/_work/ccv/ccv/test/unit/nnc/palettize.tests.c
Line
Count
Source
1
#include "case.h"
2
#include "ccv_case.h"
3
#include "ccv_nnc_case.h"
4
#include <ccv.h>
5
#include <nnc/ccv_nnc.h>
6
#include <nnc/ccv_nnc_easy.h>
7
#include "3rdparty/dsfmt/dSFMT.h"
8
9
/*
 * Setup hook for this test file: calls ccv_nnc_init().
 * NOTE(review): presumably executed by the test harness before the cases
 * below run -- confirm against the TEST_SETUP definition in case.h.
 */
TEST_SETUP()
{
  ccv_nnc_init();
}
13
14
/*
 * CPU round trip: 2839 CCV_64F values -> 4-bit palettized bytes -> back,
 * expecting an exact match. 2839 is not a multiple of 8.
 * Expected size: 1420 == ceil(2839 * 4 / 8) and 2944 == 23 * 16 * 8 with
 * 23 == ceil(2839 / 128) (presumably index bytes plus per-block palettes;
 * confirm against ccv_nnc_palettize).
 */
TEST_CASE("quantize double to 4-bit and dequantize on CPU losslessly")
{
  /* 15 initializers for 16 slots: the last entry is implicitly 0.0. */
  const double table[16] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0};
  int n;
  double* const values = ccmalloc(2839 * sizeof(*values));
  double* const output_values = ccmalloc(2839 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((1420 + 2944) * sizeof(*packed));
  /* Input cycles through the table so every table value occurs. */
  for (n = 0; n < 2839; ++n)
    values[n] = table[n % 16];
  const size_t output_size = ccv_nnc_palettize(values, CCV_64F, CCV_TENSOR_CPU_MEMORY, 2839, 4, 128, packed, 1420 + 2944);
  REQUIRE_EQ(output_size, 1420 + 2944, "output size should match");
  ccv_nnc_depalettize(packed, CCV_64F, CCV_TENSOR_CPU_MEMORY, output_size, 4, 128, output_values, 2839);
  REQUIRE_ARRAY_EQ(double, values, output_values, 2839, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
31
32
/*
 * CPU round trip: 2839 CCV_32F values -> 4-bit palettized bytes -> back,
 * expecting an exact match. 2839 is not a multiple of 8.
 * Expected size: 1420 == ceil(2839 * 4 / 8) and 2944 / 2 == 23 * 16 * 4 with
 * 23 == ceil(2839 / 128) (presumably index bytes plus per-block palettes;
 * confirm against ccv_nnc_palettize).
 */
TEST_CASE("quantize float to 4-bit and dequantize on CPU losslessly")
{
  /* 15 initializers for 16 slots: the last entry is implicitly 0. */
  float lut[16] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0};
  float* const values = ccmalloc(sizeof(float) * 2839);
  int i;
  /* Input cycles through the table so every table value occurs. */
  for (i = 0; i < 2839; i++)
    values[i] = lut[i % 16];
  uint8_t* compressed = ccmalloc(sizeof(uint8_t) * (1420 + 2944 / 2));
  const size_t output_size = ccv_nnc_palettize(values, CCV_32F, CCV_TENSOR_CPU_MEMORY, 2839, 4, 128, compressed, 1420 + 2944 / 2);
  REQUIRE_EQ(output_size, 1420 + 2944 / 2, "output size should match");
  /* Sized with the float element type: the buffer holds 2839 floats, not doubles. */
  float* const output_values = ccmalloc(sizeof(float) * 2839);
  ccv_nnc_depalettize(compressed, CCV_32F, CCV_TENSOR_CPU_MEMORY, output_size, 4, 128, output_values, 2839);
  REQUIRE_ARRAY_EQ(float, values, output_values, 2839, "should be lossless");
  ccfree(values);
  ccfree(output_values);
  ccfree(compressed);
}
49
50
/*
 * CPU round trip: 2839 CCV_16F values (stored as uint16_t) -> 4-bit
 * palettized bytes -> back, expecting an exact match. 2839 is not a
 * multiple of 8.
 * Expected size: 1420 == ceil(2839 * 4 / 8) and 2944 / 4 == 23 * 16 * 2 with
 * 23 == ceil(2839 / 128) (presumably index bytes plus per-block palettes;
 * confirm against ccv_nnc_palettize).
 */
TEST_CASE("quantize half-precision to 4-bit and dequantize on CPU losslessly")
{
  /* 15 initializers for 16 slots: the last entry is implicitly 0. */
  float table_f32[16] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0};
  uint16_t table[16];
  int n;
  /* Convert the float table into its 16-bit representation once, up front. */
  ccv_float_to_half_precision(table_f32, table, 16);
  uint16_t* const values = ccmalloc(2839 * sizeof(*values));
  uint16_t* const output_values = ccmalloc(2839 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((1420 + 2944 / 4) * sizeof(*packed));
  for (n = 0; n < 2839; ++n)
    values[n] = table[n % 16];
  const size_t output_size = ccv_nnc_palettize(values, CCV_16F, CCV_TENSOR_CPU_MEMORY, 2839, 4, 128, packed, 1420 + 2944 / 4);
  REQUIRE_EQ(output_size, 1420 + 2944 / 4, "output size should match");
  ccv_nnc_depalettize(packed, CCV_16F, CCV_TENSOR_CPU_MEMORY, output_size, 4, 128, output_values, 2839);
  REQUIRE_ARRAY_EQ(uint16_t, values, output_values, 2839, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
69
70
/*
 * CPU round trip: 2839 CCV_64F values -> 5-bit palettized bytes -> back,
 * expecting an exact match. 2839 is not a multiple of 8.
 * Expected size: 1775 == ceil(2839 * 5 / 8) plus 23 * 32 * 8 with
 * 23 == ceil(2839 / 128) (presumably index bytes plus per-block palettes;
 * confirm against ccv_nnc_palettize).
 */
TEST_CASE("quantize double to 5-bit and dequantize on CPU losslessly")
{
  /* 30 initializers for 32 slots: the last two entries are implicitly 0.0. */
  const double table[32] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0, -14.0, -15.0};
  int n;
  double* const values = ccmalloc(2839 * sizeof(*values));
  double* const output_values = ccmalloc(2839 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((1775 + 23 * 32 * 8) * sizeof(*packed));
  /* Input cycles through the table so every table value occurs. */
  for (n = 0; n < 2839; ++n)
    values[n] = table[n % 32];
  const size_t output_size = ccv_nnc_palettize(values, CCV_64F, CCV_TENSOR_CPU_MEMORY, 2839, 5, 128, packed, 1775 + 23 * 32 * 8);
  REQUIRE_EQ(output_size, 1775 + 23 * 32 * 8, "output size should match");
  ccv_nnc_depalettize(packed, CCV_64F, CCV_TENSOR_CPU_MEMORY, output_size, 5, 128, output_values, 2839);
  REQUIRE_ARRAY_EQ(double, values, output_values, 2839, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
87
88
/*
 * CPU round trip: 2839 CCV_32F values -> 5-bit palettized bytes -> back,
 * expecting an exact match. 2839 is not a multiple of 8.
 * Expected size: 1775 == ceil(2839 * 5 / 8) plus 23 * 32 * 4 with
 * 23 == ceil(2839 / 128) (presumably index bytes plus per-block palettes;
 * confirm against ccv_nnc_palettize).
 */
TEST_CASE("quantize float to 5-bit and dequantize on CPU losslessly")
{
  /* 30 initializers for 32 slots: the last two entries are implicitly 0. */
  float lut[32] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0, -14.0, -15.0};
  float* const values = ccmalloc(sizeof(float) * 2839);
  int i;
  /* Input cycles through the table so every table value occurs. */
  for (i = 0; i < 2839; i++)
    values[i] = lut[i % 32];
  uint8_t* compressed = ccmalloc(sizeof(uint8_t) * (1775 + 23 * 32 * 4));
  const size_t output_size = ccv_nnc_palettize(values, CCV_32F, CCV_TENSOR_CPU_MEMORY, 2839, 5, 128, compressed, 1775 + 23 * 32 * 4);
  REQUIRE_EQ(output_size, 1775 + 23 * 32 * 4, "output size should match");
  /* Sized with the float element type: the buffer holds 2839 floats, not doubles. */
  float* const output_values = ccmalloc(sizeof(float) * 2839);
  ccv_nnc_depalettize(compressed, CCV_32F, CCV_TENSOR_CPU_MEMORY, output_size, 5, 128, output_values, 2839);
  REQUIRE_ARRAY_EQ(float, values, output_values, 2839, "should be lossless");
  ccfree(values);
  ccfree(output_values);
  ccfree(compressed);
}
105
106
/*
 * CPU round trip: 2839 CCV_16F values (stored as uint16_t) -> 5-bit
 * palettized bytes -> back, expecting an exact match. 2839 is not a
 * multiple of 8.
 * Expected size: 1775 == ceil(2839 * 5 / 8) plus 23 * 32 * 2 with
 * 23 == ceil(2839 / 128) (presumably index bytes plus per-block palettes;
 * confirm against ccv_nnc_palettize).
 */
TEST_CASE("quantize half-precision to 5-bit and dequantize on CPU losslessly")
{
  /* 30 initializers for 32 slots: the last two entries are implicitly 0. */
  float table_f32[32] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0, -14.0, -15.0};
  uint16_t table[32];
  int n;
  /* Convert the float table into its 16-bit representation once, up front. */
  ccv_float_to_half_precision(table_f32, table, 32);
  uint16_t* const values = ccmalloc(2839 * sizeof(*values));
  uint16_t* const output_values = ccmalloc(2839 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((1775 + 23 * 32 * 2) * sizeof(*packed));
  for (n = 0; n < 2839; ++n)
    values[n] = table[n % 32];
  const size_t output_size = ccv_nnc_palettize(values, CCV_16F, CCV_TENSOR_CPU_MEMORY, 2839, 5, 128, packed, 1775 + 23 * 32 * 2);
  REQUIRE_EQ(output_size, 1775 + 23 * 32 * 2, "output size should match");
  ccv_nnc_depalettize(packed, CCV_16F, CCV_TENSOR_CPU_MEMORY, output_size, 5, 128, output_values, 2839);
  REQUIRE_ARRAY_EQ(uint16_t, values, output_values, 2839, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
125
126
/*
 * CPU round trip: 2839 CCV_64F values -> 6-bit palettized bytes -> back,
 * expecting an exact match. 2839 is not a multiple of 8.
 * Expected size: 2130 == ceil(2839 * 6 / 8) plus 6 * 64 * 8 with
 * 6 == ceil(2839 / 512) (presumably index bytes plus per-block palettes;
 * confirm against ccv_nnc_palettize).
 */
TEST_CASE("quantize double to 6-bit and dequantize on CPU losslessly")
{
  double table[64];
  int n;
  /* Table holds the 64 distinct values 0.0 .. 63.0. */
  for (n = 0; n < 64; ++n)
    table[n] = (double)n;
  double* const values = ccmalloc(2839 * sizeof(*values));
  double* const output_values = ccmalloc(2839 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2130 + 6 * 64 * 8) * sizeof(*packed));
  for (n = 0; n < 2839; ++n)
    values[n] = table[n % 64];
  const size_t output_size = ccv_nnc_palettize(values, CCV_64F, CCV_TENSOR_CPU_MEMORY, 2839, 6, 512, packed, 2130 + 6 * 64 * 8);
  REQUIRE_EQ(output_size, 2130 + 6 * 64 * 8, "output size should match");
  ccv_nnc_depalettize(packed, CCV_64F, CCV_TENSOR_CPU_MEMORY, output_size, 6, 512, output_values, 2839);
  REQUIRE_ARRAY_EQ(double, values, output_values, 2839, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
145
146
/*
 * CPU round trip: 2839 CCV_32F values -> 6-bit palettized bytes -> back,
 * expecting an exact match. 2839 is not a multiple of 8.
 * Expected size: 2130 == ceil(2839 * 6 / 8) plus 6 * 64 * 4 with
 * 6 == ceil(2839 / 512) (presumably index bytes plus per-block palettes;
 * confirm against ccv_nnc_palettize).
 */
TEST_CASE("quantize float to 6-bit and dequantize on CPU losslessly")
{
  float table[64];
  int n;
  /* Table holds the 64 distinct values 0 .. 63. */
  for (n = 0; n < 64; ++n)
    table[n] = (float)n;
  float* const values = ccmalloc(2839 * sizeof(*values));
  float* const output_values = ccmalloc(2839 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2130 + 6 * 64 * 4) * sizeof(*packed));
  for (n = 0; n < 2839; ++n)
    values[n] = table[n % 64];
  const size_t output_size = ccv_nnc_palettize(values, CCV_32F, CCV_TENSOR_CPU_MEMORY, 2839, 6, 512, packed, 2130 + 6 * 64 * 4);
  REQUIRE_EQ(output_size, 2130 + 6 * 64 * 4, "output size should match");
  ccv_nnc_depalettize(packed, CCV_32F, CCV_TENSOR_CPU_MEMORY, output_size, 6, 512, output_values, 2839);
  REQUIRE_ARRAY_EQ(float, values, output_values, 2839, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
165
166
/*
 * CPU round trip: 2839 CCV_16F values (stored as uint16_t) -> 6-bit
 * palettized bytes -> back, expecting an exact match. 2839 is not a
 * multiple of 8.
 * Expected size: 2130 == ceil(2839 * 6 / 8) plus 6 * 64 * 2 with
 * 6 == ceil(2839 / 512) (presumably index bytes plus per-block palettes;
 * confirm against ccv_nnc_palettize).
 */
TEST_CASE("quantize half-precision to 6-bit and dequantize on CPU losslessly")
{
  float table_f32[64];
  uint16_t table[64];
  int n;
  /* Build 0 .. 63 as floats, then convert them to 16-bit form in one call. */
  for (n = 0; n < 64; ++n)
    table_f32[n] = (float)n;
  ccv_float_to_half_precision(table_f32, table, 64);
  uint16_t* const values = ccmalloc(2839 * sizeof(*values));
  uint16_t* const output_values = ccmalloc(2839 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2130 + 6 * 64 * 2) * sizeof(*packed));
  for (n = 0; n < 2839; ++n)
    values[n] = table[n % 64];
  const size_t output_size = ccv_nnc_palettize(values, CCV_16F, CCV_TENSOR_CPU_MEMORY, 2839, 6, 512, packed, 2130 + 6 * 64 * 2);
  REQUIRE_EQ(output_size, 2130 + 6 * 64 * 2, "output size should match");
  ccv_nnc_depalettize(packed, CCV_16F, CCV_TENSOR_CPU_MEMORY, output_size, 6, 512, output_values, 2839);
  REQUIRE_ARRAY_EQ(uint16_t, values, output_values, 2839, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
187
188
/*
 * CPU round trip: 2839 CCV_64F values -> 7-bit palettized bytes -> back,
 * expecting an exact match. 2839 is not a multiple of 8.
 * Expected size: 2485 == ceil(2839 * 7 / 8) plus 6 * 128 * 8 with
 * 6 == ceil(2839 / 512) (presumably index bytes plus per-block palettes;
 * confirm against ccv_nnc_palettize).
 */
TEST_CASE("quantize double to 7-bit and dequantize on CPU losslessly")
{
  double table[128];
  int n;
  /* Table holds the 128 distinct values 0.0 .. 127.0. */
  for (n = 0; n < 128; ++n)
    table[n] = (double)n;
  double* const values = ccmalloc(2839 * sizeof(*values));
  double* const output_values = ccmalloc(2839 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2485 + 6 * 128 * 8) * sizeof(*packed));
  for (n = 0; n < 2839; ++n)
    values[n] = table[n % 128];
  const size_t output_size = ccv_nnc_palettize(values, CCV_64F, CCV_TENSOR_CPU_MEMORY, 2839, 7, 512, packed, 2485 + 6 * 128 * 8);
  REQUIRE_EQ(output_size, 2485 + 6 * 128 * 8, "output size should match");
  ccv_nnc_depalettize(packed, CCV_64F, CCV_TENSOR_CPU_MEMORY, output_size, 7, 512, output_values, 2839);
  REQUIRE_ARRAY_EQ(double, values, output_values, 2839, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
207
208
/*
 * CPU round trip: 2839 CCV_32F values -> 7-bit palettized bytes -> back,
 * expecting an exact match. 2839 is not a multiple of 8.
 * Expected size: 2485 == ceil(2839 * 7 / 8) plus 6 * 128 * 4 with
 * 6 == ceil(2839 / 512) (presumably index bytes plus per-block palettes;
 * confirm against ccv_nnc_palettize).
 */
TEST_CASE("quantize float to 7-bit and dequantize on CPU losslessly")
{
  float table[128];
  int n;
  /* Table holds the 128 distinct values 0 .. 127. */
  for (n = 0; n < 128; ++n)
    table[n] = (float)n;
  float* const values = ccmalloc(2839 * sizeof(*values));
  float* const output_values = ccmalloc(2839 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2485 + 6 * 128 * 4) * sizeof(*packed));
  for (n = 0; n < 2839; ++n)
    values[n] = table[n % 128];
  const size_t output_size = ccv_nnc_palettize(values, CCV_32F, CCV_TENSOR_CPU_MEMORY, 2839, 7, 512, packed, 2485 + 6 * 128 * 4);
  REQUIRE_EQ(output_size, 2485 + 6 * 128 * 4, "output size should match");
  ccv_nnc_depalettize(packed, CCV_32F, CCV_TENSOR_CPU_MEMORY, output_size, 7, 512, output_values, 2839);
  REQUIRE_ARRAY_EQ(float, values, output_values, 2839, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
227
228
/*
 * CPU round trip: 2839 CCV_16F values (stored as uint16_t) -> 7-bit
 * palettized bytes -> back, expecting an exact match. 2839 is not a
 * multiple of 8.
 * Expected size: 2485 == ceil(2839 * 7 / 8) plus 6 * 128 * 2 with
 * 6 == ceil(2839 / 512) (presumably index bytes plus per-block palettes;
 * confirm against ccv_nnc_palettize).
 */
TEST_CASE("quantize half-precision to 7-bit and dequantize on CPU losslessly")
{
  float table_f32[128];
  uint16_t table[128];
  int n;
  /* Build 0 .. 127 as floats, then convert them to 16-bit form in one call. */
  for (n = 0; n < 128; ++n)
    table_f32[n] = (float)n;
  ccv_float_to_half_precision(table_f32, table, 128);
  uint16_t* const values = ccmalloc(2839 * sizeof(*values));
  uint16_t* const output_values = ccmalloc(2839 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2485 + 6 * 128 * 2) * sizeof(*packed));
  for (n = 0; n < 2839; ++n)
    values[n] = table[n % 128];
  const size_t output_size = ccv_nnc_palettize(values, CCV_16F, CCV_TENSOR_CPU_MEMORY, 2839, 7, 512, packed, 2485 + 6 * 128 * 2);
  REQUIRE_EQ(output_size, 2485 + 6 * 128 * 2, "output size should match");
  ccv_nnc_depalettize(packed, CCV_16F, CCV_TENSOR_CPU_MEMORY, output_size, 7, 512, output_values, 2839);
  REQUIRE_ARRAY_EQ(uint16_t, values, output_values, 2839, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
249
250
/*
 * CPU round trip: 2839 CCV_64F values -> 8-bit palettized bytes -> back,
 * expecting an exact match.
 * Expected size: 2839 (one byte per value at 8 bits) plus 3 * 256 * 8 with
 * 3 == ceil(2839 / 1280) (presumably index bytes plus per-block palettes;
 * confirm against ccv_nnc_palettize).
 */
TEST_CASE("quantize double to 8-bit and dequantize on CPU losslessly")
{
  double table[256];
  int n;
  /* Table holds the 256 distinct values 0.0 .. 255.0. */
  for (n = 0; n < 256; ++n)
    table[n] = (double)n;
  double* const values = ccmalloc(2839 * sizeof(*values));
  double* const output_values = ccmalloc(2839 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2839 + 3 * 256 * 8) * sizeof(*packed));
  for (n = 0; n < 2839; ++n)
    values[n] = table[n % 256];
  const size_t output_size = ccv_nnc_palettize(values, CCV_64F, CCV_TENSOR_CPU_MEMORY, 2839, 8, 1280, packed, 2839 + 3 * 256 * 8);
  REQUIRE_EQ(output_size, 2839 + 3 * 256 * 8, "output size should match");
  ccv_nnc_depalettize(packed, CCV_64F, CCV_TENSOR_CPU_MEMORY, output_size, 8, 1280, output_values, 2839);
  REQUIRE_ARRAY_EQ(double, values, output_values, 2839, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
269
270
/*
 * CPU round trip: 2839 CCV_32F values -> 8-bit palettized bytes -> back,
 * expecting an exact match.
 * Expected size: 2839 (one byte per value at 8 bits) plus 3 * 256 * 4 with
 * 3 == ceil(2839 / 1280) (presumably index bytes plus per-block palettes;
 * confirm against ccv_nnc_palettize).
 */
TEST_CASE("quantize float to 8-bit and dequantize on CPU losslessly")
{
  float table[256];
  int n;
  /* Table holds the 256 distinct values 0 .. 255. */
  for (n = 0; n < 256; ++n)
    table[n] = (float)n;
  float* const values = ccmalloc(2839 * sizeof(*values));
  float* const output_values = ccmalloc(2839 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2839 + 3 * 256 * 4) * sizeof(*packed));
  for (n = 0; n < 2839; ++n)
    values[n] = table[n % 256];
  const size_t output_size = ccv_nnc_palettize(values, CCV_32F, CCV_TENSOR_CPU_MEMORY, 2839, 8, 1280, packed, 2839 + 3 * 256 * 4);
  REQUIRE_EQ(output_size, 2839 + 3 * 256 * 4, "output size should match");
  ccv_nnc_depalettize(packed, CCV_32F, CCV_TENSOR_CPU_MEMORY, output_size, 8, 1280, output_values, 2839);
  REQUIRE_ARRAY_EQ(float, values, output_values, 2839, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
289
290
/*
 * CPU round trip: 2839 CCV_16F values (stored as uint16_t) -> 8-bit
 * palettized bytes -> back, expecting an exact match.
 * Expected size: 2839 (one byte per value at 8 bits) plus 3 * 256 * 2 with
 * 3 == ceil(2839 / 1280) (presumably index bytes plus per-block palettes;
 * confirm against ccv_nnc_palettize).
 */
TEST_CASE("quantize half-precision to 8-bit and dequantize on CPU losslessly")
{
  float table_f32[256];
  uint16_t table[256];
  int n;
  /* Build 0 .. 255 as floats, then convert them to 16-bit form in one call. */
  for (n = 0; n < 256; ++n)
    table_f32[n] = (float)n;
  ccv_float_to_half_precision(table_f32, table, 256);
  uint16_t* const values = ccmalloc(2839 * sizeof(*values));
  uint16_t* const output_values = ccmalloc(2839 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2839 + 3 * 256 * 2) * sizeof(*packed));
  for (n = 0; n < 2839; ++n)
    values[n] = table[n % 256];
  const size_t output_size = ccv_nnc_palettize(values, CCV_16F, CCV_TENSOR_CPU_MEMORY, 2839, 8, 1280, packed, 2839 + 3 * 256 * 2);
  REQUIRE_EQ(output_size, 2839 + 3 * 256 * 2, "output size should match");
  ccv_nnc_depalettize(packed, CCV_16F, CCV_TENSOR_CPU_MEMORY, output_size, 8, 1280, output_values, 2839);
  REQUIRE_ARRAY_EQ(uint16_t, values, output_values, 2839, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
311
312
/*
 * CPU round trip: 2840 CCV_64F values -> 4-bit palettized bytes -> back,
 * expecting an exact match. 2840 is a multiple of 8, which is presumably
 * what selects the "fast path" named in the title (confirm in the
 * implementation).
 * Expected size: 1420 == 2840 * 4 / 8 and 2944 == 23 * 16 * 8 with
 * 23 == ceil(2840 / 128).
 */
TEST_CASE("quantize double to 4-bit and dequantize on CPU losslessly, fast path")
{
  /* 15 initializers for 16 slots: the last entry is implicitly 0.0. */
  const double table[16] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0};
  int n;
  double* const values = ccmalloc(2840 * sizeof(*values));
  double* const output_values = ccmalloc(2840 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((1420 + 2944) * sizeof(*packed));
  /* Input cycles through the table so every table value occurs. */
  for (n = 0; n < 2840; ++n)
    values[n] = table[n % 16];
  const size_t output_size = ccv_nnc_palettize(values, CCV_64F, CCV_TENSOR_CPU_MEMORY, 2840, 4, 128, packed, 1420 + 2944);
  REQUIRE_EQ(output_size, 1420 + 2944, "output size should match");
  ccv_nnc_depalettize(packed, CCV_64F, CCV_TENSOR_CPU_MEMORY, output_size, 4, 128, output_values, 2840);
  REQUIRE_ARRAY_EQ(double, values, output_values, 2840, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
329
330
/*
 * CPU round trip: 2840 CCV_32F values -> 4-bit palettized bytes -> back,
 * expecting an exact match. 2840 is a multiple of 8, which is presumably
 * what selects the "fast path" named in the title (confirm in the
 * implementation).
 * Expected size: 1420 == 2840 * 4 / 8 and 2944 / 2 == 23 * 16 * 4 with
 * 23 == ceil(2840 / 128).
 */
TEST_CASE("quantize float to 4-bit and dequantize on CPU losslessly, fast path")
{
  /* 15 initializers for 16 slots: the last entry is implicitly 0. */
  float lut[16] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0};
  float* const values = ccmalloc(sizeof(float) * 2840);
  int i;
  /* Input cycles through the table so every table value occurs. */
  for (i = 0; i < 2840; i++)
    values[i] = lut[i % 16];
  uint8_t* compressed = ccmalloc(sizeof(uint8_t) * (1420 + 2944 / 2));
  const size_t output_size = ccv_nnc_palettize(values, CCV_32F, CCV_TENSOR_CPU_MEMORY, 2840, 4, 128, compressed, 1420 + 2944 / 2);
  REQUIRE_EQ(output_size, 1420 + 2944 / 2, "output size should match");
  /* Sized with the float element type: the buffer holds 2840 floats, not doubles. */
  float* const output_values = ccmalloc(sizeof(float) * 2840);
  ccv_nnc_depalettize(compressed, CCV_32F, CCV_TENSOR_CPU_MEMORY, output_size, 4, 128, output_values, 2840);
  REQUIRE_ARRAY_EQ(float, values, output_values, 2840, "should be lossless");
  ccfree(values);
  ccfree(output_values);
  ccfree(compressed);
}
347
348
/*
 * CPU round trip: 2840 CCV_16F values (stored as uint16_t) -> 4-bit
 * palettized bytes -> back, expecting an exact match. 2840 is a multiple
 * of 8, which is presumably what selects the "fast path" named in the title
 * (confirm in the implementation).
 * Expected size: 1420 == 2840 * 4 / 8 and 2944 / 4 == 23 * 16 * 2 with
 * 23 == ceil(2840 / 128).
 */
TEST_CASE("quantize half-precision to 4-bit and dequantize on CPU losslessly, fast path")
{
  /* 15 initializers for 16 slots: the last entry is implicitly 0. */
  float table_f32[16] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0};
  uint16_t table[16];
  int n;
  /* Convert the float table into its 16-bit representation once, up front. */
  ccv_float_to_half_precision(table_f32, table, 16);
  uint16_t* const values = ccmalloc(2840 * sizeof(*values));
  uint16_t* const output_values = ccmalloc(2840 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((1420 + 2944 / 4) * sizeof(*packed));
  for (n = 0; n < 2840; ++n)
    values[n] = table[n % 16];
  const size_t output_size = ccv_nnc_palettize(values, CCV_16F, CCV_TENSOR_CPU_MEMORY, 2840, 4, 128, packed, 1420 + 2944 / 4);
  REQUIRE_EQ(output_size, 1420 + 2944 / 4, "output size should match");
  ccv_nnc_depalettize(packed, CCV_16F, CCV_TENSOR_CPU_MEMORY, output_size, 4, 128, output_values, 2840);
  REQUIRE_ARRAY_EQ(uint16_t, values, output_values, 2840, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
367
368
/*
 * CPU round trip: 2840 CCV_64F values -> 5-bit palettized bytes -> back,
 * expecting an exact match. 2840 is a multiple of 8, which is presumably
 * what selects the "fast path" named in the title (confirm in the
 * implementation).
 * Expected size: 1775 == 2840 * 5 / 8 plus 23 * 32 * 8 with
 * 23 == ceil(2840 / 128).
 */
TEST_CASE("quantize double to 5-bit and dequantize on CPU losslessly, fast path")
{
  /* 30 initializers for 32 slots: the last two entries are implicitly 0.0. */
  const double table[32] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0, -14.0, -15.0};
  int n;
  double* const values = ccmalloc(2840 * sizeof(*values));
  double* const output_values = ccmalloc(2840 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((1775 + 23 * 32 * 8) * sizeof(*packed));
  /* Input cycles through the table so every table value occurs. */
  for (n = 0; n < 2840; ++n)
    values[n] = table[n % 32];
  const size_t output_size = ccv_nnc_palettize(values, CCV_64F, CCV_TENSOR_CPU_MEMORY, 2840, 5, 128, packed, 1775 + 23 * 32 * 8);
  REQUIRE_EQ(output_size, 1775 + 23 * 32 * 8, "output size should match");
  ccv_nnc_depalettize(packed, CCV_64F, CCV_TENSOR_CPU_MEMORY, output_size, 5, 128, output_values, 2840);
  REQUIRE_ARRAY_EQ(double, values, output_values, 2840, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
385
386
/*
 * CPU round trip: 2840 CCV_32F values -> 5-bit palettized bytes -> back,
 * expecting an exact match. 2840 is a multiple of 8, which is presumably
 * what selects the "fast path" named in the title (confirm in the
 * implementation).
 * Expected size: 1775 == 2840 * 5 / 8 plus 23 * 32 * 4 with
 * 23 == ceil(2840 / 128).
 */
TEST_CASE("quantize float to 5-bit and dequantize on CPU losslessly, fast path")
{
  /* 30 initializers for 32 slots: the last two entries are implicitly 0. */
  float lut[32] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0, -14.0, -15.0};
  float* const values = ccmalloc(sizeof(float) * 2840);
  int i;
  /* Input cycles through the table so every table value occurs. */
  for (i = 0; i < 2840; i++)
    values[i] = lut[i % 32];
  uint8_t* compressed = ccmalloc(sizeof(uint8_t) * (1775 + 23 * 32 * 4));
  const size_t output_size = ccv_nnc_palettize(values, CCV_32F, CCV_TENSOR_CPU_MEMORY, 2840, 5, 128, compressed, 1775 + 23 * 32 * 4);
  REQUIRE_EQ(output_size, 1775 + 23 * 32 * 4, "output size should match");
  /* Sized with the float element type: the buffer holds 2840 floats, not doubles. */
  float* const output_values = ccmalloc(sizeof(float) * 2840);
  ccv_nnc_depalettize(compressed, CCV_32F, CCV_TENSOR_CPU_MEMORY, output_size, 5, 128, output_values, 2840);
  REQUIRE_ARRAY_EQ(float, values, output_values, 2840, "should be lossless");
  ccfree(values);
  ccfree(output_values);
  ccfree(compressed);
}
403
404
/*
 * CPU round trip: 2840 CCV_16F values (stored as uint16_t) -> 5-bit
 * palettized bytes -> back, expecting an exact match. 2840 is a multiple
 * of 8, which is presumably what selects the "fast path" named in the title
 * (confirm in the implementation).
 * Expected size: 1775 == 2840 * 5 / 8 plus 23 * 32 * 2 with
 * 23 == ceil(2840 / 128).
 */
TEST_CASE("quantize half-precision to 5-bit and dequantize on CPU losslessly, fast path")
{
  /* 30 initializers for 32 slots: the last two entries are implicitly 0. */
  float table_f32[32] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, -1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0, -9.0, -10.0, -11.0, -12.0, -13.0, -14.0, -15.0};
  uint16_t table[32];
  int n;
  /* Convert the float table into its 16-bit representation once, up front. */
  ccv_float_to_half_precision(table_f32, table, 32);
  uint16_t* const values = ccmalloc(2840 * sizeof(*values));
  uint16_t* const output_values = ccmalloc(2840 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((1775 + 23 * 32 * 2) * sizeof(*packed));
  for (n = 0; n < 2840; ++n)
    values[n] = table[n % 32];
  const size_t output_size = ccv_nnc_palettize(values, CCV_16F, CCV_TENSOR_CPU_MEMORY, 2840, 5, 128, packed, 1775 + 23 * 32 * 2);
  REQUIRE_EQ(output_size, 1775 + 23 * 32 * 2, "output size should match");
  ccv_nnc_depalettize(packed, CCV_16F, CCV_TENSOR_CPU_MEMORY, output_size, 5, 128, output_values, 2840);
  REQUIRE_ARRAY_EQ(uint16_t, values, output_values, 2840, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
423
424
/*
 * CPU round trip: 2840 CCV_64F values -> 6-bit palettized bytes -> back,
 * expecting an exact match. 2840 is a multiple of 8, which is presumably
 * what selects the "fast path" named in the title (confirm in the
 * implementation).
 * Expected size: 2130 == 2840 * 6 / 8 plus 6 * 64 * 8 with
 * 6 == ceil(2840 / 512).
 */
TEST_CASE("quantize double to 6-bit and dequantize on CPU losslessly, fast path")
{
  double table[64];
  int n;
  /* Table holds the 64 distinct values 0.0 .. 63.0. */
  for (n = 0; n < 64; ++n)
    table[n] = (double)n;
  double* const values = ccmalloc(2840 * sizeof(*values));
  double* const output_values = ccmalloc(2840 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2130 + 6 * 64 * 8) * sizeof(*packed));
  for (n = 0; n < 2840; ++n)
    values[n] = table[n % 64];
  const size_t output_size = ccv_nnc_palettize(values, CCV_64F, CCV_TENSOR_CPU_MEMORY, 2840, 6, 512, packed, 2130 + 6 * 64 * 8);
  REQUIRE_EQ(output_size, 2130 + 6 * 64 * 8, "output size should match");
  ccv_nnc_depalettize(packed, CCV_64F, CCV_TENSOR_CPU_MEMORY, output_size, 6, 512, output_values, 2840);
  REQUIRE_ARRAY_EQ(double, values, output_values, 2840, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
443
444
/*
 * CPU round trip: 2840 CCV_32F values -> 6-bit palettized bytes -> back,
 * expecting an exact match. 2840 is a multiple of 8, which is presumably
 * what selects the "fast path" named in the title (confirm in the
 * implementation).
 * Expected size: 2130 == 2840 * 6 / 8 plus 6 * 64 * 4 with
 * 6 == ceil(2840 / 512).
 */
TEST_CASE("quantize float to 6-bit and dequantize on CPU losslessly, fast path")
{
  float table[64];
  int n;
  /* Table holds the 64 distinct values 0 .. 63. */
  for (n = 0; n < 64; ++n)
    table[n] = (float)n;
  float* const values = ccmalloc(2840 * sizeof(*values));
  float* const output_values = ccmalloc(2840 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2130 + 6 * 64 * 4) * sizeof(*packed));
  for (n = 0; n < 2840; ++n)
    values[n] = table[n % 64];
  const size_t output_size = ccv_nnc_palettize(values, CCV_32F, CCV_TENSOR_CPU_MEMORY, 2840, 6, 512, packed, 2130 + 6 * 64 * 4);
  REQUIRE_EQ(output_size, 2130 + 6 * 64 * 4, "output size should match");
  ccv_nnc_depalettize(packed, CCV_32F, CCV_TENSOR_CPU_MEMORY, output_size, 6, 512, output_values, 2840);
  REQUIRE_ARRAY_EQ(float, values, output_values, 2840, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
463
464
/*
 * CPU round trip: 2840 CCV_16F values (stored as uint16_t) -> 6-bit
 * palettized bytes -> back, expecting an exact match. 2840 is a multiple
 * of 8, which is presumably what selects the "fast path" named in the title
 * (confirm in the implementation).
 * Expected size: 2130 == 2840 * 6 / 8 plus 6 * 64 * 2 with
 * 6 == ceil(2840 / 512).
 */
TEST_CASE("quantize half-precision to 6-bit and dequantize on CPU losslessly, fast path")
{
  float table_f32[64];
  uint16_t table[64];
  int n;
  /* Build 0 .. 63 as floats, then convert them to 16-bit form in one call. */
  for (n = 0; n < 64; ++n)
    table_f32[n] = (float)n;
  ccv_float_to_half_precision(table_f32, table, 64);
  uint16_t* const values = ccmalloc(2840 * sizeof(*values));
  uint16_t* const output_values = ccmalloc(2840 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2130 + 6 * 64 * 2) * sizeof(*packed));
  for (n = 0; n < 2840; ++n)
    values[n] = table[n % 64];
  const size_t output_size = ccv_nnc_palettize(values, CCV_16F, CCV_TENSOR_CPU_MEMORY, 2840, 6, 512, packed, 2130 + 6 * 64 * 2);
  REQUIRE_EQ(output_size, 2130 + 6 * 64 * 2, "output size should match");
  ccv_nnc_depalettize(packed, CCV_16F, CCV_TENSOR_CPU_MEMORY, output_size, 6, 512, output_values, 2840);
  REQUIRE_ARRAY_EQ(uint16_t, values, output_values, 2840, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
485
486
/*
 * CPU round trip: 2840 CCV_64F values -> 7-bit palettized bytes -> back,
 * expecting an exact match. 2840 is a multiple of 8, which is presumably
 * what selects the "fast path" named in the title (confirm in the
 * implementation).
 * Expected size: 2485 == 2840 * 7 / 8 plus 6 * 128 * 8 with
 * 6 == ceil(2840 / 512).
 */
TEST_CASE("quantize double to 7-bit and dequantize on CPU losslessly, fast path")
{
  double table[128];
  int n;
  /* Table holds the 128 distinct values 0.0 .. 127.0. */
  for (n = 0; n < 128; ++n)
    table[n] = (double)n;
  double* const values = ccmalloc(2840 * sizeof(*values));
  double* const output_values = ccmalloc(2840 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2485 + 6 * 128 * 8) * sizeof(*packed));
  for (n = 0; n < 2840; ++n)
    values[n] = table[n % 128];
  const size_t output_size = ccv_nnc_palettize(values, CCV_64F, CCV_TENSOR_CPU_MEMORY, 2840, 7, 512, packed, 2485 + 6 * 128 * 8);
  REQUIRE_EQ(output_size, 2485 + 6 * 128 * 8, "output size should match");
  ccv_nnc_depalettize(packed, CCV_64F, CCV_TENSOR_CPU_MEMORY, output_size, 7, 512, output_values, 2840);
  REQUIRE_ARRAY_EQ(double, values, output_values, 2840, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
505
506
/*
 * CPU round trip: 2840 CCV_32F values -> 7-bit palettized bytes -> back,
 * expecting an exact match. 2840 is a multiple of 8, which is presumably
 * what selects the "fast path" named in the title (confirm in the
 * implementation).
 * Expected size: 2485 == 2840 * 7 / 8 plus 6 * 128 * 4 with
 * 6 == ceil(2840 / 512).
 */
TEST_CASE("quantize float to 7-bit and dequantize on CPU losslessly, fast path")
{
  float table[128];
  int n;
  /* Table holds the 128 distinct values 0 .. 127. */
  for (n = 0; n < 128; ++n)
    table[n] = (float)n;
  float* const values = ccmalloc(2840 * sizeof(*values));
  float* const output_values = ccmalloc(2840 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2485 + 6 * 128 * 4) * sizeof(*packed));
  for (n = 0; n < 2840; ++n)
    values[n] = table[n % 128];
  const size_t output_size = ccv_nnc_palettize(values, CCV_32F, CCV_TENSOR_CPU_MEMORY, 2840, 7, 512, packed, 2485 + 6 * 128 * 4);
  REQUIRE_EQ(output_size, 2485 + 6 * 128 * 4, "output size should match");
  ccv_nnc_depalettize(packed, CCV_32F, CCV_TENSOR_CPU_MEMORY, output_size, 7, 512, output_values, 2840);
  REQUIRE_ARRAY_EQ(float, values, output_values, 2840, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
525
526
/*
 * CPU round trip: 2840 CCV_16F values (stored as uint16_t) -> 7-bit
 * palettized bytes -> back, expecting an exact match. 2840 is a multiple
 * of 8, which is presumably what selects the "fast path" named in the title
 * (confirm in the implementation).
 * Expected size: 2485 == 2840 * 7 / 8 plus 6 * 128 * 2 with
 * 6 == ceil(2840 / 512).
 */
TEST_CASE("quantize half-precision to 7-bit and dequantize on CPU losslessly, fast path")
{
  float table_f32[128];
  uint16_t table[128];
  int n;
  /* Build 0 .. 127 as floats, then convert them to 16-bit form in one call. */
  for (n = 0; n < 128; ++n)
    table_f32[n] = (float)n;
  ccv_float_to_half_precision(table_f32, table, 128);
  uint16_t* const values = ccmalloc(2840 * sizeof(*values));
  uint16_t* const output_values = ccmalloc(2840 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2485 + 6 * 128 * 2) * sizeof(*packed));
  for (n = 0; n < 2840; ++n)
    values[n] = table[n % 128];
  const size_t output_size = ccv_nnc_palettize(values, CCV_16F, CCV_TENSOR_CPU_MEMORY, 2840, 7, 512, packed, 2485 + 6 * 128 * 2);
  REQUIRE_EQ(output_size, 2485 + 6 * 128 * 2, "output size should match");
  ccv_nnc_depalettize(packed, CCV_16F, CCV_TENSOR_CPU_MEMORY, output_size, 7, 512, output_values, 2840);
  REQUIRE_ARRAY_EQ(uint16_t, values, output_values, 2840, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
547
548
/*
 * CPU round trip: 2840 CCV_64F values -> 8-bit palettized bytes -> back,
 * expecting an exact match. 2840 is a multiple of 8, which is presumably
 * what selects the "fast path" named in the title (confirm in the
 * implementation).
 * Expected size: 2840 (one byte per value at 8 bits) plus 3 * 256 * 8 with
 * 3 == ceil(2840 / 1280).
 */
TEST_CASE("quantize double to 8-bit and dequantize on CPU losslessly, fast path")
{
  double table[256];
  int n;
  /* Table holds the 256 distinct values 0.0 .. 255.0. */
  for (n = 0; n < 256; ++n)
    table[n] = (double)n;
  double* const values = ccmalloc(2840 * sizeof(*values));
  double* const output_values = ccmalloc(2840 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2840 + 3 * 256 * 8) * sizeof(*packed));
  for (n = 0; n < 2840; ++n)
    values[n] = table[n % 256];
  const size_t output_size = ccv_nnc_palettize(values, CCV_64F, CCV_TENSOR_CPU_MEMORY, 2840, 8, 1280, packed, 2840 + 3 * 256 * 8);
  REQUIRE_EQ(output_size, 2840 + 3 * 256 * 8, "output size should match");
  ccv_nnc_depalettize(packed, CCV_64F, CCV_TENSOR_CPU_MEMORY, output_size, 8, 1280, output_values, 2840);
  REQUIRE_ARRAY_EQ(double, values, output_values, 2840, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
567
568
/*
 * CPU round trip: 2840 CCV_32F values -> 8-bit palettized bytes -> back,
 * expecting an exact match. 2840 is a multiple of 8, which is presumably
 * what selects the "fast path" named in the title (confirm in the
 * implementation).
 * Expected size: 2840 (one byte per value at 8 bits) plus 3 * 256 * 4 with
 * 3 == ceil(2840 / 1280).
 */
TEST_CASE("quantize float to 8-bit and dequantize on CPU losslessly, fast path")
{
  float table[256];
  int n;
  /* Table holds the 256 distinct values 0 .. 255. */
  for (n = 0; n < 256; ++n)
    table[n] = (float)n;
  float* const values = ccmalloc(2840 * sizeof(*values));
  float* const output_values = ccmalloc(2840 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2840 + 3 * 256 * 4) * sizeof(*packed));
  for (n = 0; n < 2840; ++n)
    values[n] = table[n % 256];
  const size_t output_size = ccv_nnc_palettize(values, CCV_32F, CCV_TENSOR_CPU_MEMORY, 2840, 8, 1280, packed, 2840 + 3 * 256 * 4);
  REQUIRE_EQ(output_size, 2840 + 3 * 256 * 4, "output size should match");
  ccv_nnc_depalettize(packed, CCV_32F, CCV_TENSOR_CPU_MEMORY, output_size, 8, 1280, output_values, 2840);
  REQUIRE_ARRAY_EQ(float, values, output_values, 2840, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
587
588
/*
 * CPU round trip: 2840 CCV_16F values (stored as uint16_t) -> 8-bit
 * palettized bytes -> back, expecting an exact match. 2840 is a multiple
 * of 8, which is presumably what selects the "fast path" named in the title
 * (confirm in the implementation).
 * Expected size: 2840 (one byte per value at 8 bits) plus 3 * 256 * 2 with
 * 3 == ceil(2840 / 1280).
 */
TEST_CASE("quantize half-precision to 8-bit and dequantize on CPU losslessly, fast path")
{
  float table_f32[256];
  uint16_t table[256];
  int n;
  /* Build 0 .. 255 as floats, then convert them to 16-bit form in one call. */
  for (n = 0; n < 256; ++n)
    table_f32[n] = (float)n;
  ccv_float_to_half_precision(table_f32, table, 256);
  uint16_t* const values = ccmalloc(2840 * sizeof(*values));
  uint16_t* const output_values = ccmalloc(2840 * sizeof(*output_values));
  uint8_t* const packed = ccmalloc((2840 + 3 * 256 * 2) * sizeof(*packed));
  for (n = 0; n < 2840; ++n)
    values[n] = table[n % 256];
  const size_t output_size = ccv_nnc_palettize(values, CCV_16F, CCV_TENSOR_CPU_MEMORY, 2840, 8, 1280, packed, 2840 + 3 * 256 * 2);
  REQUIRE_EQ(output_size, 2840 + 3 * 256 * 2, "output size should match");
  ccv_nnc_depalettize(packed, CCV_16F, CCV_TENSOR_CPU_MEMORY, output_size, 8, 1280, output_values, 2840);
  REQUIRE_ARRAY_EQ(uint16_t, values, output_values, 2840, "should be lossless");
  ccfree(packed);
  ccfree(output_values);
  ccfree(values);
}
609
610
#include "case_main.h"