/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/compression.tests.c
#include "case.h"
#include "ccv_case.h"
#include "ccv_nnc_case.h"
#include <ccv.h>
#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>
#include "3rdparty/dsfmt/dSFMT.h"

TEST_SETUP()
{
	ccv_nnc_init();
}

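// prepare_nchw builds a random fp16-representable NCHW tensor, then rewrites every 4x4
// block so that all values are drawn from two endpoint values and their 1/3 and 2/3
// interpolations (a BC1-style palette). Assuming LSSC compresses 4x4 blocks of half
// precision values against two endpoints plus small per-value indices, such data should
// survive a compress/decompress round trip with no extra loss, so the CPU reference and
// the GPU backend can be compared element for element.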
static void prepare_nchw(const int N, const int C, const int H, const int W, ccv_nnc_tensor_t** const c_ptr, ccv_nnc_tensor_t** const cgc_ptr)
{
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, N, C, H, W), 0);
	ccv_nnc_tensor_param_t a_params = CPU_TENSOR_NCHW(16F, N, C, H, W);
	ccv_nnc_tensor_t* const a16 = ccv_nnc_tensor_new(0, a_params, 0);
	ccv_nnc_tensor_param_t b_params;
	ccv_nnc_hint_tensor_auto(CMD_COMPRESSION_LSSC_FORWARD(), &a_params, 1, ccv_nnc_no_hint, &b_params, 1);
	ccv_nnc_tensor_t* const b16 = ccv_nnc_tensor_new(0, b_params, 0);
	ccv_nnc_tensor_t* const c16 = ccv_nnc_tensor_new(0, a_params, 0);
	ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, N, C, H, W), 0);
	dsfmt_t dsfmt;
	int i;
	dsfmt_init_gen_rand(&dsfmt, 1);
	for (i = 0; i < N * C * H * W; i++)
		a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
	// Round-trip through fp16 so a holds exactly representable values.
	ccv_float_to_half_precision(a->data.f32, (uint16_t*)a16->data.f16, N * C * H * W);
	ccv_half_precision_to_float((uint16_t*)a16->data.f16, a->data.f32, N * C * H * W);
	// Rewrite every 4x4 block so all values come from the two endpoints ap[0], ap[1] and
	// their 1/3-2/3 interpolations, then restore ap[0] and ap[1] so the block still
	// contains both extremes.
	int x, y, ix, iy;
	for (i = 0; i < N * C; i++)
		for (y = 0; y < H; y += 4)
			for (x = 0; x < W; x += 4)
			{
				float* const ap = a->data.f32 + x + y * W + i * H * W;
				float v[4] = { ap[0], ap[1], ap[0] * 2 / 3 + ap[1] / 3, ap[0] / 3 + ap[1] * 2 / 3 };
				for (iy = 0; iy < ccv_min(y + 4, H) - y; iy++)
					for (ix = 0; ix < ccv_min(x + 4, W) - x; ix++)
						ap[iy * W + ix] = v[dsfmt_genrand_uint32(&dsfmt) % 4];
				ap[0] = v[0];
				ap[1] = v[1]; // Make sure we still have max min.
			}
	ccv_float_to_half_precision(a->data.f32, (uint16_t*)a16->data.f16, N * C * H * W);
	ccv_half_precision_to_float((uint16_t*)a16->data.f16, a->data.f32, N * C * H * W);
	// Forward compresses a16 into b16; backward decompresses b16 into c16.
	ccv_nnc_cmd_exec(CMD_COMPRESSION_LSSC_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a16), TENSOR_LIST(b16), 0);
	ccv_nnc_cmd_exec(CMD_COMPRESSION_LSSC_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b16), TENSOR_LIST(c16), 0);
	ccv_half_precision_to_float((uint16_t*)c16->data.f16, c->data.f32, N * C * H * W);
	// Compare against GPU computation.
	ccv_nnc_tensor_param_t ag_params = GPU_TENSOR_NCHW(000, 16F, N, C, H, W);
	ccv_nnc_tensor_t* const a16g = ccv_nnc_tensor_new(0, ag_params, 0);
	ccv_nnc_tensor_param_t bg_params;
	ccv_nnc_hint_tensor_auto(CMD_COMPRESSION_LSSC_FORWARD(), &ag_params, 1, ccv_nnc_no_hint, &bg_params, 1);
	ccv_nnc_tensor_t* const b16g = ccv_nnc_tensor_new(0, bg_params, 0);
	ccv_nnc_tensor_t* const c16g = ccv_nnc_tensor_new(0, ag_params, 0);
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a16), TENSOR_LIST(a16g), 0);
	ccv_nnc_cmd_exec(CMD_COMPRESSION_LSSC_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a16g), TENSOR_LIST(b16g), 0);
	ccv_nnc_cmd_exec(CMD_COMPRESSION_LSSC_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b16g), TENSOR_LIST(c16g), 0);
	memset(c16->data.f16, 0, sizeof(ccv_float16_t) * N * C * H * W); // Reuse c16 as the staging buffer for the GPU result.
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c16g), TENSOR_LIST(c16), 0);
	ccv_nnc_tensor_t* const cgc = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, N, C, H, W), 0);
	ccv_half_precision_to_float((uint16_t*)c16->data.f16, cgc->data.f32, N * C * H * W);
	*c_ptr = c;
	*cgc_ptr = cgc;
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(a16);
	ccv_nnc_tensor_free(b16);
	ccv_nnc_tensor_free(c16);
	ccv_nnc_tensor_free(a16g);
	ccv_nnc_tensor_free(b16g);
	ccv_nnc_tensor_free(c16g);
}
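
// The shapes below include dimensions that are exact multiples of the 4x4 block size
// (28x28) as well as ones that leave partial blocks at the right and bottom borders
// (7x7, 14x14, 113x114), exercising partial-block handling in both backends. The 1e-3
// tolerance only allows for half-precision rounding; with the block-aligned data from
// prepare_nchw the CPU and GPU results are expected to agree essentially exactly.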

TEST_CASE("LSSC should give exact result from GPU for 128x512x7x7")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_COMPRESSION_LSSC_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
		ccv_nnc_cmd_ok(CCV_NNC_COMPRESSION_LSSC_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
	ccv_nnc_tensor_t* c;
	ccv_nnc_tensor_t* cgc;
	prepare_nchw(128, 512, 7, 7, &c, &cgc);
	REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, c->data.f32, cgc->data.f32, 128 * 512 * 7 * 7, 1e-3, "GPU and CPU computed result should match");
}

TEST_CASE("LSSC should give exact result from GPU for 128x512x14x14")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_COMPRESSION_LSSC_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
		ccv_nnc_cmd_ok(CCV_NNC_COMPRESSION_LSSC_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
	ccv_nnc_tensor_t* c;
	ccv_nnc_tensor_t* cgc;
	prepare_nchw(128, 512, 14, 14, &c, &cgc);
	REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, c->data.f32, cgc->data.f32, 128 * 512 * 14 * 14, 1e-3, "GPU and CPU computed result should match");
}

TEST_CASE("LSSC should give exact result from GPU for 128x64x113x114")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_COMPRESSION_LSSC_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
		ccv_nnc_cmd_ok(CCV_NNC_COMPRESSION_LSSC_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
	ccv_nnc_tensor_t* c;
	ccv_nnc_tensor_t* cgc;
	prepare_nchw(128, 64, 113, 114, &c, &cgc);
	REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, c->data.f32, cgc->data.f32, 128 * 64 * 113 * 114, 1e-3, "GPU and CPU computed result should match");
}

TEST_CASE("LSSC should give exact result from GPU for 128x256x28x28")
{
	GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_COMPRESSION_LSSC_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
		ccv_nnc_cmd_ok(CCV_NNC_COMPRESSION_LSSC_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
	ccv_nnc_tensor_t* c;
	ccv_nnc_tensor_t* cgc;
	prepare_nchw(128, 256, 28, 28, &c, &cgc);
	REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, c->data.f32, cgc->data.f32, 128 * 256 * 28 * 28, 1e-3, "GPU and CPU computed result should match");
}

#include "case_main.h"