Coverage Report

Created: 2024-08-18 16:21

/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/compression.tests.c
Line  Count   Source
   1          #include "case.h"
   2          #include "ccv_case.h"
   3          #include "ccv_nnc_case.h"
   4          #include <ccv.h>
   5          #include <nnc/ccv_nnc.h>
   6          #include <nnc/ccv_nnc_easy.h>
   7          #include "3rdparty/dsfmt/dSFMT.h"
   8
   9          TEST_SETUP()
  10          {
  11            ccv_nnc_init();
  12          }
  13
  14          static void prepare_nchw(const int N, const int C, const int H, const int W, ccv_nnc_tensor_t** const c_ptr, ccv_nnc_tensor_t** const cgc_ptr)
  15  4       {
  16  4         ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, N, C, H, W), 0);
  17  4         ccv_nnc_tensor_param_t a_params = CPU_TENSOR_NCHW(16F, N, C, H, W);
  18  4         ccv_nnc_tensor_t* const a16 = ccv_nnc_tensor_new(0, a_params, 0);
  19  4         ccv_nnc_tensor_param_t b_params;
  20  4         ccv_nnc_hint_tensor_auto(CMD_COMPRESSION_LSSC_FORWARD(), &a_params, 1, ccv_nnc_no_hint, &b_params, 1);
  21  4         ccv_nnc_tensor_t* const b16 = ccv_nnc_tensor_new(0, b_params, 0);
  22  4         ccv_nnc_tensor_t* const c16 = ccv_nnc_tensor_new(0, a_params, 0);
  23  4         ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, N, C, H, W), 0);
  24  4         dsfmt_t dsfmt;
  25  4         int i;
  26  4         dsfmt_init_gen_rand(&dsfmt, 1);
  27  147M      for (i = 0; i < N * C * H * W; i++)
  28  147M        a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 2 - 1;
  29  4         ccv_float_to_half_precision(a->data.f32, (uint16_t*)a16->data.f16, N * C * H * W);
  30  4         ccv_half_precision_to_float((uint16_t*)a16->data.f16, a->data.f32, N * C * H * W);
  31  4         int x, y, ix, iy;
  32  172k      for (i = 0; i < N * C; i++)
  33  1.03M       for (y = 0; y < H; y += 4)
  34  10.6M         for (x = 0; x < W; x += 4)
  35  9.80M         {
  36  9.80M           float* const ap = a->data.f32 + x + y * W + i * H * W;
  37  9.80M           float v[4] = { ap[0], ap[1], ap[0] * 2 / 3 + ap[1] / 3, ap[0] / 3 + ap[1] * 2 / 3 };
  38  47.6M           for (iy = 0; iy < ccv_min(y + 4, H) - y; iy++)
  39  185M              for (ix = 0; ix < ccv_min(x + 4, W) - x; ix++)
  40  147M                ap[iy * W + ix] = v[dsfmt_genrand_uint32(&dsfmt) % 4];
  41  9.80M           ap[0] = v[0];
  42  9.80M           ap[1] = v[1]; // Make sure we still have max min.
  43  9.80M         }
  44  4         ccv_float_to_half_precision(a->data.f32, (uint16_t*)a16->data.f16, N * C * H * W);
  45  4         ccv_half_precision_to_float((uint16_t*)a16->data.f16, a->data.f32, N * C * H * W);
  46  4         ccv_nnc_cmd_exec(CMD_COMPRESSION_LSSC_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a16), TENSOR_LIST(b16), 0);
  47  4         ccv_nnc_cmd_exec(CMD_COMPRESSION_LSSC_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b16), TENSOR_LIST(c16), 0);
  48  4         ccv_half_precision_to_float((uint16_t*)c16->data.f16, c->data.f32, N * C * H * W);
  49            // Compare against GPU computation
  50  4         ccv_nnc_tensor_param_t ag_params = GPU_TENSOR_NCHW(000, 16F, N, C, H, W);
  51  4         ccv_nnc_tensor_t* const a16g = ccv_nnc_tensor_new(0, ag_params, 0);
  52  4         ccv_nnc_tensor_param_t bg_params;
  53  4         ccv_nnc_hint_tensor_auto(CMD_COMPRESSION_LSSC_FORWARD(), &ag_params, 1, ccv_nnc_no_hint, &bg_params, 1);
  54  4         ccv_nnc_tensor_t* const b16g = ccv_nnc_tensor_new(0, bg_params, 0);
  55  4         ccv_nnc_tensor_t* const c16g = ccv_nnc_tensor_new(0, ag_params, 0);
  56  4         ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a16), TENSOR_LIST(a16g), 0);
  57  4         ccv_nnc_cmd_exec(CMD_COMPRESSION_LSSC_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a16g), TENSOR_LIST(b16g), 0);
  58  4         ccv_nnc_cmd_exec(CMD_COMPRESSION_LSSC_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b16g), TENSOR_LIST(c16g), 0);
  59  4         memset(c16->data.f16, 0, sizeof(ccv_float16_t) * N * C * H * W);
  60  4         ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c16g), TENSOR_LIST(c16), 0);
  61  4         ccv_nnc_tensor_t* const cgc = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, N, C, H, W), 0);
  62  4         ccv_half_precision_to_float((uint16_t*)c16->data.f16, cgc->data.f32, N * C * H * W);
  63  4         *c_ptr = c;
  64  4         *cgc_ptr = cgc;
  65  4         ccv_nnc_tensor_free(a);
  66  4         ccv_nnc_tensor_free(a16);
  67  4         ccv_nnc_tensor_free(b16);
  68  4         ccv_nnc_tensor_free(c16);
  69  4         ccv_nnc_tensor_free(a16g);
  70  4         ccv_nnc_tensor_free(b16g);
  71  4         ccv_nnc_tensor_free(c16g);
  72  4       }
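A note on what lines 32-43 construct: every 4x4 tile of a is quantized to a four-entry palette, namely the tile's two leading values and their 1/3 and 2/3 blends, and lines 41-42 then restore the two endpoints so each tile still contains its extremes. That is the palette shape a BC1-style block codec reconstructs from two stored endpoints, which appears to be why LSSC can round-trip this data so closely; the float-to-half round trips on lines 29-30 and 44-45 additionally snap the input to values exactly representable in fp16 before compression. A minimal standalone sketch of the palette construction (build_lssc_palette is a hypothetical name for illustration, not a library function):

static void build_lssc_palette(const float p0, const float p1, float v[4])
{
	// Two endpoints plus their 1/3 and 2/3 interpolants, mirroring the
	// v[4] initializer on line 37 of the listing above.
	v[0] = p0;
	v[1] = p1;
	v[2] = p0 * 2 / 3 + p1 / 3;
	v[3] = p0 / 3 + p1 * 2 / 3;
}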
  73
  74          TEST_CASE("LSSC should give exact result from GPU for 128x512x7x7")
  75  1       {
  76  1         GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_COMPRESSION_LSSC_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
  77  1           ccv_nnc_cmd_ok(CCV_NNC_COMPRESSION_LSSC_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
  78  1         ccv_nnc_tensor_t* c;
  79  1         ccv_nnc_tensor_t* cgc;
  80  1         prepare_nchw(128, 512, 7, 7, &c, &cgc);
  81  1         REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, c->data.f32, cgc->data.f32, 128 * 512 * 7 * 7, 1e-3, "GPU and CPU computed result should match");
  82  1       }
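The three remaining cases repeat this pattern with different shapes. Stripped of the test scaffolding, the compress/decompress round trip at the heart of all four reduces to the sketch below, which uses only calls already present in the listing; the 1x1x8x8 shape is an arbitrary choice for illustration:

ccv_nnc_tensor_param_t in_params = CPU_TENSOR_NCHW(16F, 1, 1, 8, 8);
ccv_nnc_tensor_t* const in16 = ccv_nnc_tensor_new(0, in_params, 0);
ccv_nnc_tensor_param_t packed_params;
// Ask the command to derive the compressed output shape.
ccv_nnc_hint_tensor_auto(CMD_COMPRESSION_LSSC_FORWARD(), &in_params, 1, ccv_nnc_no_hint, &packed_params, 1);
ccv_nnc_tensor_t* const packed = ccv_nnc_tensor_new(0, packed_params, 0);
ccv_nnc_tensor_t* const out16 = ccv_nnc_tensor_new(0, in_params, 0);
/* ... fill in16 with half-precision data ... */
ccv_nnc_cmd_exec(CMD_COMPRESSION_LSSC_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(in16), TENSOR_LIST(packed), 0);
ccv_nnc_cmd_exec(CMD_COMPRESSION_LSSC_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(packed), TENSOR_LIST(out16), 0);
ccv_nnc_tensor_free(in16);
ccv_nnc_tensor_free(packed);
ccv_nnc_tensor_free(out16);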
  83
  84          TEST_CASE("LSSC should give exact result from GPU for 128x512x14x14")
  85  1       {
  86  1         GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_COMPRESSION_LSSC_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
  87  1           ccv_nnc_cmd_ok(CCV_NNC_COMPRESSION_LSSC_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
  88  1         ccv_nnc_tensor_t* c;
  89  1         ccv_nnc_tensor_t* cgc;
  90  1         prepare_nchw(128, 512, 14, 14, &c, &cgc);
  91  1         REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, c->data.f32, cgc->data.f32, 128 * 512 * 14 * 14, 1e-3, "GPU and CPU computed result should match");
  92  1       }
  93
  94          TEST_CASE("LSSC should give exact result from GPU for 128x64x113x114")
  95  1       {
  96  1         GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_COMPRESSION_LSSC_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
  97  1           ccv_nnc_cmd_ok(CCV_NNC_COMPRESSION_LSSC_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
  98  1         ccv_nnc_tensor_t* c;
  99  1         ccv_nnc_tensor_t* cgc;
 100  1         prepare_nchw(128, 64, 113, 114, &c, &cgc);
 101  1         REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, c->data.f32, cgc->data.f32, 128 * 64 * 113 * 114, 1e-3, "GPU and CPU computed result should match");
 102  1       }
 103
 104          TEST_CASE("LSSC should give exact result from GPU for 128x256x28x28")
 105  1       {
 106  1         GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_COMPRESSION_LSSC_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
 107  1           ccv_nnc_cmd_ok(CCV_NNC_COMPRESSION_LSSC_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
 108  1         ccv_nnc_tensor_t* c;
 109  1         ccv_nnc_tensor_t* cgc;
 110  1         prepare_nchw(128, 256, 28, 28, &c, &cgc);
 111  1         REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, c->data.f32, cgc->data.f32, 128 * 256 * 28 * 28, 1e-3, "GPU and CPU computed result should match");
 112  1       }
 113
 114          #include "case_main.h"
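As a sanity check on the hot-loop counts: line 40 runs once per element, and the four test shapes sum to 128*512*7*7 + 128*512*14*14 + 128*64*113*114 + 128*256*28*28 = 147,275,776, matching the 147M reported on lines 27-28 and 40 (likewise, the 172k on line 32 is roughly the sum of the four N*C products, 172,032). A self-contained check:

#include <stdio.h>

int main(void)
{
	// Sum of N * C * H * W over the four test cases above.
	const long long total = 128LL * 512 * 7 * 7
		+ 128LL * 512 * 14 * 14
		+ 128LL * 64 * 113 * 114
		+ 128LL * 256 * 28 * 28;
	printf("%lld\n", total); // prints 147275776, i.e. ~147M
	return 0;
}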