Coverage Report

Created: 2025-04-03 22:59

/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/upsample.tests.c
Line
Count
Source
1
#include "case.h"
2
#include "ccv_case.h"
3
#include "ccv_nnc_case.h"
4
#include <ccv.h>
5
#include <nnc/ccv_nnc.h>
6
#include <nnc/ccv_nnc_easy.h>
7
#include <3rdparty/dsfmt/dSFMT.h>
8
9
TEST_SETUP()
10
{
11
  ccv_nnc_init();
12
}
13
14
#ifdef HAVE_LIBPNG
15
TEST_CASE("upsample bilinear NHWC in float")
16
1
{
17
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_FORWARD, CCV_NNC_BACKEND_MPS));
18
1
  ccv_dense_matrix_t* image = 0;
19
1
  ccv_read("../../../samples/chessbox.png", &image, CCV_IO_ANY_FILE | CCV_IO_RGB_COLOR);
20
1
  ccv_dense_matrix_t* fimage = 0;
21
1
  ccv_shift(image, (ccv_matrix_t**)&fimage, CCV_32F, 0, 0);
22
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, image->rows, image->cols, 3), 0);
23
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)fimage), TENSOR_LIST(a), 0);
24
1
  ccv_matrix_free(fimage);
25
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, image->rows * 2, image->cols * 2, 3), 0);
26
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_FORWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
27
1
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, image->rows * 2, image->cols * 2, 3), 0);
28
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
29
1
  REQUIRE_MATRIX_FILE_EQ((ccv_matrix_t*)hb, "../../unit/nnc/data/upsample.forward.bin", "the forward of upsample should be equal");
30
1
  ccv_nnc_tensor_free(a);
31
1
  ccv_nnc_tensor_free(b);
32
1
  ccv_nnc_tensor_free(hb);
33
1
  ccv_matrix_free(image);
34
1
}
35
36
TEST_CASE("upsample bilinear NCHW in float")
37
1
{
38
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_FORWARD, CCV_NNC_BACKEND_MPS));
39
1
  ccv_dense_matrix_t* image = 0;
40
1
  ccv_read("../../../samples/chessbox.png", &image, CCV_IO_ANY_FILE | CCV_IO_RGB_COLOR);
41
1
  ccv_dense_matrix_t* fimage = 0;
42
1
  ccv_shift(image, (ccv_matrix_t**)&fimage, CCV_32F, 0, 0);
43
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, image->rows, image->cols, 3), 0);
44
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)fimage), TENSOR_LIST(a), 0);
45
1
  ccv_matrix_free(fimage);
46
1
  ccv_nnc_tensor_t* const at = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 3, image->rows, image->cols), 0);
47
1
  ccv_nnc_cmd_exec(CMD_FORMAT_TRANSFORM_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(at), 0);
48
1
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 3, image->rows * 2, image->cols * 2), 0);
49
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_FORWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(at), TENSOR_LIST(bt), 0);
50
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, image->rows * 2, image->cols * 2, 3), 0);
51
1
  ccv_nnc_cmd_exec(CMD_FORMAT_TRANSFORM_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(bt), TENSOR_LIST(b), 0);
52
1
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, image->rows * 2, image->cols * 2, 3), 0);
53
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
54
1
  REQUIRE_MATRIX_FILE_EQ((ccv_matrix_t*)hb, "../../unit/nnc/data/upsample.forward.bin", "the forward of upsample should be equal");
55
1
  ccv_nnc_tensor_free(a);
56
1
  ccv_nnc_tensor_free(at);
57
1
  ccv_nnc_tensor_free(bt);
58
1
  ccv_nnc_tensor_free(b);
59
1
  ccv_nnc_tensor_free(hb);
60
1
  ccv_matrix_free(image);
61
1
}
62
63
TEST_CASE("downsample bilinear NHWC in float")
64
1
{
65
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
66
1
  ccv_dense_matrix_t* image = 0;
67
1
  ccv_read("../../../samples/chessbox.png", &image, CCV_IO_ANY_FILE | CCV_IO_RGB_COLOR);
68
1
  ccv_dense_matrix_t* fimage = 0;
69
1
  ccv_shift(image, (ccv_matrix_t**)&fimage, CCV_32F, 0, 0);
70
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, image->rows, image->cols, 3), 0);
71
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)fimage), TENSOR_LIST(a), 0);
72
1
  ccv_matrix_free(fimage);
73
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, image->rows / 2, image->cols / 2, 3), 0);
74
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
75
1
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, image->rows / 2, image->cols / 2, 3), 0);
76
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
77
1
  REQUIRE_MATRIX_FILE_EQ((ccv_matrix_t*)hb, "../../unit/nnc/data/upsample.backward.bin", "the backward of upsample should be equal");
78
1
  ccv_nnc_tensor_free(a);
79
1
  ccv_nnc_tensor_free(b);
80
1
  ccv_nnc_tensor_free(hb);
81
1
  ccv_matrix_free(image);
82
1
}
83
84
TEST_CASE("downsample bilinear NCHW in float")
85
1
{
86
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
87
1
  ccv_dense_matrix_t* image = 0;
88
1
  ccv_read("../../../samples/chessbox.png", &image, CCV_IO_ANY_FILE | CCV_IO_RGB_COLOR);
89
1
  ccv_dense_matrix_t* fimage = 0;
90
1
  ccv_shift(image, (ccv_matrix_t**)&fimage, CCV_32F, 0, 0);
91
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, image->rows, image->cols, 3), 0);
92
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)fimage), TENSOR_LIST(a), 0);
93
1
  ccv_matrix_free(fimage);
94
1
  ccv_nnc_tensor_t* const at = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 3, image->rows, image->cols), 0);
95
1
  ccv_nnc_cmd_exec(CMD_FORMAT_TRANSFORM_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(at), 0);
96
1
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 3, image->rows / 2, image->cols / 2), 0);
97
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(at), TENSOR_LIST(bt), 0);
98
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, image->rows / 2, image->cols / 2, 3), 0);
99
1
  ccv_nnc_cmd_exec(CMD_FORMAT_TRANSFORM_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(bt), TENSOR_LIST(b), 0);
100
1
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, image->rows / 2, image->cols / 2, 3), 0);
101
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
102
1
  REQUIRE_MATRIX_FILE_EQ((ccv_matrix_t*)hb, "../../unit/nnc/data/upsample.backward.bin", "the backward of upsample should be equal");
103
1
  ccv_nnc_tensor_free(a);
104
1
  ccv_nnc_tensor_free(at);
105
1
  ccv_nnc_tensor_free(bt);
106
1
  ccv_nnc_tensor_free(b);
107
1
  ccv_nnc_tensor_free(hb);
108
1
  ccv_matrix_free(image);
109
1
}
110
#endif
111
112
TEST_CASE("downsample bilinear NHWC in half precision")
113
1
{
114
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
115
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 14, 14, 5), 0);
116
1
  ccv_nnc_tensor_t* a16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 14, 14, 5), 0);
117
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 7, 7, 5), 0);
118
1
  ccv_nnc_tensor_t* b16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 7, 7, 5), 0);
119
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 14, 14, 5), 0);
120
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 7, 5), 0);
121
1
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 7, 5), 0);
122
1
  dsfmt_t dsfmt;
123
1
  dsfmt_init_gen_rand(&dsfmt, 0);
124
1
  int i;
125
981
  for (i = 0; i < 14 * 14 * 5; i++)
126
980
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
127
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
128
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(a16), 0);
129
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
130
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a16), TENSOR_LIST(b16), 0);
131
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b16), TENSOR_LIST(b), 0);
132
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
133
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, hb->data.f32, hbt->data.f32, 7 * 7 * 5, 1e-2, "CPU and GPU results should match.");
134
1
  ccv_nnc_tensor_free(a);
135
1
  ccv_nnc_tensor_free(b);
136
1
  ccv_nnc_tensor_free(ha);
137
1
  ccv_nnc_tensor_free(hb);
138
1
  ccv_nnc_tensor_free(a16);
139
1
  ccv_nnc_tensor_free(b16);
140
1
  ccv_nnc_tensor_free(hbt);
141
1
}
142
143
TEST_CASE("downsample bilinear NCHW in half precision")
144
1
{
145
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
146
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 14, 6), 0);
147
1
  ccv_nnc_tensor_t* a16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 16F, 15, 14, 6), 0);
148
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 7, 3), 0);
149
1
  ccv_nnc_tensor_t* b16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 16F, 15, 7, 3), 0);
150
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 14, 6), 0);
151
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 7, 3), 0);
152
1
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 7, 3), 0);
153
1
  dsfmt_t dsfmt;
154
1
  dsfmt_init_gen_rand(&dsfmt, 0);
155
1
  int i;
156
1.26k
  for (i = 0; i < 15 * 14 * 6; i++)
157
1.26k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
158
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
159
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(a16), 0);
160
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
161
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a16), TENSOR_LIST(b16), 0);
162
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b16), TENSOR_LIST(b), 0);
163
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
164
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, hb->data.f32, hbt->data.f32, 15 * 7 * 3, 1e-2, "CPU and GPU results should match.");
165
1
  ccv_nnc_tensor_free(a);
166
1
  ccv_nnc_tensor_free(b);
167
1
  ccv_nnc_tensor_free(ha);
168
1
  ccv_nnc_tensor_free(hb);
169
1
  ccv_nnc_tensor_free(a16);
170
1
  ccv_nnc_tensor_free(b16);
171
1
  ccv_nnc_tensor_free(hbt);
172
1
}
173
174
TEST_CASE("upsample nearest NHWC in float")
175
1
{
176
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_FORWARD, CCV_NNC_BACKEND_MPS));
177
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 15, 15, 5), 0);
178
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 30, 30, 5), 0);
179
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 15, 15, 5), 0);
180
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 30, 30, 5), 0);
181
1
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 30, 30, 5), 0);
182
1
  dsfmt_t dsfmt;
183
1
  dsfmt_init_gen_rand(&dsfmt, 0);
184
1
  int i;
185
1.12k
  for (i = 0; i < 15 * 15 * 5; i++)
186
1.12k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
187
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
188
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_FORWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
189
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_FORWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
190
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
191
1
  REQUIRE_TENSOR_EQ(hb, hbt, "CPU and GPU results should match.");
192
1
  ccv_nnc_tensor_free(a);
193
1
  ccv_nnc_tensor_free(b);
194
1
  ccv_nnc_tensor_free(ha);
195
1
  ccv_nnc_tensor_free(hb);
196
1
  ccv_nnc_tensor_free(hbt);
197
1
}
198
199
TEST_CASE("upsample nearest NCHW in float")
200
1
{
201
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_FORWARD, CCV_NNC_BACKEND_MPS));
202
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 15, 5), 0);
203
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 30, 10), 0);
204
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 15, 5), 0);
205
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 30, 10), 0);
206
1
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 30, 10), 0);
207
1
  dsfmt_t dsfmt;
208
1
  dsfmt_init_gen_rand(&dsfmt, 0);
209
1
  int i;
210
1.12k
  for (i = 0; i < 15 * 15 * 5; i++)
211
1.12k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
212
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
213
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_FORWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
214
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_FORWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
215
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
216
1
  REQUIRE_TENSOR_EQ(hb, hbt, "CPU and GPU results should match.");
217
1
  ccv_nnc_tensor_free(a);
218
1
  ccv_nnc_tensor_free(b);
219
1
  ccv_nnc_tensor_free(ha);
220
1
  ccv_nnc_tensor_free(hb);
221
1
  ccv_nnc_tensor_free(hbt);
222
1
}
223
224
TEST_CASE("downsample nearest NHWC in float")
225
1
{
226
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
227
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 14, 14, 5), 0);
228
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 7, 7, 5), 0);
229
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 14, 14, 5), 0);
230
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 7, 5), 0);
231
1
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 7, 5), 0);
232
1
  dsfmt_t dsfmt;
233
1
  dsfmt_init_gen_rand(&dsfmt, 0);
234
1
  int i;
235
981
  for (i = 0; i < 14 * 14 * 5; i++)
236
980
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
237
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
238
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
239
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
240
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
241
1
  REQUIRE_TENSOR_EQ(hb, hbt, "CPU and GPU results should match.");
242
1
  ccv_nnc_tensor_free(a);
243
1
  ccv_nnc_tensor_free(b);
244
1
  ccv_nnc_tensor_free(ha);
245
1
  ccv_nnc_tensor_free(hb);
246
1
  ccv_nnc_tensor_free(hbt);
247
1
}
248
249
TEST_CASE("downsample nearest NCHW in float")
250
1
{
251
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
252
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 14, 6), 0);
253
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 7, 3), 0);
254
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 14, 6), 0);
255
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 7, 3), 0);
256
1
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 7, 3), 0);
257
1
  dsfmt_t dsfmt;
258
1
  dsfmt_init_gen_rand(&dsfmt, 0);
259
1
  int i;
260
1.26k
  for (i = 0; i < 15 * 14 * 6; i++)
261
1.26k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
262
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
263
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
264
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
265
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
266
1
  REQUIRE_TENSOR_EQ(hb, hbt, "CPU and GPU results should match.");
267
1
  ccv_nnc_tensor_free(a);
268
1
  ccv_nnc_tensor_free(b);
269
1
  ccv_nnc_tensor_free(ha);
270
1
  ccv_nnc_tensor_free(hb);
271
1
  ccv_nnc_tensor_free(hbt);
272
1
}
273
274
TEST_CASE("downsample nearest NCHW in float, align corners")
275
1
{
276
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
277
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 14, 6), 0);
278
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 7, 3), 0);
279
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 14, 6), 0);
280
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 7, 3), 0);
281
1
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 7, 3), 0);
282
1
  dsfmt_t dsfmt;
283
1
  dsfmt_init_gen_rand(&dsfmt, 0);
284
1
  int i;
285
1.26k
  for (i = 0; i < 15 * 14 * 6; i++)
286
1.26k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
287
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
288
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
289
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
290
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
291
1
  REQUIRE_TENSOR_EQ(hb, hbt, "CPU and GPU results should match.");
292
1
  ccv_nnc_tensor_free(a);
293
1
  ccv_nnc_tensor_free(b);
294
1
  ccv_nnc_tensor_free(ha);
295
1
  ccv_nnc_tensor_free(hb);
296
1
  ccv_nnc_tensor_free(hbt);
297
1
}
298
299
TEST_CASE("downsample nearest NHWC in half precision")
300
1
{
301
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
302
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 14, 14, 5), 0);
303
1
  ccv_nnc_tensor_t* a16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 14, 14, 5), 0);
304
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 7, 7, 5), 0);
305
1
  ccv_nnc_tensor_t* b16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 7, 7, 5), 0);
306
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 14, 14, 5), 0);
307
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 7, 5), 0);
308
1
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 7, 5), 0);
309
1
  dsfmt_t dsfmt;
310
1
  dsfmt_init_gen_rand(&dsfmt, 0);
311
1
  int i;
312
981
  for (i = 0; i < 14 * 14 * 5; i++)
313
980
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
314
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
315
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(a16), 0);
316
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
317
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a16), TENSOR_LIST(b16), 0);
318
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b16), TENSOR_LIST(b), 0);
319
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
320
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, hb->data.f32, hbt->data.f32, 7 * 7 * 5, 1e-2, "CPU and GPU results should match.");
321
1
  ccv_nnc_tensor_free(a);
322
1
  ccv_nnc_tensor_free(b);
323
1
  ccv_nnc_tensor_free(ha);
324
1
  ccv_nnc_tensor_free(hb);
325
1
  ccv_nnc_tensor_free(a16);
326
1
  ccv_nnc_tensor_free(b16);
327
1
  ccv_nnc_tensor_free(hbt);
328
1
}
329
330
TEST_CASE("downsample nearest NCHW in half precision")
331
1
{
332
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
333
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 14, 6), 0);
334
1
  ccv_nnc_tensor_t* a16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 16F, 15, 14, 6), 0);
335
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 7, 3), 0);
336
1
  ccv_nnc_tensor_t* b16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 16F, 15, 7, 3), 0);
337
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 14, 6), 0);
338
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 7, 3), 0);
339
1
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 7, 3), 0);
340
1
  dsfmt_t dsfmt;
341
1
  dsfmt_init_gen_rand(&dsfmt, 0);
342
1
  int i;
343
1.26k
  for (i = 0; i < 15 * 14 * 6; i++)
344
1.26k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
345
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
346
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(a16), 0);
347
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
348
1
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a16), TENSOR_LIST(b16), 0);
349
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b16), TENSOR_LIST(b), 0);
350
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
351
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, hb->data.f32, hbt->data.f32, 15 * 7 * 3, 1e-2, "CPU and GPU results should match.");
352
1
  ccv_nnc_tensor_free(a);
353
1
  ccv_nnc_tensor_free(b);
354
1
  ccv_nnc_tensor_free(ha);
355
1
  ccv_nnc_tensor_free(hb);
356
1
  ccv_nnc_tensor_free(a16);
357
1
  ccv_nnc_tensor_free(b16);
358
1
  ccv_nnc_tensor_free(hbt);
359
1
}
360
361
362
TEST_CASE("mps downsample bilinear NCHW")
363
1
{
364
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_MPS));
365
0
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 14, 6), 0);
366
0
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 7, 3), 0);
367
0
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 14, 6), 0);
368
0
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 7, 3), 0);
369
0
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 7, 3), 0);
370
0
  dsfmt_t dsfmt;
371
0
  dsfmt_init_gen_rand(&dsfmt, 0);
372
0
  int i;
373
0
  for (i = 0; i < 15 * 14 * 6; i++)
374
0
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
375
376
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
377
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
378
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
379
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
380
381
0
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, hb->data.f32, hbt->data.f32, 2 * 2 * 3, 1e-2, "CPU and GPU results should match.");
382
0
  ccv_nnc_tensor_free(a);
383
0
  ccv_nnc_tensor_free(b);
384
0
  ccv_nnc_tensor_free(ha);
385
0
  ccv_nnc_tensor_free(hb);
386
0
  ccv_nnc_tensor_free(hbt);
387
0
}
388
389
TEST_CASE("mps downsample bilinear NCHW in half precision")
390
1
{
391
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_MPS));
392
0
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 14, 6), 0);
393
0
  ccv_nnc_tensor_t* a16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 16F, 15, 14, 6), 0);
394
0
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 7, 3), 0);
395
0
  ccv_nnc_tensor_t* b16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 16F, 15, 7, 3), 0);
396
0
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 14, 6), 0);
397
0
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 7, 3), 0);
398
0
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 7, 3), 0);
399
0
  dsfmt_t dsfmt;
400
0
  dsfmt_init_gen_rand(&dsfmt, 0);
401
0
  int i;
402
0
  for (i = 0; i < 15 * 14 * 6; i++)
403
0
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
404
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
405
0
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(a16), 0);
406
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
407
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a16), TENSOR_LIST(b16), 0);
408
0
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b16), TENSOR_LIST(b), 0);
409
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
410
0
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, hb->data.f32, hbt->data.f32, 15 * 7 * 3, 1e-2, "CPU and GPU results should match.");
411
0
  ccv_nnc_tensor_free(a);
412
0
  ccv_nnc_tensor_free(b);
413
0
  ccv_nnc_tensor_free(ha);
414
0
  ccv_nnc_tensor_free(hb);
415
0
  ccv_nnc_tensor_free(a16);
416
0
  ccv_nnc_tensor_free(b16);
417
0
  ccv_nnc_tensor_free(hbt);
418
0
}
419
420
TEST_CASE("mps downsample bilinear NHWC")
421
1
{
422
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_MPS));
423
0
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 14, 14, 5), 0);
424
0
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 7, 7, 5), 0);
425
0
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 14, 14, 5), 0);
426
0
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 7, 5), 0);
427
0
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 7, 5), 0);
428
0
  dsfmt_t dsfmt;
429
0
  dsfmt_init_gen_rand(&dsfmt, 0);
430
0
  int i;
431
0
  for (i = 0; i < 14 * 14 * 5; i++)
432
0
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
433
434
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
435
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
436
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
437
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
438
439
0
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, hb->data.f32, hbt->data.f32, 1 * 2 * 2 * 3, 1e-2, "CPU and GPU results should match.");
440
0
  ccv_nnc_tensor_free(a);
441
0
  ccv_nnc_tensor_free(b);
442
0
  ccv_nnc_tensor_free(ha);
443
0
  ccv_nnc_tensor_free(hb);
444
0
  ccv_nnc_tensor_free(hbt);
445
0
}
446
447
TEST_CASE("mps upsample bilinear NHWC, align corners")
448
1
{
449
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_FORWARD, CCV_NNC_BACKEND_MPS));
450
0
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 14, 14, 5), 0);
451
0
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 28, 28, 5), 0);
452
0
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 14, 14, 5), 0);
453
0
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 28, 28, 5), 0);
454
0
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 28, 28, 5), 0);
455
0
  dsfmt_t dsfmt;
456
0
  dsfmt_init_gen_rand(&dsfmt, 0);
457
0
  int i;
458
0
  for (i = 0; i < 14 * 14 * 5; i++)
459
0
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
460
461
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
462
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_FORWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
463
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_FORWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
464
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
465
466
0
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, hb->data.f32, hbt->data.f32, 1 * 2 * 2 * 3, 1e-2, "CPU and GPU results should match.");
467
0
  ccv_nnc_tensor_free(a);
468
0
  ccv_nnc_tensor_free(b);
469
0
  ccv_nnc_tensor_free(ha);
470
0
  ccv_nnc_tensor_free(hb);
471
0
  ccv_nnc_tensor_free(hbt);
472
0
}
473
474
TEST_CASE("mps downsample bilinear NHWC, align corners")
475
1
{
476
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_MPS));
477
0
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 14, 14, 5), 0);
478
0
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 7, 7, 5), 0);
479
0
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 14, 14, 5), 0);
480
0
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 7, 5), 0);
481
0
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 7, 5), 0);
482
0
  dsfmt_t dsfmt;
483
0
  dsfmt_init_gen_rand(&dsfmt, 0);
484
0
  int i;
485
0
  for (i = 0; i < 14 * 14 * 5; i++)
486
0
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
487
488
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
489
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
490
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
491
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
492
493
0
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, hb->data.f32, hbt->data.f32, 1 * 2 * 2 * 3, 1e-2, "CPU and GPU results should match.");
494
0
  ccv_nnc_tensor_free(a);
495
0
  ccv_nnc_tensor_free(b);
496
0
  ccv_nnc_tensor_free(ha);
497
0
  ccv_nnc_tensor_free(hb);
498
0
  ccv_nnc_tensor_free(hbt);
499
0
}
500
501
TEST_CASE("mps downsample bilinear NHWC half precision")
502
1
{
503
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_MPS));
504
0
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 14, 14, 5), 0);
505
0
  ccv_nnc_tensor_t* a16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 14, 14, 5), 0);
506
0
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 7, 7, 5), 0);
507
0
  ccv_nnc_tensor_t* b16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 7, 7, 5), 0);
508
0
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 14, 14, 5), 0);
509
0
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 7, 5), 0);
510
0
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 7, 5), 0);
511
0
  dsfmt_t dsfmt;
512
0
  dsfmt_init_gen_rand(&dsfmt, 0);
513
0
  int i;
514
0
  for (i = 0; i < 14 * 14 * 5; i++)
515
0
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
516
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
517
0
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(a16), 0);
518
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
519
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a16), TENSOR_LIST(b16), 0);
520
0
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b16), TENSOR_LIST(b), 0);
521
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
522
0
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, hb->data.f32, hbt->data.f32, 7 * 7 * 5, 1e-2, "CPU and GPU results should match.");
523
0
  ccv_nnc_tensor_free(a);
524
0
  ccv_nnc_tensor_free(b);
525
0
  ccv_nnc_tensor_free(ha);
526
0
  ccv_nnc_tensor_free(hb);
527
0
  ccv_nnc_tensor_free(a16);
528
0
  ccv_nnc_tensor_free(b16);
529
0
  ccv_nnc_tensor_free(hbt);
530
0
}
531
532
TEST_CASE("mps downsample nearest NHWC in float")
533
1
{
534
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_MPS));
535
0
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 14, 14, 5), 0);
536
0
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 7, 7, 5), 0);
537
0
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 14, 14, 5), 0);
538
0
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 7, 5), 0);
539
0
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 7, 5), 0);
540
0
  dsfmt_t dsfmt;
541
0
  dsfmt_init_gen_rand(&dsfmt, 0);
542
0
  int i;
543
0
  for (i = 0; i < 14 * 14 * 5; i++)
544
0
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
545
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
546
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
547
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
548
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
549
0
  REQUIRE_TENSOR_EQ(hb, hbt, "CPU and GPU results should match.");
550
0
  ccv_nnc_tensor_free(a);
551
0
  ccv_nnc_tensor_free(b);
552
0
  ccv_nnc_tensor_free(ha);
553
0
  ccv_nnc_tensor_free(hb);
554
0
  ccv_nnc_tensor_free(hbt);
555
0
}
556
557
TEST_CASE("mps downsample nearest NCHW in float")
558
1
{
559
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_MPS));
560
0
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 14, 6), 0);
561
0
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 7, 3), 0);
562
0
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 14, 6), 0);
563
0
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 7, 3), 0);
564
0
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 7, 3), 0);
565
0
  dsfmt_t dsfmt;
566
0
  dsfmt_init_gen_rand(&dsfmt, 0);
567
0
  int i;
568
0
  for (i = 0; i < 15 * 14 * 6; i++)
569
0
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
570
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
571
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
572
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
573
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
574
0
  REQUIRE_TENSOR_EQ(hb, hbt, "CPU and GPU results should match.");
575
0
  ccv_nnc_tensor_free(a);
576
0
  ccv_nnc_tensor_free(b);
577
0
  ccv_nnc_tensor_free(ha);
578
0
  ccv_nnc_tensor_free(hb);
579
0
  ccv_nnc_tensor_free(hbt);
580
0
}
581
582
TEST_CASE("mps downsample nearest NHWC in half precision")
583
1
{
584
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_MPS));
585
0
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 14, 14, 5), 0);
586
0
  ccv_nnc_tensor_t* a16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 14, 14, 5), 0);
587
0
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 7, 7, 5), 0);
588
0
  ccv_nnc_tensor_t* b16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 7, 7, 5), 0);
589
0
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 14, 14, 5), 0);
590
0
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 7, 5), 0);
591
0
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 7, 5), 0);
592
0
  dsfmt_t dsfmt;
593
0
  dsfmt_init_gen_rand(&dsfmt, 0);
594
0
  int i;
595
0
  for (i = 0; i < 14 * 14 * 5; i++)
596
0
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
597
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
598
0
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(a16), 0);
599
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
600
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a16), TENSOR_LIST(b16), 0);
601
0
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b16), TENSOR_LIST(b), 0);
602
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
603
0
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, hb->data.f32, hbt->data.f32, 7 * 7 * 5, 1e-2, "CPU and GPU results should match.");
604
0
  ccv_nnc_tensor_free(a);
605
0
  ccv_nnc_tensor_free(b);
606
0
  ccv_nnc_tensor_free(ha);
607
0
  ccv_nnc_tensor_free(hb);
608
0
  ccv_nnc_tensor_free(a16);
609
0
  ccv_nnc_tensor_free(b16);
610
0
  ccv_nnc_tensor_free(hbt);
611
0
}
612
613
TEST_CASE("mps downsample nearest NCHW in half precision")
614
1
{
615
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_UPSAMPLE_BACKWARD, CCV_NNC_BACKEND_MPS));
616
0
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 14, 6), 0);
617
0
  ccv_nnc_tensor_t* a16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 16F, 15, 14, 6), 0);
618
0
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 15, 7, 3), 0);
619
0
  ccv_nnc_tensor_t* b16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 16F, 15, 7, 3), 0);
620
0
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 14, 6), 0);
621
0
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 7, 3), 0);
622
0
  ccv_nnc_tensor_t* hbt = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 15, 7, 3), 0);
623
0
  dsfmt_t dsfmt;
624
0
  dsfmt_init_gen_rand(&dsfmt, 0);
625
0
  int i;
626
0
  for (i = 0; i < 15 * 14 * 6; i++)
627
0
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
628
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
629
0
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(a16), 0);
630
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hbt), 0);
631
0
  ccv_nnc_cmd_exec(CMD_UPSAMPLE_BACKWARD(CCV_NNC_UPSAMPLE_NEAREST, 2, 2, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a16), TENSOR_LIST(b16), 0);
632
0
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b16), TENSOR_LIST(b), 0);
633
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
634
0
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, hb->data.f32, hbt->data.f32, 15 * 7 * 3, 1e-2, "CPU and GPU results should match.");
635
0
  ccv_nnc_tensor_free(a);
636
0
  ccv_nnc_tensor_free(b);
637
0
  ccv_nnc_tensor_free(ha);
638
0
  ccv_nnc_tensor_free(hb);
639
0
  ccv_nnc_tensor_free(a16);
640
0
  ccv_nnc_tensor_free(b16);
641
0
  ccv_nnc_tensor_free(hbt);
642
0
}
643
644
#include "case_main.h"