Coverage Report

Created: 2024-08-19 11:27

/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/transform.tests.c
Line
Count
Source
1
#include "case.h"
2
#include "ccv_case.h"
3
#include "ccv_nnc_case.h"
4
#include <ccv.h>
5
#include <nnc/ccv_nnc.h>
6
#include <nnc/ccv_nnc_easy.h>
7
#include <3rdparty/dsfmt/dSFMT.h>
8
9
TEST_SETUP()
10
{
11
  ccv_nnc_init();
12
}
13
14
TEST_CASE("data conversion from float to half precision")
15
{
16
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_DATATYPE_CONVERSION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_DATATYPE_CONVERSION_FORWARD, CCV_NNC_BACKEND_MPS));
17
  dsfmt_t dsfmt;
18
  dsfmt_init_gen_rand(&dsfmt, 0);
19
  int i;
20
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 128), 0);
21
  for (i = 0; i < 128; i++)
22
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
23
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 1, 128), 0);
24
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 1, 128), 0);
25
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
26
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
27
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 1, 128), 0);
28
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
29
  ccv_nnc_tensor_t* bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 1, 128), 0);
30
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
31
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(short, (short*)hb->data.f16, (short*)bt->data.f16, 128, 1, "Result should be exactly equal");
32
  ccv_nnc_tensor_free(a);
33
  ccv_nnc_tensor_free(b);
34
  ccv_nnc_tensor_free(ha);
35
  ccv_nnc_tensor_free(hb);
36
  ccv_nnc_tensor_free(bt);
37
}
38
39
TEST_CASE("data conversion from double to half precision")
40
1
{
41
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_DATATYPE_CONVERSION_FORWARD, CCV_NNC_BACKEND_GPU_REF));
42
1
  dsfmt_t dsfmt;
43
1
  dsfmt_init_gen_rand(&dsfmt, 0);
44
1
  int i;
45
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(64F, 1, 128), 0);
46
129
  for (i = 0; i < 128; i++)
47
128
    ha->data.f64[i] = (double)dsfmt_genrand_open_close(&dsfmt);
48
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 64F, 1, 128), 0);
49
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 1, 128), 0);
50
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
51
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
52
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 1, 128), 0);
53
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
54
1
  ccv_nnc_tensor_t* bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 1, 128), 0);
55
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
56
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(short, (short*)hb->data.f16, (short*)bt->data.f16, 128, 1, "Result should be exactly equal");
57
1
  ccv_nnc_tensor_free(a);
58
1
  ccv_nnc_tensor_free(b);
59
1
  ccv_nnc_tensor_free(ha);
60
1
  ccv_nnc_tensor_free(hb);
61
1
  ccv_nnc_tensor_free(bt);
62
1
}
63
64
TEST_CASE("data conversion from double to float")
65
1
{
66
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_DATATYPE_CONVERSION_FORWARD, CCV_NNC_BACKEND_GPU_REF));
67
1
  dsfmt_t dsfmt;
68
1
  dsfmt_init_gen_rand(&dsfmt, 0);
69
1
  int i;
70
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(64F, 1, 128), 0);
71
129
  for (i = 0; i < 128; i++)
72
128
    ha->data.f64[i] = (double)dsfmt_genrand_open_close(&dsfmt);
73
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 64F, 1, 128), 0);
74
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 1, 128), 0);
75
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
76
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
77
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 128), 0);
78
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
79
1
  ccv_nnc_tensor_t* bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 128), 0);
80
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
81
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, hb->data.f32, bt->data.f32, 128, 1e-5, "Result should be exactly equal");
82
1
  ccv_nnc_tensor_free(a);
83
1
  ccv_nnc_tensor_free(b);
84
1
  ccv_nnc_tensor_free(ha);
85
1
  ccv_nnc_tensor_free(hb);
86
1
  ccv_nnc_tensor_free(bt);
87
1
}
88
89
TEST_CASE("data conversion from float to double")
90
1
{
91
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_DATATYPE_CONVERSION_FORWARD, CCV_NNC_BACKEND_GPU_REF));
92
1
  dsfmt_t dsfmt;
93
1
  dsfmt_init_gen_rand(&dsfmt, 0);
94
1
  int i;
95
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 128), 0);
96
129
  for (i = 0; i < 128; i++)
97
128
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
98
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 1, 128), 0);
99
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 64F, 1, 128), 0);
100
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
101
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
102
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(64F, 1, 128), 0);
103
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
104
1
  ccv_nnc_tensor_t* bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(64F, 1, 128), 0);
105
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
106
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(double, hb->data.f64, bt->data.f64, 128, 1e-5, "Result should be exactly equal");
107
1
  ccv_nnc_tensor_free(a);
108
1
  ccv_nnc_tensor_free(b);
109
1
  ccv_nnc_tensor_free(ha);
110
1
  ccv_nnc_tensor_free(hb);
111
1
  ccv_nnc_tensor_free(bt);
112
1
}
113
114
TEST_CASE("data conversion from double to half precision and to float")
115
1
{
116
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_DATATYPE_CONVERSION_FORWARD, CCV_NNC_BACKEND_GPU_REF));
117
1
  dsfmt_t dsfmt;
118
1
  dsfmt_init_gen_rand(&dsfmt, 0);
119
1
  int i;
120
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(64F, 1, 128), 0);
121
129
  for (i = 0; i < 128; i++)
122
128
    ha->data.f64[i] = (double)dsfmt_genrand_open_close(&dsfmt);
123
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 64F, 1, 128), 0);
124
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 1, 128), 0);
125
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 1, 128), 0);
126
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
127
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
128
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(c), 0);
129
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 1, 128), 0);
130
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 128), 0);
131
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
132
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(hb), TENSOR_LIST(hc), 0);
133
1
  ccv_nnc_tensor_t* ct = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 128), 0);
134
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(ct), 0);
135
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, hc->data.f32, ct->data.f32, 128, 1, "Result should be exactly equal");
136
1
  ccv_nnc_tensor_free(a);
137
1
  ccv_nnc_tensor_free(b);
138
1
  ccv_nnc_tensor_free(c);
139
1
  ccv_nnc_tensor_free(ha);
140
1
  ccv_nnc_tensor_free(hb);
141
1
  ccv_nnc_tensor_free(hc);
142
1
  ccv_nnc_tensor_free(ct);
143
1
}
144
145
TEST_CASE("masked fill forward with integer")
146
1
{
147
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_MASKED_FILL_FORWARD, CCV_NNC_BACKEND_GPU_REF));
148
1
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
149
1
  int i;
150
1
  dsfmt_t dsfmt;
151
1
  dsfmt_init_gen_rand(&dsfmt, 0);
152
121
  for (i = 0; i < 6 * 5 * 4; i++)
153
120
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
154
1
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 5, 4), 0);
155
21
  for (i = 0; i < 5 * 4; i++)
156
20
    hb->data.i32[i] = (i % 2 == 1) ? 0 : 1;
157
1
  ccv_nnc_tensor_t* const hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
158
1
  ccv_nnc_cmd_exec(CMD_MASKED_FILL_FORWARD(0, -1e8), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
159
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 5, 4), 0);
160
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 5, 4), 0);
161
1
  ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 5, 4), 0);
162
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
163
1
  ccv_nnc_cmd_exec(CMD_MASKED_FILL_FORWARD(0, -1e8), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
164
1
  ccv_nnc_tensor_t* const hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
165
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(hd), 0);
166
1
  REQUIRE_TENSOR_EQ(hc, hd, "cpu and gpu result should be equal");
167
1
  ccv_nnc_tensor_free(a);
168
1
  ccv_nnc_tensor_free(b);
169
1
  ccv_nnc_tensor_free(c);
170
1
  ccv_nnc_tensor_free(ha);
171
1
  ccv_nnc_tensor_free(hb);
172
1
  ccv_nnc_tensor_free(hc);
173
1
  ccv_nnc_tensor_free(hd);
174
1
}
175
176
TEST_CASE("masked fill forward with float")
177
1
{
178
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_MASKED_FILL_FORWARD, CCV_NNC_BACKEND_GPU_REF));
179
1
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
180
1
  int i;
181
1
  dsfmt_t dsfmt;
182
1
  dsfmt_init_gen_rand(&dsfmt, 0);
183
121
  for (i = 0; i < 6 * 5 * 4; i++)
184
120
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
185
1
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 4), 0);
186
21
  for (i = 0; i < 5 * 4; i++)
187
20
    hb->data.f32[i] = (i % 2 == 1) ? 0 : 1;
188
1
  ccv_nnc_tensor_t* const hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
189
1
  ccv_nnc_cmd_exec(CMD_MASKED_FILL_FORWARD(0, -1e8), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
190
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 5, 4), 0);
191
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 4), 0);
192
1
  ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 5, 4), 0);
193
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
194
1
  ccv_nnc_cmd_exec(CMD_MASKED_FILL_FORWARD(0, -1e8), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
195
1
  ccv_nnc_tensor_t* const hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
196
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(hd), 0);
197
1
  REQUIRE_TENSOR_EQ(hc, hd, "cpu and gpu result should be equal");
198
1
  ccv_nnc_tensor_free(a);
199
1
  ccv_nnc_tensor_free(b);
200
1
  ccv_nnc_tensor_free(c);
201
1
  ccv_nnc_tensor_free(ha);
202
1
  ccv_nnc_tensor_free(hb);
203
1
  ccv_nnc_tensor_free(hc);
204
1
  ccv_nnc_tensor_free(hd);
205
1
}
206
207
TEST_CASE("masked fill backward with integer")
208
1
{
209
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_MASKED_FILL_FORWARD, CCV_NNC_BACKEND_GPU_REF));
210
1
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
211
1
  int i;
212
1
  dsfmt_t dsfmt;
213
1
  dsfmt_init_gen_rand(&dsfmt, 0);
214
121
  for (i = 0; i < 6 * 5 * 4; i++)
215
120
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
216
1
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 5, 4), 0);
217
21
  for (i = 0; i < 5 * 4; i++)
218
20
    hb->data.i32[i] = (i % 2 == 1) ? 0 : 1;
219
1
  ccv_nnc_tensor_t* const hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
220
1
  ccv_nnc_cmd_exec(CMD_MASKED_FILL_BACKWARD(0, -1e8), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, 0, hb), TENSOR_LIST(hc), 0);
221
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 5, 4), 0);
222
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 5, 4), 0);
223
1
  ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 5, 4), 0);
224
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
225
1
  ccv_nnc_cmd_exec(CMD_MASKED_FILL_BACKWARD(0, -1e8), ccv_nnc_no_hint, 0, TENSOR_LIST(a, 0, b), TENSOR_LIST(c), 0);
226
1
  ccv_nnc_tensor_t* const hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
227
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(hd), 0);
228
1
  REQUIRE_TENSOR_EQ(hc, hd, "cpu and gpu result should be equal");
229
1
  ccv_nnc_tensor_free(a);
230
1
  ccv_nnc_tensor_free(b);
231
1
  ccv_nnc_tensor_free(c);
232
1
  ccv_nnc_tensor_free(ha);
233
1
  ccv_nnc_tensor_free(hb);
234
1
  ccv_nnc_tensor_free(hc);
235
1
  ccv_nnc_tensor_free(hd);
236
1
}
237
238
TEST_CASE("masked fill backward with float")
239
1
{
240
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_MASKED_FILL_FORWARD, CCV_NNC_BACKEND_GPU_REF));
241
1
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
242
1
  int i;
243
1
  dsfmt_t dsfmt;
244
1
  dsfmt_init_gen_rand(&dsfmt, 0);
245
121
  for (i = 0; i < 6 * 5 * 4; i++)
246
120
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
247
1
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 4), 0);
248
21
  for (i = 0; i < 5 * 4; i++)
249
20
    hb->data.f32[i] = (i % 2 == 1) ? 0 : 1;
250
1
  ccv_nnc_tensor_t* const hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
251
1
  ccv_nnc_cmd_exec(CMD_MASKED_FILL_BACKWARD(0, -1e8), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, 0, hb), TENSOR_LIST(hc), 0);
252
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 5, 4), 0);
253
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 4), 0);
254
1
  ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 5, 4), 0);
255
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
256
1
  ccv_nnc_cmd_exec(CMD_MASKED_FILL_BACKWARD(0, -1e8), ccv_nnc_no_hint, 0, TENSOR_LIST(a, 0, b), TENSOR_LIST(c), 0);
257
1
  ccv_nnc_tensor_t* const hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
258
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(hd), 0);
259
1
  REQUIRE_TENSOR_EQ(hc, hd, "cpu and gpu result should be equal");
260
1
  ccv_nnc_tensor_free(a);
261
1
  ccv_nnc_tensor_free(b);
262
1
  ccv_nnc_tensor_free(c);
263
1
  ccv_nnc_tensor_free(ha);
264
1
  ccv_nnc_tensor_free(hb);
265
1
  ccv_nnc_tensor_free(hc);
266
1
  ccv_nnc_tensor_free(hd);
267
1
}
268
269
TEST_CASE("format transform from for mps")
270
1
{
271
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_FORMAT_TRANSFORM_FORWARD, CCV_NNC_BACKEND_MPS));
272
0
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 3, 4, 5), 0);
273
0
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 3, 4), 0);
274
0
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 3, 4, 5), 0);
275
0
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 2, 3, 4), 0);
276
0
  int i, j, k;
277
0
  for (i = 0; i < 3 * 4 * 5; i++)
278
0
    ha->data.f32[i] = i;
279
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
280
0
  ccv_nnc_tensor_view_t* const av = ccv_nnc_tensor_view_new(a, GPU_TENSOR_NCHW(000, 32F, 2, 3, 4), DIM_ALLOC(1, 0, 1), DIM_ALLOC(4 * 5, 5, 1));
281
0
  ccv_nnc_cmd_exec(CMD_FORMAT_TRANSFORM_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST((ccv_nnc_tensor_t*)av), TENSOR_LIST(b), 0);
282
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
283
0
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 3, 4), 0);
284
0
  for (i = 0; i < 3; i++)
285
0
    for (j = 0; j < 4; j++)
286
0
      for (k = 0; k < 5; k++)
287
0
        if (i > 0 && k > 0 && j < 3)
288
0
          bt->data.f32[(i - 1) * 3 * 4 + j * 4 + k - 1] = i * 4 * 5 + j * 5 + k;
289
0
  REQUIRE_TENSOR_EQ(hb, bt, "cpu and gpu result should be the same");
290
0
  ccv_nnc_tensor_free(ha);
291
0
  ccv_nnc_tensor_free(hb);
292
0
  ccv_nnc_tensor_view_free(av);
293
0
  ccv_nnc_tensor_free(a);
294
0
  ccv_nnc_tensor_free(b);
295
0
  ccv_nnc_tensor_free(bt);
296
0
}
297
298
TEST_CASE("format transform to fill for mps")
299
1
{
300
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_FORMAT_TRANSFORM_FORWARD, CCV_NNC_BACKEND_MPS));
301
0
  ccv_nnc_tensor_t* const ha0 = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1, 3, 2), 0);
302
0
  ccv_nnc_tensor_t* const ha1 = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1, 3, 2), 0);
303
0
  ccv_nnc_tensor_t* const hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1, 3, 4), 0);
304
0
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 3, 4), 0);
305
0
  ccv_nnc_tensor_t* const a0 = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 1, 3, 2), 0);
306
0
  ccv_nnc_tensor_t* const a1 = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 1, 3, 2), 0);
307
0
  ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 1, 3, 4), 0);
308
0
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 2, 3, 4), 0);
309
0
  int i;
310
0
  for (i = 0; i < 3 * 2; i++)
311
0
    ha0->data.f32[i] = i;
312
0
  for (i = 0; i < 3 * 2; i++)
313
0
    ha1->data.f32[i] = i - 5;
314
0
  for (i = 0; i < 3 * 4; i++)
315
0
    hc->data.f32[i] = i + 2;
316
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha0, ha1, hc), TENSOR_LIST(a0, a1, c), 0);
317
0
  ccv_nnc_tensor_view_t* const bv0 = ccv_nnc_tensor_view_new(b, GPU_TENSOR_NCHW(000, 32F, 1, 3, 4), DIM_ALLOC(0, 0, 0), DIM_ALLOC(3 * 4, 4, 1));
318
0
  ccv_nnc_tensor_view_t* const bv10 = ccv_nnc_tensor_view_new(b, GPU_TENSOR_NCHW(000, 32F, 1, 3, 2), DIM_ALLOC(1, 0, 0), DIM_ALLOC(3 * 4, 4, 1));
319
0
  ccv_nnc_tensor_view_t* const bv11 = ccv_nnc_tensor_view_new(b, GPU_TENSOR_NCHW(000, 32F, 1, 3, 2), DIM_ALLOC(1, 0, 2), DIM_ALLOC(3 * 4, 4, 1));
320
0
  ccv_nnc_cmd_exec(CMD_FORMAT_TRANSFORM_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST((ccv_nnc_tensor_t*)bv0), 0);
321
0
  ccv_nnc_cmd_exec(CMD_FORMAT_TRANSFORM_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a0), TENSOR_LIST((ccv_nnc_tensor_t*)bv10), 0);
322
0
  ccv_nnc_cmd_exec(CMD_FORMAT_TRANSFORM_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a1), TENSOR_LIST((ccv_nnc_tensor_t*)bv11), 0);
323
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
324
0
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 3, 4), 0);
325
0
  for (i = 0; i < 3 * 4; i++)
326
0
    bt->data.f32[i] = i + 2;
327
0
  for (i = 0; i < 3 * 2; i++)
328
0
    bt->data.f32[(i / 2) * 4 + (i % 2) + 3 * 4] = i;
329
0
  for (i = 0; i < 3 * 2; i++)
330
0
    bt->data.f32[(i / 2) * 4 + (i % 2) + 2 + 3 * 4] = i - 5;
331
0
  REQUIRE_TENSOR_EQ(hb, bt, "cpu and gpu result should be the same");
332
0
  ccv_nnc_tensor_free(ha0);
333
0
  ccv_nnc_tensor_free(ha1);
334
0
  ccv_nnc_tensor_free(hc);
335
0
  ccv_nnc_tensor_free(hb);
336
0
  ccv_nnc_tensor_view_free(bv0);
337
0
  ccv_nnc_tensor_view_free(bv10);
338
0
  ccv_nnc_tensor_view_free(bv11);
339
0
  ccv_nnc_tensor_free(a0);
340
0
  ccv_nnc_tensor_free(a1);
341
0
  ccv_nnc_tensor_free(c);
342
0
  ccv_nnc_tensor_free(b);
343
0
  ccv_nnc_tensor_free(bt);
344
0
}
345
346
TEST_CASE("format transform to new strides for mps")
347
1
{
348
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_FORMAT_TRANSFORM_FORWARD, CCV_NNC_BACKEND_MPS));
349
0
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1, 10, 9, 3), 0);
350
0
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1, 3, 10, 9), 0);
351
0
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1, 3, 10, 9), 0);
352
0
  ccv_nnc_tensor_view_t* const btv = ccv_nnc_tensor_view_new(bt, CPU_TENSOR_NCHW(32F, 1, 10, 9, 3), DIM_ALLOC(), DIM_ALLOC(3 * 10 * 9, 9, 1, 10 * 9));
353
0
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 1, 10, 9, 3), 0);
354
0
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NCHW(000, 32F, 1, 3, 10, 9), 0);
355
0
  ccv_nnc_tensor_view_t* const bv = ccv_nnc_tensor_view_new(b, GPU_TENSOR_NCHW(000, 32F, 1, 10, 9, 3), DIM_ALLOC(), DIM_ALLOC(3 * 10 * 9, 9, 1, 10 * 9));
356
0
  int i;
357
0
  for (i = 0; i < 10 * 9 * 3; i++)
358
0
    ha->data.f32[i] = i;
359
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
360
0
  ccv_nnc_cmd_exec(CMD_FORMAT_TRANSFORM_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST((ccv_nnc_tensor_t*)bv), 0);
361
0
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(hb), 0);
362
0
  ccv_nnc_cmd_exec(CMD_FORMAT_TRANSFORM_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST((ccv_nnc_tensor_t*)btv), 0);
363
0
  REQUIRE_TENSOR_EQ(hb, bt, "cpu and gpu result should be the same");
364
0
  ccv_nnc_tensor_free(ha);
365
0
  ccv_nnc_tensor_free(hb);
366
0
  ccv_nnc_tensor_free(bt);
367
0
  ccv_nnc_tensor_view_free(btv);
368
0
  ccv_nnc_tensor_view_free(bv);
369
0
  ccv_nnc_tensor_free(a);
370
0
  ccv_nnc_tensor_free(b);
371
0
}
372
373
#include "case_main.h"