Coverage Report

Created: 2021-04-05 03:19

/home/liu/buildslave/linux-x64-runtests/build/test/int/nnc/transform.tests.c
Line
Count
Source
1
#include "case.h"
2
#include "ccv_case.h"
3
#include "ccv_nnc_case.h"
4
#include <ccv.h>
5
#include <nnc/ccv_nnc.h>
6
#include <nnc/ccv_nnc_easy.h>
7
#include <3rdparty/dsfmt/dSFMT.h>
8
9
TEST_SETUP()
10
{
11
  ccv_nnc_init();
12
}
13
14
TEST_CASE("data conversion from float to half precision")
15
{
16
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_DATATYPE_CONVERSION_FORWARD, CCV_NNC_BACKEND_GPU_REF));
17
  dsfmt_t dsfmt;
18
  dsfmt_init_gen_rand(&dsfmt, 0);
19
  int i;
20
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 128), 0);
21
  for (i = 0; i < 128; i++)
22
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
23
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 1, 128), 0);
24
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 1, 128), 0);
25
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
26
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
27
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 1, 128), 0);
28
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
29
  ccv_nnc_tensor_t* bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 1, 128), 0);
30
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
31
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(short, (short*)hb->data.f16, (short*)bt->data.f16, 128, 1, "Result should be exactly equal");
32
  ccv_nnc_tensor_free(a);
33
  ccv_nnc_tensor_free(b);
34
  ccv_nnc_tensor_free(ha);
35
  ccv_nnc_tensor_free(hb);
36
  ccv_nnc_tensor_free(bt);
37
}
38
39
TEST_CASE("data conversion from double to half precision")
40
1
{
41
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_DATATYPE_CONVERSION_FORWARD, CCV_NNC_BACKEND_GPU_REF));
42
1
  dsfmt_t dsfmt;
43
1
  dsfmt_init_gen_rand(&dsfmt, 0);
44
1
  int i;
45
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(64F, 1, 128), 0);
46
129
  for (i = 0; i < 128; 
i++128
)
47
128
    ha->data.f64[i] = (double)dsfmt_genrand_open_close(&dsfmt);
48
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 64F, 1, 128), 0);
49
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 1, 128), 0);
50
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
51
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
52
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 1, 128), 0);
53
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
54
1
  ccv_nnc_tensor_t* bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 1, 128), 0);
55
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
56
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(short, (short*)hb->data.f16, (short*)bt->data.f16, 128, 1, "Result should be exactly equal");
57
1
  ccv_nnc_tensor_free(a);
58
1
  ccv_nnc_tensor_free(b);
59
1
  ccv_nnc_tensor_free(ha);
60
1
  ccv_nnc_tensor_free(hb);
61
1
  ccv_nnc_tensor_free(bt);
62
1
}
63
64
TEST_CASE("data conversion from double to float")
65
1
{
66
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_DATATYPE_CONVERSION_FORWARD, CCV_NNC_BACKEND_GPU_REF));
67
1
  dsfmt_t dsfmt;
68
1
  dsfmt_init_gen_rand(&dsfmt, 0);
69
1
  int i;
70
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(64F, 1, 128), 0);
71
129
  for (i = 0; i < 128; 
i++128
)
72
128
    ha->data.f64[i] = (double)dsfmt_genrand_open_close(&dsfmt);
73
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 64F, 1, 128), 0);
74
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 1, 128), 0);
75
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
76
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
77
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 128), 0);
78
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
79
1
  ccv_nnc_tensor_t* bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 128), 0);
80
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
81
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, hb->data.f32, bt->data.f32, 128, 1e-5, "Result should be exactly equal");
82
1
  ccv_nnc_tensor_free(a);
83
1
  ccv_nnc_tensor_free(b);
84
1
  ccv_nnc_tensor_free(ha);
85
1
  ccv_nnc_tensor_free(hb);
86
1
  ccv_nnc_tensor_free(bt);
87
1
}
88
89
TEST_CASE("data conversion from float to double")
90
1
{
91
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_DATATYPE_CONVERSION_FORWARD, CCV_NNC_BACKEND_GPU_REF));
92
1
  dsfmt_t dsfmt;
93
1
  dsfmt_init_gen_rand(&dsfmt, 0);
94
1
  int i;
95
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 128), 0);
96
129
  for (i = 0; i < 128; 
i++128
)
97
128
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
98
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 1, 128), 0);
99
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 64F, 1, 128), 0);
100
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
101
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
102
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(64F, 1, 128), 0);
103
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
104
1
  ccv_nnc_tensor_t* bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(64F, 1, 128), 0);
105
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
106
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(double, hb->data.f64, bt->data.f64, 128, 1e-5, "Result should be exactly equal");
107
1
  ccv_nnc_tensor_free(a);
108
1
  ccv_nnc_tensor_free(b);
109
1
  ccv_nnc_tensor_free(ha);
110
1
  ccv_nnc_tensor_free(hb);
111
1
  ccv_nnc_tensor_free(bt);
112
1
}
113
114
TEST_CASE("data conversion from double to half precision and to float")
115
1
{
116
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_DATATYPE_CONVERSION_FORWARD, CCV_NNC_BACKEND_GPU_REF));
117
1
  dsfmt_t dsfmt;
118
1
  dsfmt_init_gen_rand(&dsfmt, 0);
119
1
  int i;
120
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(64F, 1, 128), 0);
121
129
  for (i = 0; i < 128; 
i++128
)
122
128
    ha->data.f64[i] = (double)dsfmt_genrand_open_close(&dsfmt);
123
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 64F, 1, 128), 0);
124
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 1, 128), 0);
125
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 1, 128), 0);
126
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
127
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
128
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(c), 0);
129
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 1, 128), 0);
130
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 128), 0);
131
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
132
1
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(hb), TENSOR_LIST(hc), 0);
133
1
  ccv_nnc_tensor_t* ct = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 128), 0);
134
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(ct), 0);
135
1
  REQUIRE_ARRAY_EQ_WITH_TOLERANCE(float, hc->data.f32, ct->data.f32, 128, 1, "Result should be exactly equal");
136
1
  ccv_nnc_tensor_free(a);
137
1
  ccv_nnc_tensor_free(b);
138
1
  ccv_nnc_tensor_free(c);
139
1
  ccv_nnc_tensor_free(ha);
140
1
  ccv_nnc_tensor_free(hb);
141
1
  ccv_nnc_tensor_free(hc);
142
1
  ccv_nnc_tensor_free(ct);
143
1
}
144
145
TEST_CASE("masked fill forward with integer")
146
1
{
147
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_MASKED_FILL_FORWARD, CCV_NNC_BACKEND_GPU_REF));
148
1
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
149
1
  int i;
150
1
  dsfmt_t dsfmt;
151
1
  dsfmt_init_gen_rand(&dsfmt, 0);
152
121
  for (i = 0; i < 6 * 5 * 4; 
i++120
)
153
120
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
154
1
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 5, 4), 0);
155
21
  for (i = 0; i < 5 * 4; 
i++20
)
156
20
    hb->data.i32[i] = (i % 2 == 1) ? 
010
:
110
;
157
1
  ccv_nnc_tensor_t* const hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
158
1
  ccv_nnc_cmd_exec(CMD_MASKED_FILL_FORWARD(0, -1e8), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
159
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 5, 4), 0);
160
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 5, 4), 0);
161
1
  ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 5, 4), 0);
162
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
163
1
  ccv_nnc_cmd_exec(CMD_MASKED_FILL_FORWARD(0, -1e8), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
164
1
  ccv_nnc_tensor_t* const hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
165
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(hd), 0);
166
1
  REQUIRE_TENSOR_EQ(hc, hd, "cpu and gpu result should be equal");
167
1
  ccv_nnc_tensor_free(a);
168
1
  ccv_nnc_tensor_free(b);
169
1
  ccv_nnc_tensor_free(c);
170
1
  ccv_nnc_tensor_free(ha);
171
1
  ccv_nnc_tensor_free(hb);
172
1
  ccv_nnc_tensor_free(hc);
173
1
  ccv_nnc_tensor_free(hd);
174
1
}
175
176
TEST_CASE("masked fill forward with float")
177
1
{
178
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_MASKED_FILL_FORWARD, CCV_NNC_BACKEND_GPU_REF));
179
1
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
180
1
  int i;
181
1
  dsfmt_t dsfmt;
182
1
  dsfmt_init_gen_rand(&dsfmt, 0);
183
121
  for (i = 0; i < 6 * 5 * 4; 
i++120
)
184
120
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
185
1
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 4), 0);
186
21
  for (i = 0; i < 5 * 4; 
i++20
)
187
20
    hb->data.f32[i] = (i % 2 == 1) ? 
010
:
110
;
188
1
  ccv_nnc_tensor_t* const hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
189
1
  ccv_nnc_cmd_exec(CMD_MASKED_FILL_FORWARD(0, -1e8), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
190
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 5, 4), 0);
191
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 4), 0);
192
1
  ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 5, 4), 0);
193
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
194
1
  ccv_nnc_cmd_exec(CMD_MASKED_FILL_FORWARD(0, -1e8), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
195
1
  ccv_nnc_tensor_t* const hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
196
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(hd), 0);
197
1
  REQUIRE_TENSOR_EQ(hc, hd, "cpu and gpu result should be equal");
198
1
  ccv_nnc_tensor_free(a);
199
1
  ccv_nnc_tensor_free(b);
200
1
  ccv_nnc_tensor_free(c);
201
1
  ccv_nnc_tensor_free(ha);
202
1
  ccv_nnc_tensor_free(hb);
203
1
  ccv_nnc_tensor_free(hc);
204
1
  ccv_nnc_tensor_free(hd);
205
1
}
206
207
TEST_CASE("masked fill backward with integer")
208
1
{
209
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_MASKED_FILL_FORWARD, CCV_NNC_BACKEND_GPU_REF));
210
1
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
211
1
  int i;
212
1
  dsfmt_t dsfmt;
213
1
  dsfmt_init_gen_rand(&dsfmt, 0);
214
121
  for (i = 0; i < 6 * 5 * 4; 
i++120
)
215
120
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
216
1
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 5, 4), 0);
217
21
  for (i = 0; i < 5 * 4; 
i++20
)
218
20
    hb->data.i32[i] = (i % 2 == 1) ? 
010
:
110
;
219
1
  ccv_nnc_tensor_t* const hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
220
1
  ccv_nnc_cmd_exec(CMD_MASKED_FILL_BACKWARD(0, -1e8), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, 0, hb), TENSOR_LIST(hc), 0);
221
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 5, 4), 0);
222
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 5, 4), 0);
223
1
  ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 5, 4), 0);
224
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
225
1
  ccv_nnc_cmd_exec(CMD_MASKED_FILL_BACKWARD(0, -1e8), ccv_nnc_no_hint, 0, TENSOR_LIST(a, 0, b), TENSOR_LIST(c), 0);
226
1
  ccv_nnc_tensor_t* const hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
227
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(hd), 0);
228
1
  REQUIRE_TENSOR_EQ(hc, hd, "cpu and gpu result should be equal");
229
1
  ccv_nnc_tensor_free(a);
230
1
  ccv_nnc_tensor_free(b);
231
1
  ccv_nnc_tensor_free(c);
232
1
  ccv_nnc_tensor_free(ha);
233
1
  ccv_nnc_tensor_free(hb);
234
1
  ccv_nnc_tensor_free(hc);
235
1
  ccv_nnc_tensor_free(hd);
236
1
}
237
238
TEST_CASE("masked fill backward with float")
239
1
{
240
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_MASKED_FILL_FORWARD, CCV_NNC_BACKEND_GPU_REF));
241
1
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
242
1
  int i;
243
1
  dsfmt_t dsfmt;
244
1
  dsfmt_init_gen_rand(&dsfmt, 0);
245
121
  for (i = 0; i < 6 * 5 * 4; 
i++120
)
246
120
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
247
1
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 4), 0);
248
21
  for (i = 0; i < 5 * 4; 
i++20
)
249
20
    hb->data.f32[i] = (i % 2 == 1) ? 
010
:
110
;
250
1
  ccv_nnc_tensor_t* const hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
251
1
  ccv_nnc_cmd_exec(CMD_MASKED_FILL_BACKWARD(0, -1e8), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, 0, hb), TENSOR_LIST(hc), 0);
252
1
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 5, 4), 0);
253
1
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 5, 4), 0);
254
1
  ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 5, 4), 0);
255
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
256
1
  ccv_nnc_cmd_exec(CMD_MASKED_FILL_BACKWARD(0, -1e8), ccv_nnc_no_hint, 0, TENSOR_LIST(a, 0, b), TENSOR_LIST(c), 0);
257
1
  ccv_nnc_tensor_t* const hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 5, 4), 0);
258
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(hd), 0);
259
1
  REQUIRE_TENSOR_EQ(hc, hd, "cpu and gpu result should be equal");
260
1
  ccv_nnc_tensor_free(a);
261
1
  ccv_nnc_tensor_free(b);
262
1
  ccv_nnc_tensor_free(c);
263
1
  ccv_nnc_tensor_free(ha);
264
1
  ccv_nnc_tensor_free(hb);
265
1
  ccv_nnc_tensor_free(hc);
266
1
  ccv_nnc_tensor_free(hd);
267
1
}
268
269
#include "case_main.h"