Coverage Report

Created: 2024-08-18 16:21

/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/loss.tests.c
Line | Count | Source
1
#include "case.h"
2
#include "ccv_case.h"
3
#include "ccv_nnc_case.h"
4
#include <ccv.h>
5
#include <nnc/ccv_nnc.h>
6
#include <nnc/ccv_nnc_easy.h>
7
#include <3rdparty/dsfmt/dSFMT.h>
8
9
TEST_SETUP()
10
{
11
  ccv_nnc_init();
12
}
13
14
TEST_CASE("cross entropy loss forward")
15
{
16
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF));
17
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
18
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
19
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
20
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
21
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
22
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
23
  dsfmt_t dsfmt;
24
  dsfmt_init_gen_rand(&dsfmt, 0);
25
  int i;
26
  for (i = 0; i < 1000; i++)
27
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
28
  for (i = 0; i < 10; i++)
29
    hb->data.f32[i] = (i + 1) * 9;
30
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
31
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
32
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
33
  ccv_nnc_tensor_t* tc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
34
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(tc), 0);
35
  REQUIRE_TENSOR_EQ(tc, hc, "GPU computed output should be the same as CPU computed ones");
36
  ccv_nnc_tensor_free(a);
37
  ccv_nnc_tensor_free(b);
38
  ccv_nnc_tensor_free(c);
39
  ccv_nnc_tensor_free(ha);
40
  ccv_nnc_tensor_free(hb);
41
  ccv_nnc_tensor_free(hc);
42
  ccv_nnc_tensor_free(tc);
43
}
44
45
TEST_CASE("cross entropy loss forward with label smoothing")
46
1
{
47
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF));
48
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
49
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
50
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
51
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
52
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
53
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
54
1
  dsfmt_t dsfmt;
55
1
  dsfmt_init_gen_rand(&dsfmt, 0);
56
1
  int i;
57
1.00k
  for (i = 0; i < 1000; i++)
58
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
59
11
  for (i = 0; i < 10; i++)
60
10
    hb->data.f32[i] = (i + 1) * 9;
61
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
62
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_FORWARD(0.1, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
63
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_FORWARD(0.1, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
64
1
  ccv_nnc_tensor_t* tc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
65
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(tc), 0);
66
1
  REQUIRE_TENSOR_EQ(tc, hc, "GPU computed output should be the same as CPU computed ones");
67
1
  ccv_nnc_tensor_free(a);
68
1
  ccv_nnc_tensor_free(b);
69
1
  ccv_nnc_tensor_free(c);
70
1
  ccv_nnc_tensor_free(ha);
71
1
  ccv_nnc_tensor_free(hb);
72
1
  ccv_nnc_tensor_free(hc);
73
1
  ccv_nnc_tensor_free(tc);
74
1
}
75
76
TEST_CASE("cross entropy loss backward")
77
1
{
78
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
79
1
    ccv_nnc_cmd_ok(CCV_NNC_CATEGORICAL_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
80
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
81
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
82
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
83
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
84
1
  ccv_nnc_tensor_t* g = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
85
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
86
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
87
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
88
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
89
1
  ccv_nnc_tensor_t* hg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
90
1
  dsfmt_t dsfmt;
91
1
  dsfmt_init_gen_rand(&dsfmt, 0);
92
1
  int i;
93
1.00k
  for (i = 0; i < 1000; i++)
94
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
95
11
  for (i = 0; i < 10; i++)
96
10
    hb->data.f32[i] = (i + 1) * 9;
97
11
  for (i = 0; i < 10; i++)
98
10
    hg->data.f32[i] = 1;
99
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb, hg), TENSOR_LIST(a, b, g), 0);
100
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
101
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(hg, ha, hb), TENSOR_LIST(hd), 0);
102
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
103
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, b), TENSOR_LIST(d), 0);
104
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
105
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
106
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
107
1
  ccv_nnc_tensor_free(a);
108
1
  ccv_nnc_tensor_free(b);
109
1
  ccv_nnc_tensor_free(c);
110
1
  ccv_nnc_tensor_free(d);
111
1
  ccv_nnc_tensor_free(g);
112
1
  ccv_nnc_tensor_free(ha);
113
1
  ccv_nnc_tensor_free(hb);
114
1
  ccv_nnc_tensor_free(hc);
115
1
  ccv_nnc_tensor_free(hd);
116
1
  ccv_nnc_tensor_free(hg);
117
1
  ccv_nnc_tensor_free(td);
118
1
}
119
120
TEST_CASE("cross entropy loss backward with label smoothing")
121
1
{
122
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
123
1
    ccv_nnc_cmd_ok(CCV_NNC_CATEGORICAL_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
124
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
125
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
126
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
127
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
128
1
  ccv_nnc_tensor_t* g = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
129
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
130
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
131
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
132
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
133
1
  ccv_nnc_tensor_t* hg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
134
1
  dsfmt_t dsfmt;
135
1
  dsfmt_init_gen_rand(&dsfmt, 0);
136
1
  int i;
137
1.00k
  for (i = 0; i < 1000; i++)
138
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
139
11
  for (i = 0; i < 10; i++)
140
10
    hb->data.f32[i] = (i + 1) * 9;
141
11
  for (i = 0; i < 10; i++)
142
10
    hg->data.f32[i] = 1;
143
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb, hg), TENSOR_LIST(a, b, g), 0);
144
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_FORWARD(0.1, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
145
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_BACKWARD(0.1, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(hg, ha, hb), TENSOR_LIST(hd), 0);
146
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_FORWARD(0.1, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
147
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_BACKWARD(0.1, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, b), TENSOR_LIST(d), 0);
148
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
149
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
150
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
151
1
  ccv_nnc_tensor_free(a);
152
1
  ccv_nnc_tensor_free(b);
153
1
  ccv_nnc_tensor_free(c);
154
1
  ccv_nnc_tensor_free(d);
155
1
  ccv_nnc_tensor_free(g);
156
1
  ccv_nnc_tensor_free(ha);
157
1
  ccv_nnc_tensor_free(hb);
158
1
  ccv_nnc_tensor_free(hc);
159
1
  ccv_nnc_tensor_free(hd);
160
1
  ccv_nnc_tensor_free(hg);
161
1
  ccv_nnc_tensor_free(td);
162
1
}
163
164
TEST_CASE("binary cross entropy loss forward")
165
1
{
166
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF));
167
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
168
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
169
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
170
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
171
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
172
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
173
1
  dsfmt_t dsfmt;
174
1
  dsfmt_init_gen_rand(&dsfmt, 0);
175
1
  int i;
176
1.00k
  for (i = 0; i < 1000; i++)
177
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
178
1.00k
  for (i = 0; i < 1000; i++)
179
1.00k
    hb->data.f32[i] = (i % 2);
180
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
181
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
182
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
183
1
  ccv_nnc_tensor_t* tc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
184
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(tc), 0);
185
1
  REQUIRE_TENSOR_EQ(tc, hc, "GPU computed output should be the same as CPU computed ones");
186
1
  ccv_nnc_tensor_free(a);
187
1
  ccv_nnc_tensor_free(b);
188
1
  ccv_nnc_tensor_free(c);
189
1
  ccv_nnc_tensor_free(ha);
190
1
  ccv_nnc_tensor_free(hb);
191
1
  ccv_nnc_tensor_free(hc);
192
1
  ccv_nnc_tensor_free(tc);
193
1
}
194
195
TEST_CASE("binary cross entropy loss backward")
196
1
{
197
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
198
1
    ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
199
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
200
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
201
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
202
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
203
1
  ccv_nnc_tensor_t* g = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
204
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
205
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
206
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
207
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
208
1
  ccv_nnc_tensor_t* hg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
209
1
  dsfmt_t dsfmt;
210
1
  dsfmt_init_gen_rand(&dsfmt, 0);
211
1
  int i;
212
1.00k
  for (i = 0; i < 1000; i++)
213
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
214
1.00k
  for (i = 0; i < 1000; i++)
215
1.00k
    hb->data.f32[i] = (i % 2);
216
11
  for (i = 0; i < 10; i++)
217
10
    hg->data.f32[i] = 1;
218
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb, hg), TENSOR_LIST(a, b, g), 0);
219
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
220
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(hg, ha, hb), TENSOR_LIST(hd), 0);
221
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
222
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, b), TENSOR_LIST(d), 0);
223
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
224
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
225
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
226
1
  ccv_nnc_tensor_free(a);
227
1
  ccv_nnc_tensor_free(b);
228
1
  ccv_nnc_tensor_free(c);
229
1
  ccv_nnc_tensor_free(d);
230
1
  ccv_nnc_tensor_free(g);
231
1
  ccv_nnc_tensor_free(ha);
232
1
  ccv_nnc_tensor_free(hb);
233
1
  ccv_nnc_tensor_free(hc);
234
1
  ccv_nnc_tensor_free(hd);
235
1
  ccv_nnc_tensor_free(hg);
236
1
  ccv_nnc_tensor_free(td);
237
1
}
238
239
TEST_CASE("binary cross entropy loss backward no input gradient")
240
1
{
241
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
242
1
    ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
243
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
244
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
245
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
246
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
247
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
248
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
249
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
250
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
251
1
  dsfmt_t dsfmt;
252
1
  dsfmt_init_gen_rand(&dsfmt, 0);
253
1
  int i;
254
1.00k
  for (i = 0; i < 1000; i++)
255
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
256
1.00k
  for (i = 0; i < 1000; i++)
257
1.00k
    hb->data.f32[i] = (i % 2);
258
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
259
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
260
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(0, ha, hb), TENSOR_LIST(hd), 0);
261
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
262
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(0, a, b), TENSOR_LIST(d), 0);
263
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
264
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
265
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
266
1
  ccv_nnc_tensor_free(a);
267
1
  ccv_nnc_tensor_free(b);
268
1
  ccv_nnc_tensor_free(c);
269
1
  ccv_nnc_tensor_free(d);
270
1
  ccv_nnc_tensor_free(ha);
271
1
  ccv_nnc_tensor_free(hb);
272
1
  ccv_nnc_tensor_free(hc);
273
1
  ccv_nnc_tensor_free(hd);
274
1
  ccv_nnc_tensor_free(td);
275
1
}
276
277
TEST_CASE("sigmoid binary cross entropy loss forward")
278
1
{
279
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF));
280
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
281
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
282
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
283
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
284
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
285
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
286
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
287
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
288
1
  dsfmt_t dsfmt;
289
1
  dsfmt_init_gen_rand(&dsfmt, 0);
290
1
  int i;
291
1.00k
  for (i = 0; i < 1000; i++)
292
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 10 - 5;
293
1.00k
  for (i = 0; i < 1000; i++)
294
1.00k
    hb->data.f32[i] = (i % 2);
295
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
296
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc, hd), 0);
297
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c, d), 0);
298
1
  ccv_nnc_tensor_t* tc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
299
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
300
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c, d), TENSOR_LIST(tc, td), 0);
301
1
  REQUIRE_TENSOR_EQ(tc, hc, "GPU computed output should be the same as CPU computed ones");
302
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
303
1
  ccv_nnc_tensor_free(a);
304
1
  ccv_nnc_tensor_free(b);
305
1
  ccv_nnc_tensor_free(c);
306
1
  ccv_nnc_tensor_free(d);
307
1
  ccv_nnc_tensor_free(ha);
308
1
  ccv_nnc_tensor_free(hb);
309
1
  ccv_nnc_tensor_free(hc);
310
1
  ccv_nnc_tensor_free(hd);
311
1
  ccv_nnc_tensor_free(tc);
312
1
  ccv_nnc_tensor_free(td);
313
1
}
314
315
TEST_CASE("sigmoid binary cross entropy loss forward no loss")
316
1
{
317
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF));
318
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
319
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
320
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
321
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
322
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
323
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
324
1
  dsfmt_t dsfmt;
325
1
  dsfmt_init_gen_rand(&dsfmt, 0);
326
1
  int i;
327
1.00k
  for (i = 0; i < 1000; i++)
328
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 10 - 5;
329
1.00k
  for (i = 0; i < 1000; i++)
330
1.00k
    hb->data.f32[i] = (i % 2);
331
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
332
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(0, hd), 0);
333
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(0, d), 0);
334
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
335
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
336
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
337
1
  ccv_nnc_tensor_free(a);
338
1
  ccv_nnc_tensor_free(b);
339
1
  ccv_nnc_tensor_free(d);
340
1
  ccv_nnc_tensor_free(ha);
341
1
  ccv_nnc_tensor_free(hb);
342
1
  ccv_nnc_tensor_free(hd);
343
1
  ccv_nnc_tensor_free(td);
344
1
}
345
346
TEST_CASE("sigmoid binary cross entropy loss backward")
347
1
{
348
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
349
1
    ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
350
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
351
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
352
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
353
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
354
1
  ccv_nnc_tensor_t* g = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
355
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
356
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
357
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
358
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
359
1
  ccv_nnc_tensor_t* hg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
360
1
  dsfmt_t dsfmt;
361
1
  dsfmt_init_gen_rand(&dsfmt, 0);
362
1
  int i;
363
1.00k
  for (i = 0; i < 1000; i++)
364
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
365
1.00k
  for (i = 0; i < 1000; i++)
366
1.00k
    hb->data.f32[i] = (i % 2);
367
11
  for (i = 0; i < 10; i++)
368
10
    hg->data.f32[i] = 1;
369
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb, hg), TENSOR_LIST(a, b, g), 0);
370
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(0, hc), 0);
371
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(hg, 0, 0, hb, 0, hc), TENSOR_LIST(hd), 0);
372
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(0, c), 0);
373
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, 0, 0, b, 0, c), TENSOR_LIST(d), 0);
374
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
375
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
376
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
377
1
  ccv_nnc_tensor_free(a);
378
1
  ccv_nnc_tensor_free(b);
379
1
  ccv_nnc_tensor_free(c);
380
1
  ccv_nnc_tensor_free(d);
381
1
  ccv_nnc_tensor_free(g);
382
1
  ccv_nnc_tensor_free(ha);
383
1
  ccv_nnc_tensor_free(hb);
384
1
  ccv_nnc_tensor_free(hc);
385
1
  ccv_nnc_tensor_free(hd);
386
1
  ccv_nnc_tensor_free(hg);
387
1
  ccv_nnc_tensor_free(td);
388
1
}
389
390
TEST_CASE("sigmoid binary cross entropy loss backward no input gradient")
391
1
{
392
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
393
1
    ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
394
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
395
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
396
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
397
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
398
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
399
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
400
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
401
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
402
1
  dsfmt_t dsfmt;
403
1
  dsfmt_init_gen_rand(&dsfmt, 0);
404
1
  int i;
405
1.00k
  for (i = 0; i < 1000; i++)
406
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
407
1.00k
  for (i = 0; i < 1000; i++)
408
1.00k
    hb->data.f32[i] = (i % 2);
409
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
410
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(0, hc), 0);
411
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(0, 0, 0, hb, 0, hc), TENSOR_LIST(hd), 0);
412
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(0, c), 0);
413
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(0, 0, 0, b, 0, c), TENSOR_LIST(d), 0);
414
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
415
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
416
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
417
1
  ccv_nnc_tensor_free(a);
418
1
  ccv_nnc_tensor_free(b);
419
1
  ccv_nnc_tensor_free(c);
420
1
  ccv_nnc_tensor_free(d);
421
1
  ccv_nnc_tensor_free(ha);
422
1
  ccv_nnc_tensor_free(hb);
423
1
  ccv_nnc_tensor_free(hc);
424
1
  ccv_nnc_tensor_free(hd);
425
1
  ccv_nnc_tensor_free(td);
426
1
}
427
428
TEST_CASE("binary cross entropy loss forward with pos_weight")
429
1
{
430
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF));
431
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
432
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
433
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
434
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
435
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
436
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
437
1
  dsfmt_t dsfmt;
438
1
  dsfmt_init_gen_rand(&dsfmt, 0);
439
1
  int i;
440
1.00k
  for (i = 0; i < 1000; i++)
441
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
442
1.00k
  for (i = 0; i < 1000; i++)
443
1.00k
    hb->data.f32[i] = (i % 2);
444
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
445
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
446
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
447
1
  ccv_nnc_tensor_t* tc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
448
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(tc), 0);
449
1
  REQUIRE_TENSOR_EQ(tc, hc, "GPU computed output should be the same as CPU computed ones");
450
1
  ccv_nnc_tensor_free(a);
451
1
  ccv_nnc_tensor_free(b);
452
1
  ccv_nnc_tensor_free(c);
453
1
  ccv_nnc_tensor_free(ha);
454
1
  ccv_nnc_tensor_free(hb);
455
1
  ccv_nnc_tensor_free(hc);
456
1
  ccv_nnc_tensor_free(tc);
457
1
}
458
459
TEST_CASE("binary cross entropy loss backward with pos_weight")
460
1
{
461
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
462
1
    ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
463
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
464
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
465
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
466
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
467
1
  ccv_nnc_tensor_t* g = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
468
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
469
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
470
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
471
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
472
1
  ccv_nnc_tensor_t* hg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
473
1
  dsfmt_t dsfmt;
474
1
  dsfmt_init_gen_rand(&dsfmt, 0);
475
1
  int i;
476
1.00k
  for (i = 0; i < 1000; i++)
477
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
478
1.00k
  for (i = 0; i < 1000; i++)
479
1.00k
    hb->data.f32[i] = (i % 2);
480
11
  for (i = 0; i < 10; i++)
481
10
    hg->data.f32[i] = 1;
482
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb, hg), TENSOR_LIST(a, b, g), 0);
483
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
484
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_BACKWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(hg, ha, hb), TENSOR_LIST(hd), 0);
485
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
486
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_BACKWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, b), TENSOR_LIST(d), 0);
487
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
488
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
489
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
490
1
  ccv_nnc_tensor_free(a);
491
1
  ccv_nnc_tensor_free(b);
492
1
  ccv_nnc_tensor_free(c);
493
1
  ccv_nnc_tensor_free(d);
494
1
  ccv_nnc_tensor_free(g);
495
1
  ccv_nnc_tensor_free(ha);
496
1
  ccv_nnc_tensor_free(hb);
497
1
  ccv_nnc_tensor_free(hc);
498
1
  ccv_nnc_tensor_free(hd);
499
1
  ccv_nnc_tensor_free(hg);
500
1
  ccv_nnc_tensor_free(td);
501
1
}
502
503
TEST_CASE("binary cross entropy loss backward no input gradient with pos_weight")
504
1
{
505
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
506
1
    ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
507
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
508
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
509
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
510
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
511
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
512
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
513
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
514
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
515
1
  dsfmt_t dsfmt;
516
1
  dsfmt_init_gen_rand(&dsfmt, 0);
517
1
  int i;
518
1.00k
  for (i = 0; i < 1000; i++)
519
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
520
1.00k
  for (i = 0; i < 1000; i++)
521
1.00k
    hb->data.f32[i] = (i % 2);
522
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
523
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
524
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_BACKWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(0, ha, hb), TENSOR_LIST(hd), 0);
525
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
526
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_BACKWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(0, a, b), TENSOR_LIST(d), 0);
527
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
528
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
529
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
530
1
  ccv_nnc_tensor_free(a);
531
1
  ccv_nnc_tensor_free(b);
532
1
  ccv_nnc_tensor_free(c);
533
1
  ccv_nnc_tensor_free(d);
534
1
  ccv_nnc_tensor_free(ha);
535
1
  ccv_nnc_tensor_free(hb);
536
1
  ccv_nnc_tensor_free(hc);
537
1
  ccv_nnc_tensor_free(hd);
538
1
  ccv_nnc_tensor_free(td);
539
1
}
540
541
TEST_CASE("sigmoid binary cross entropy loss forward with pos_weight")
542
1
{
543
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF));
544
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
545
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
546
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
547
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
548
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
549
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
550
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
551
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
552
1
  dsfmt_t dsfmt;
553
1
  dsfmt_init_gen_rand(&dsfmt, 0);
554
1
  int i;
555
1.00k
  for (i = 0; i < 1000; i++)
556
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 10 - 5;
557
1.00k
  for (i = 0; i < 1000; i++)
558
1.00k
    hb->data.f32[i] = (i % 2);
559
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
560
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc, hd), 0);
561
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c, d), 0);
562
1
  ccv_nnc_tensor_t* tc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
563
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
564
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c, d), TENSOR_LIST(tc, td), 0);
565
1
  REQUIRE_TENSOR_EQ(tc, hc, "GPU computed output should be the same as CPU computed ones");
566
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
567
1
  ccv_nnc_tensor_free(a);
568
1
  ccv_nnc_tensor_free(b);
569
1
  ccv_nnc_tensor_free(c);
570
1
  ccv_nnc_tensor_free(d);
571
1
  ccv_nnc_tensor_free(ha);
572
1
  ccv_nnc_tensor_free(hb);
573
1
  ccv_nnc_tensor_free(hc);
574
1
  ccv_nnc_tensor_free(hd);
575
1
  ccv_nnc_tensor_free(tc);
576
1
  ccv_nnc_tensor_free(td);
577
1
}
578
579
TEST_CASE("sigmoid binary cross entropy loss forward no loss with pos_weight")
580
1
{
581
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF));
582
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
583
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
584
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
585
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
586
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
587
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
588
1
  dsfmt_t dsfmt;
589
1
  dsfmt_init_gen_rand(&dsfmt, 0);
590
1
  int i;
591
1.00k
  for (i = 0; i < 1000; i++)
592
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 10 - 5;
593
1.00k
  for (i = 0; i < 1000; i++)
594
1.00k
    hb->data.f32[i] = (i % 2);
595
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
596
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(0, hd), 0);
597
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(0, d), 0);
598
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
599
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
600
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
601
1
  ccv_nnc_tensor_free(a);
602
1
  ccv_nnc_tensor_free(b);
603
1
  ccv_nnc_tensor_free(d);
604
1
  ccv_nnc_tensor_free(ha);
605
1
  ccv_nnc_tensor_free(hb);
606
1
  ccv_nnc_tensor_free(hd);
607
1
  ccv_nnc_tensor_free(td);
608
1
}
609
610
TEST_CASE("sigmoid binary cross entropy loss backward with pos_weight")
611
1
{
612
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
613
1
    ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
614
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
615
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
616
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
617
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
618
1
  ccv_nnc_tensor_t* g = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
619
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
620
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
621
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
622
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
623
1
  ccv_nnc_tensor_t* hg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
624
1
  dsfmt_t dsfmt;
625
1
  dsfmt_init_gen_rand(&dsfmt, 0);
626
1
  int i;
627
1.00k
  for (i = 0; i < 1000; i++)
628
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
629
1.00k
  for (i = 0; i < 1000; i++)
630
1.00k
    hb->data.f32[i] = (i % 2);
631
11
  for (i = 0; i < 10; i++)
632
10
    hg->data.f32[i] = 1;
633
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb, hg), TENSOR_LIST(a, b, g), 0);
634
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(0, hc), 0);
635
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(hg, 0, 0, hb, 0, hc), TENSOR_LIST(hd), 0);
636
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(0, c), 0);
637
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(g, 0, 0, b, 0, c), TENSOR_LIST(d), 0);
638
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
639
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
640
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
641
1
  ccv_nnc_tensor_free(a);
642
1
  ccv_nnc_tensor_free(b);
643
1
  ccv_nnc_tensor_free(c);
644
1
  ccv_nnc_tensor_free(d);
645
1
  ccv_nnc_tensor_free(g);
646
1
  ccv_nnc_tensor_free(ha);
647
1
  ccv_nnc_tensor_free(hb);
648
1
  ccv_nnc_tensor_free(hc);
649
1
  ccv_nnc_tensor_free(hd);
650
1
  ccv_nnc_tensor_free(hg);
651
1
  ccv_nnc_tensor_free(td);
652
1
}
653
654
TEST_CASE("sigmoid binary cross entropy loss backward no input gradient with pos_weight")
655
1
{
656
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
657
1
    ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
658
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
659
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
660
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
661
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
662
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
663
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
664
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
665
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
666
1
  dsfmt_t dsfmt;
667
1
  dsfmt_init_gen_rand(&dsfmt, 0);
668
1
  int i;
669
1.00k
  for (i = 0; i < 1000; i++)
670
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
671
1.00k
  for (i = 0; i < 1000; i++)
672
1.00k
    hb->data.f32[i] = (i % 2);
673
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
674
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(0, hc), 0);
675
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(0, 0, 0, hb, 0, hc), TENSOR_LIST(hd), 0);
676
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(0, c), 0);
677
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(0, 0, 0, b, 0, c), TENSOR_LIST(d), 0);
678
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
679
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
680
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
681
1
  ccv_nnc_tensor_free(a);
682
1
  ccv_nnc_tensor_free(b);
683
1
  ccv_nnc_tensor_free(c);
684
1
  ccv_nnc_tensor_free(d);
685
1
  ccv_nnc_tensor_free(ha);
686
1
  ccv_nnc_tensor_free(hb);
687
1
  ccv_nnc_tensor_free(hc);
688
1
  ccv_nnc_tensor_free(hd);
689
1
  ccv_nnc_tensor_free(td);
690
1
}
691
692
TEST_CASE("cross entropy loss forward")
693
1
{
694
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF));
695
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
696
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
697
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
698
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
699
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
700
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
701
1
  dsfmt_t dsfmt;
702
1
  dsfmt_init_gen_rand(&dsfmt, 0);
703
1
  int i;
704
1.00k
  for (i = 0; i < 1000; 
i++1.00k
)
705
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
706
11
  for (i = 0; i < 10; 
i++10
)
707
10
    hb->data.f32[i] = (i + 1) * 9;
708
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
709
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
710
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
711
1
  ccv_nnc_tensor_t* tc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
712
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(tc), 0);
713
1
  REQUIRE_TENSOR_EQ(tc, hc, "GPU computed output should be the same as CPU computed ones");
714
1
  ccv_nnc_tensor_free(a);
715
1
  ccv_nnc_tensor_free(b);
716
1
  ccv_nnc_tensor_free(c);
717
1
  ccv_nnc_tensor_free(ha);
718
1
  ccv_nnc_tensor_free(hb);
719
1
  ccv_nnc_tensor_free(hc);
720
1
  ccv_nnc_tensor_free(tc);
721
1
}
722
723
TEST_CASE("cross entropy loss forward with label smoothing")
724
1
{
725
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF));
726
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
727
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
728
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
729
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
730
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
731
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
732
1
  dsfmt_t dsfmt;
733
1
  dsfmt_init_gen_rand(&dsfmt, 0);
734
1
  int i;
735
1.00k
  for (i = 0; i < 1000; 
i++1.00k
)
736
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
737
11
  for (i = 0; i < 10; 
i++10
)
738
10
    hb->data.f32[i] = (i + 1) * 9;
739
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
740
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_FORWARD(0.1, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
741
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_FORWARD(0.1, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
742
1
  ccv_nnc_tensor_t* tc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
743
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(tc), 0);
744
1
  REQUIRE_TENSOR_EQ(tc, hc, "GPU computed output should be the same as CPU computed ones");
745
1
  ccv_nnc_tensor_free(a);
746
1
  ccv_nnc_tensor_free(b);
747
1
  ccv_nnc_tensor_free(c);
748
1
  ccv_nnc_tensor_free(ha);
749
1
  ccv_nnc_tensor_free(hb);
750
1
  ccv_nnc_tensor_free(hc);
751
1
  ccv_nnc_tensor_free(tc);
752
1
}
753
754
TEST_CASE("cross entropy loss backward")
755
1
{
756
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
757
1
    ccv_nnc_cmd_ok(CCV_NNC_CATEGORICAL_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
758
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
759
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
760
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
761
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
762
1
  ccv_nnc_tensor_t* g = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
763
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
764
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
765
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
766
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
767
1
  ccv_nnc_tensor_t* hg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
768
1
  dsfmt_t dsfmt;
769
1
  dsfmt_init_gen_rand(&dsfmt, 0);
770
1
  int i;
771
1.00k
  for (i = 0; i < 1000; 
i++1.00k
)
772
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
773
11
  for (i = 0; i < 10; 
i++10
)
774
10
    hb->data.f32[i] = (i + 1) * 9;
775
11
  for (i = 0; i < 10; 
i++10
)
776
10
    hg->data.f32[i] = 1;
777
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb, hg), TENSOR_LIST(a, b, g), 0);
778
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
779
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(hg, ha, hb), TENSOR_LIST(hd), 0);
780
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
781
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, b), TENSOR_LIST(d), 0);
782
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
783
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
784
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
785
1
  ccv_nnc_tensor_free(a);
786
1
  ccv_nnc_tensor_free(b);
787
1
  ccv_nnc_tensor_free(c);
788
1
  ccv_nnc_tensor_free(d);
789
1
  ccv_nnc_tensor_free(g);
790
1
  ccv_nnc_tensor_free(ha);
791
1
  ccv_nnc_tensor_free(hb);
792
1
  ccv_nnc_tensor_free(hc);
793
1
  ccv_nnc_tensor_free(hd);
794
1
  ccv_nnc_tensor_free(hg);
795
1
  ccv_nnc_tensor_free(td);
796
1
}
797
798
TEST_CASE("cross entropy loss backward with label smoothing")
799
1
{
800
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
801
1
    ccv_nnc_cmd_ok(CCV_NNC_CATEGORICAL_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
802
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
803
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
804
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
805
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
806
1
  ccv_nnc_tensor_t* g = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
807
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
808
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
809
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
810
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
811
1
  ccv_nnc_tensor_t* hg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
812
1
  dsfmt_t dsfmt;
813
1
  dsfmt_init_gen_rand(&dsfmt, 0);
814
1
  int i;
815
1.00k
  for (i = 0; i < 1000; 
i++1.00k
)
816
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
817
11
  for (i = 0; i < 10; 
i++10
)
818
10
    hb->data.f32[i] = (i + 1) * 9;
819
11
  for (i = 0; i < 10; 
i++10
)
820
10
    hg->data.f32[i] = 1;
821
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb, hg), TENSOR_LIST(a, b, g), 0);
822
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_FORWARD(0.1, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
823
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_BACKWARD(0.1, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(hg, ha, hb), TENSOR_LIST(hd), 0);
824
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_FORWARD(0.1, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
825
1
  ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_BACKWARD(0.1, 0.9), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, b), TENSOR_LIST(d), 0);
826
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
827
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
828
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
829
1
  ccv_nnc_tensor_free(a);
830
1
  ccv_nnc_tensor_free(b);
831
1
  ccv_nnc_tensor_free(c);
832
1
  ccv_nnc_tensor_free(d);
833
1
  ccv_nnc_tensor_free(g);
834
1
  ccv_nnc_tensor_free(ha);
835
1
  ccv_nnc_tensor_free(hb);
836
1
  ccv_nnc_tensor_free(hc);
837
1
  ccv_nnc_tensor_free(hd);
838
1
  ccv_nnc_tensor_free(hg);
839
1
  ccv_nnc_tensor_free(td);
840
1
}
841
842
TEST_CASE("binary cross entropy loss forward")
843
1
{
844
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF));
845
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
846
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
847
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
848
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
849
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
850
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
851
1
  dsfmt_t dsfmt;
852
1
  dsfmt_init_gen_rand(&dsfmt, 0);
853
1
  int i;
854
1.00k
  for (i = 0; i < 1000; 
i++1.00k
)
855
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
856
1.00k
  for (i = 0; i < 1000; 
i++1.00k
)
857
1.00k
    hb->data.f32[i] = (i % 2);
858
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
859
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
860
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
861
1
  ccv_nnc_tensor_t* tc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
862
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(tc), 0);
863
1
  REQUIRE_TENSOR_EQ(tc, hc, "GPU computed output should be the same as CPU computed ones");
864
1
  ccv_nnc_tensor_free(a);
865
1
  ccv_nnc_tensor_free(b);
866
1
  ccv_nnc_tensor_free(c);
867
1
  ccv_nnc_tensor_free(ha);
868
1
  ccv_nnc_tensor_free(hb);
869
1
  ccv_nnc_tensor_free(hc);
870
1
  ccv_nnc_tensor_free(tc);
871
1
}
872
873
TEST_CASE("binary cross entropy loss backward")
874
1
{
875
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
876
1
    ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
877
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
878
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
879
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
880
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
881
1
  ccv_nnc_tensor_t* g = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
882
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
883
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
884
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
885
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
886
1
  ccv_nnc_tensor_t* hg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
887
1
  dsfmt_t dsfmt;
888
1
  dsfmt_init_gen_rand(&dsfmt, 0);
889
1
  int i;
890
1.00k
  for (i = 0; i < 1000; 
i++1.00k
)
891
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
892
1.00k
  for (i = 0; i < 1000; 
i++1.00k
)
893
1.00k
    hb->data.f32[i] = (i % 2);
894
11
  for (i = 0; i < 10; 
i++10
)
895
10
    hg->data.f32[i] = 1;
896
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb, hg), TENSOR_LIST(a, b, g), 0);
897
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
898
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(hg, ha, hb), TENSOR_LIST(hd), 0);
899
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
900
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, b), TENSOR_LIST(d), 0);
901
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
902
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
903
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
904
1
  ccv_nnc_tensor_free(a);
905
1
  ccv_nnc_tensor_free(b);
906
1
  ccv_nnc_tensor_free(c);
907
1
  ccv_nnc_tensor_free(d);
908
1
  ccv_nnc_tensor_free(g);
909
1
  ccv_nnc_tensor_free(ha);
910
1
  ccv_nnc_tensor_free(hb);
911
1
  ccv_nnc_tensor_free(hc);
912
1
  ccv_nnc_tensor_free(hd);
913
1
  ccv_nnc_tensor_free(hg);
914
1
  ccv_nnc_tensor_free(td);
915
1
}
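Note (reading aid): the backward test feeds a per-row upstream gradient of all ones and checks the gradient with respect to the predictions. Under the standard unweighted definition that gradient is -(y/p - (1 - y)/(1 - p)) scaled by the incoming gradient; a hedged sketch with illustrative names, not ccv's internals:

/* Reference-only sketch of the binary cross entropy gradient w.r.t. the
 * prediction, scaled by the per-row upstream gradient g (all ones here). */
static void bce_backward_sketch(const float* g, const float* prob,
                                const float* label, float* dprob,
                                int rows, int cols)
{
  int i, j;
  for (i = 0; i < rows; i++)
    for (j = 0; j < cols; j++)
    {
      const float p = prob[i * cols + j];
      const float y = label[i * cols + j];
      dprob[i * cols + j] = g[i] * (-(y / p - (1 - y) / (1 - p)));
    }
}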
916
917
TEST_CASE("binary cross entropy loss backward no input gradient")
918
1
{
919
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
920
1
    ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
921
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
922
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
923
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
924
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
925
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
926
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
927
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
928
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
929
1
  dsfmt_t dsfmt;
930
1
  dsfmt_init_gen_rand(&dsfmt, 0);
931
1
  int i;
932
1.00k
  for (i = 0; i < 1000; i++)
933
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
934
1.00k
  for (i = 0; i < 1000; i++)
935
1.00k
    hb->data.f32[i] = (i % 2);
936
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
937
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
938
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(0, ha, hb), TENSOR_LIST(hd), 0);
939
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
940
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(0, a, b), TENSOR_LIST(d), 0);
941
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
942
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
943
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
944
1
  ccv_nnc_tensor_free(a);
945
1
  ccv_nnc_tensor_free(b);
946
1
  ccv_nnc_tensor_free(c);
947
1
  ccv_nnc_tensor_free(d);
948
1
  ccv_nnc_tensor_free(ha);
949
1
  ccv_nnc_tensor_free(hb);
950
1
  ccv_nnc_tensor_free(hc);
951
1
  ccv_nnc_tensor_free(hd);
952
1
  ccv_nnc_tensor_free(td);
953
1
}
954
955
TEST_CASE("sigmoid binary cross entropy loss forward")
956
1
{
957
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF));
958
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
959
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
960
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
961
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
962
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
963
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
964
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
965
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
966
1
  dsfmt_t dsfmt;
967
1
  dsfmt_init_gen_rand(&dsfmt, 0);
968
1
  int i;
969
1.00k
  for (i = 0; i < 1000; i++)
970
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 10 - 5;
971
1.00k
  for (i = 0; i < 1000; i++)
972
1.00k
    hb->data.f32[i] = (i % 2);
973
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
974
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc, hd), 0);
975
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c, d), 0);
976
1
  ccv_nnc_tensor_t* tc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
977
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
978
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c, d), TENSOR_LIST(tc, td), 0);
979
1
  REQUIRE_TENSOR_EQ(tc, hc, "GPU computed output should be the same as CPU computed ones");
980
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
981
1
  ccv_nnc_tensor_free(a);
982
1
  ccv_nnc_tensor_free(b);
983
1
  ccv_nnc_tensor_free(c);
984
1
  ccv_nnc_tensor_free(d);
985
1
  ccv_nnc_tensor_free(ha);
986
1
  ccv_nnc_tensor_free(hb);
987
1
  ccv_nnc_tensor_free(hc);
988
1
  ccv_nnc_tensor_free(hd);
989
1
  ccv_nnc_tensor_free(tc);
990
1
  ccv_nnc_tensor_free(td);
991
1
}
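Note (reading aid): the sigmoid variants feed raw logits in [-5, 5) and expect a second output d alongside the per-row loss, which the backward tests later feed back in (presumably the sigmoid activations). Assuming the usual numerically stable fusion of sigmoid and binary cross entropy, the per-element loss can be sketched as:

/* Reference-only sketch: numerically stable sigmoid + binary cross entropy
 * on a single logit x with 0/1 label y, i.e.
 *   loss = max(x, 0) - x * y + log(1 + exp(-|x|))
 * which equals -(y * log(sigmoid(x)) + (1 - y) * log(1 - sigmoid(x))). */
#include <math.h>

static float sigmoid_bce_sketch(float x, float y)
{
  const float z = fabsf(x);
  return fmaxf(x, 0) - x * y + logf(1 + expf(-z));
}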
992
993
TEST_CASE("sigmoid binary cross entropy loss forward no loss")
994
1
{
995
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF));
996
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
997
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
998
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
999
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1000
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1001
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1002
1
  dsfmt_t dsfmt;
1003
1
  dsfmt_init_gen_rand(&dsfmt, 0);
1004
1
  int i;
1005
1.00k
  for (i = 0; i < 1000; i++)
1006
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 10 - 5;
1007
1.00k
  for (i = 0; i < 1000; i++)
1008
1.00k
    hb->data.f32[i] = (i % 2);
1009
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
1010
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(0, hd), 0);
1011
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(0, d), 0);
1012
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1013
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
1014
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
1015
1
  ccv_nnc_tensor_free(a);
1016
1
  ccv_nnc_tensor_free(b);
1017
1
  ccv_nnc_tensor_free(d);
1018
1
  ccv_nnc_tensor_free(ha);
1019
1
  ccv_nnc_tensor_free(hb);
1020
1
  ccv_nnc_tensor_free(hd);
1021
1
  ccv_nnc_tensor_free(td);
1022
1
}
1023
1024
TEST_CASE("sigmoid binary cross entropy loss backward")
1025
1
{
1026
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
1027
1
    ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
1028
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1029
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1030
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1031
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1032
1
  ccv_nnc_tensor_t* g = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
1033
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1034
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1035
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1036
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1037
1
  ccv_nnc_tensor_t* hg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1038
1
  dsfmt_t dsfmt;
1039
1
  dsfmt_init_gen_rand(&dsfmt, 0);
1040
1
  int i;
1041
1.00k
  for (i = 0; i < 1000; i++)
1042
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
1043
1.00k
  for (i = 0; i < 1000; i++)
1044
1.00k
    hb->data.f32[i] = (i % 2);
1045
11
  for (i = 0; i < 10; i++)
1046
10
    hg->data.f32[i] = 1;
1047
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb, hg), TENSOR_LIST(a, b, g), 0);
1048
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(0, hc), 0);
1049
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(hg, 0, 0, hb, 0, hc), TENSOR_LIST(hd), 0);
1050
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(0, c), 0);
1051
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, 0, 0, b, 0, c), TENSOR_LIST(d), 0);
1052
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1053
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
1054
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
1055
1
  ccv_nnc_tensor_free(a);
1056
1
  ccv_nnc_tensor_free(b);
1057
1
  ccv_nnc_tensor_free(c);
1058
1
  ccv_nnc_tensor_free(d);
1059
1
  ccv_nnc_tensor_free(g);
1060
1
  ccv_nnc_tensor_free(ha);
1061
1
  ccv_nnc_tensor_free(hb);
1062
1
  ccv_nnc_tensor_free(hc);
1063
1
  ccv_nnc_tensor_free(hd);
1064
1
  ccv_nnc_tensor_free(hg);
1065
1
  ccv_nnc_tensor_free(td);
1066
1
}
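Note (reading aid): the backward call above passes only the upstream gradient, the labels and the saved forward output, which is consistent with the fused form having a very simple gradient. Assuming the standard fusion, the gradient with respect to the logit is sigmoid(x) - y times the upstream gradient; a one-line sketch:

/* Reference-only sketch: gradient of sigmoid + binary cross entropy w.r.t.
 * the logit, reusing the sigmoid activation s saved by the forward pass. */
static float sigmoid_bce_grad_sketch(float g, float s, float y)
{
  return g * (s - y); /* d/dx of the fused loss, times upstream gradient */
}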
1067
1068
TEST_CASE("sigmoid binary cross entropy loss backward no input gradient")
1069
1
{
1070
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
1071
1
    ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
1072
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1073
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1074
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1075
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1076
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1077
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1078
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1079
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1080
1
  dsfmt_t dsfmt;
1081
1
  dsfmt_init_gen_rand(&dsfmt, 0);
1082
1
  int i;
1083
1.00k
  for (i = 0; i < 1000; i++)
1084
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
1085
1.00k
  for (i = 0; i < 1000; i++)
1086
1.00k
    hb->data.f32[i] = (i % 2);
1087
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
1088
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(0, hc), 0);
1089
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(0, 0, 0, hb, 0, hc), TENSOR_LIST(hd), 0);
1090
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(0, c), 0);
1091
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(0, 0, 0, b, 0, c), TENSOR_LIST(d), 0);
1092
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1093
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
1094
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
1095
1
  ccv_nnc_tensor_free(a);
1096
1
  ccv_nnc_tensor_free(b);
1097
1
  ccv_nnc_tensor_free(c);
1098
1
  ccv_nnc_tensor_free(d);
1099
1
  ccv_nnc_tensor_free(ha);
1100
1
  ccv_nnc_tensor_free(hb);
1101
1
  ccv_nnc_tensor_free(hc);
1102
1
  ccv_nnc_tensor_free(hd);
1103
1
  ccv_nnc_tensor_free(td);
1104
1
}
1105
1106
TEST_CASE("binary cross entropy loss forward with pos_weight")
1107
1
{
1108
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF));
1109
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1110
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1111
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
1112
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1113
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1114
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1115
1
  dsfmt_t dsfmt;
1116
1
  dsfmt_init_gen_rand(&dsfmt, 0);
1117
1
  int i;
1118
1.00k
  for (i = 0; i < 1000; i++)
1119
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
1120
1.00k
  for (i = 0; i < 1000; i++)
1121
1.00k
    hb->data.f32[i] = (i % 2);
1122
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
1123
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
1124
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
1125
1
  ccv_nnc_tensor_t* tc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1126
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(tc), 0);
1127
1
  REQUIRE_TENSOR_EQ(tc, hc, "GPU computed output should be the same as CPU computed ones");
1128
1
  ccv_nnc_tensor_free(a);
1129
1
  ccv_nnc_tensor_free(b);
1130
1
  ccv_nnc_tensor_free(c);
1131
1
  ccv_nnc_tensor_free(ha);
1132
1
  ccv_nnc_tensor_free(hb);
1133
1
  ccv_nnc_tensor_free(hc);
1134
1
  ccv_nnc_tensor_free(tc);
1135
1
}
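Note (reading aid): the pos_weight variants pass an extra scalar (1.2 here). In the common convention this weight scales only the positive-label term, which is useful for imbalanced targets; whether ccv follows exactly this convention is not restated here. A sketch of the weighted per-element loss under that assumption:

/* Reference-only sketch: binary cross entropy with a positive-class weight,
 * i.e. -(pos_weight * y * log(p) + (1 - y) * log(1 - p)). */
#include <math.h>

static float weighted_bce_sketch(float p, float y, float pos_weight)
{
  return -(pos_weight * y * logf(p) + (1 - y) * logf(1 - p));
}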
1136
1137
TEST_CASE("binary cross entropy loss backward with pos_weight")
1138
1
{
1139
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
1140
1
    ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
1141
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1142
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1143
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
1144
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1145
1
  ccv_nnc_tensor_t* g = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
1146
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1147
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1148
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1149
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1150
1
  ccv_nnc_tensor_t* hg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1151
1
  dsfmt_t dsfmt;
1152
1
  dsfmt_init_gen_rand(&dsfmt, 0);
1153
1
  int i;
1154
1.00k
  for (i = 0; i < 1000; i++)
1155
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
1156
1.00k
  for (i = 0; i < 1000; i++)
1157
1.00k
    hb->data.f32[i] = (i % 2);
1158
11
  for (i = 0; i < 10; i++)
1159
10
    hg->data.f32[i] = 1;
1160
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb, hg), TENSOR_LIST(a, b, g), 0);
1161
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
1162
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_BACKWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(hg, ha, hb), TENSOR_LIST(hd), 0);
1163
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
1164
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_BACKWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, b), TENSOR_LIST(d), 0);
1165
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1166
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
1167
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
1168
1
  ccv_nnc_tensor_free(a);
1169
1
  ccv_nnc_tensor_free(b);
1170
1
  ccv_nnc_tensor_free(c);
1171
1
  ccv_nnc_tensor_free(d);
1172
1
  ccv_nnc_tensor_free(g);
1173
1
  ccv_nnc_tensor_free(ha);
1174
1
  ccv_nnc_tensor_free(hb);
1175
1
  ccv_nnc_tensor_free(hc);
1176
1
  ccv_nnc_tensor_free(hd);
1177
1
  ccv_nnc_tensor_free(hg);
1178
1
  ccv_nnc_tensor_free(td);
1179
1
}
1180
1181
TEST_CASE("binary cross entropy loss backward no input gradient with pos_weight")
1182
1
{
1183
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
1184
1
    ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
1185
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1186
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1187
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
1188
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1189
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1190
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1191
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1192
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1193
1
  dsfmt_t dsfmt;
1194
1
  dsfmt_init_gen_rand(&dsfmt, 0);
1195
1
  int i;
1196
1.00k
  for (i = 0; i < 1000; i++)
1197
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
1198
1.00k
  for (i = 0; i < 1000; i++)
1199
1.00k
    hb->data.f32[i] = (i % 2);
1200
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
1201
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
1202
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_BACKWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(0, ha, hb), TENSOR_LIST(hd), 0);
1203
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
1204
1
  ccv_nnc_cmd_exec(CMD_BINARY_CROSSENTROPY_BACKWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(0, a, b), TENSOR_LIST(d), 0);
1205
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1206
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
1207
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
1208
1
  ccv_nnc_tensor_free(a);
1209
1
  ccv_nnc_tensor_free(b);
1210
1
  ccv_nnc_tensor_free(c);
1211
1
  ccv_nnc_tensor_free(d);
1212
1
  ccv_nnc_tensor_free(ha);
1213
1
  ccv_nnc_tensor_free(hb);
1214
1
  ccv_nnc_tensor_free(hc);
1215
1
  ccv_nnc_tensor_free(hd);
1216
1
  ccv_nnc_tensor_free(td);
1217
1
}
1218
1219
TEST_CASE("sigmoid binary cross entropy loss forward with pos_weight")
1220
1
{
1221
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF));
1222
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1223
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1224
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
1225
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1226
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1227
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1228
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1229
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1230
1
  dsfmt_t dsfmt;
1231
1
  dsfmt_init_gen_rand(&dsfmt, 0);
1232
1
  int i;
1233
1.00k
  for (i = 0; i < 1000; i++)
1234
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 10 - 5;
1235
1.00k
  for (i = 0; i < 1000; i++)
1236
1.00k
    hb->data.f32[i] = (i % 2);
1237
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
1238
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc, hd), 0);
1239
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c, d), 0);
1240
1
  ccv_nnc_tensor_t* tc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1241
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1242
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c, d), TENSOR_LIST(tc, td), 0);
1243
1
  REQUIRE_TENSOR_EQ(tc, hc, "GPU computed output should be the same as CPU computed ones");
1244
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
1245
1
  ccv_nnc_tensor_free(a);
1246
1
  ccv_nnc_tensor_free(b);
1247
1
  ccv_nnc_tensor_free(c);
1248
1
  ccv_nnc_tensor_free(d);
1249
1
  ccv_nnc_tensor_free(ha);
1250
1
  ccv_nnc_tensor_free(hb);
1251
1
  ccv_nnc_tensor_free(hc);
1252
1
  ccv_nnc_tensor_free(hd);
1253
1
  ccv_nnc_tensor_free(tc);
1254
1
  ccv_nnc_tensor_free(td);
1255
1
}
1256
1257
TEST_CASE("sigmoid binary cross entropy loss forward no loss with pos_weight")
1258
1
{
1259
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF));
1260
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1261
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1262
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1263
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1264
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1265
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1266
1
  dsfmt_t dsfmt;
1267
1
  dsfmt_init_gen_rand(&dsfmt, 0);
1268
1
  int i;
1269
1.00k
  for (i = 0; i < 1000; i++)
1270
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) * 10 - 5;
1271
1.00k
  for (i = 0; i < 1000; i++)
1272
1.00k
    hb->data.f32[i] = (i % 2);
1273
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
1274
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(0, hd), 0);
1275
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(0, d), 0);
1276
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1277
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
1278
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
1279
1
  ccv_nnc_tensor_free(a);
1280
1
  ccv_nnc_tensor_free(b);
1281
1
  ccv_nnc_tensor_free(d);
1282
1
  ccv_nnc_tensor_free(ha);
1283
1
  ccv_nnc_tensor_free(hb);
1284
1
  ccv_nnc_tensor_free(hd);
1285
1
  ccv_nnc_tensor_free(td);
1286
1
}
1287
1288
TEST_CASE("sigmoid binary cross entropy loss backward with pos_weight")
1289
1
{
1290
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
1291
1
    ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
1292
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1293
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1294
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1295
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1296
1
  ccv_nnc_tensor_t* g = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
1297
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1298
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1299
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1300
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1301
1
  ccv_nnc_tensor_t* hg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1302
1
  dsfmt_t dsfmt;
1303
1
  dsfmt_init_gen_rand(&dsfmt, 0);
1304
1
  int i;
1305
1.00k
  for (i = 0; i < 1000; i++)
1306
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
1307
1.00k
  for (i = 0; i < 1000; i++)
1308
1.00k
    hb->data.f32[i] = (i % 2);
1309
11
  for (i = 0; i < 10; i++)
1310
10
    hg->data.f32[i] = 1;
1311
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb, hg), TENSOR_LIST(a, b, g), 0);
1312
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(0, hc), 0);
1313
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(hg, 0, 0, hb, 0, hc), TENSOR_LIST(hd), 0);
1314
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(0, c), 0);
1315
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(g, 0, 0, b, 0, c), TENSOR_LIST(d), 0);
1316
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1317
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
1318
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
1319
1
  ccv_nnc_tensor_free(a);
1320
1
  ccv_nnc_tensor_free(b);
1321
1
  ccv_nnc_tensor_free(c);
1322
1
  ccv_nnc_tensor_free(d);
1323
1
  ccv_nnc_tensor_free(g);
1324
1
  ccv_nnc_tensor_free(ha);
1325
1
  ccv_nnc_tensor_free(hb);
1326
1
  ccv_nnc_tensor_free(hc);
1327
1
  ccv_nnc_tensor_free(hd);
1328
1
  ccv_nnc_tensor_free(hg);
1329
1
  ccv_nnc_tensor_free(td);
1330
1
}
1331
1332
TEST_CASE("sigmoid binary cross entropy loss backward no input gradient with pos_weight")
1333
1
{
1334
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
1335
1
    ccv_nnc_cmd_ok(CCV_NNC_BINARY_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
1336
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1337
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1338
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1339
1
  ccv_nnc_tensor_t* d = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1340
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1341
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1342
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1343
1
  ccv_nnc_tensor_t* hd = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1344
1
  dsfmt_t dsfmt;
1345
1
  dsfmt_init_gen_rand(&dsfmt, 0);
1346
1
  int i;
1347
1.00k
  for (i = 0; i < 1000; i++)
1348
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
1349
1.00k
  for (i = 0; i < 1000; i++)
1350
1.00k
    hb->data.f32[i] = (i % 2);
1351
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
1352
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(0, hc), 0);
1353
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(0, 0, 0, hb, 0, hc), TENSOR_LIST(hd), 0);
1354
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_FORWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(0, c), 0);
1355
1
  ccv_nnc_cmd_exec(CMD_SIGMOID_BINARY_CROSSENTROPY_BACKWARD(1.2), ccv_nnc_no_hint, 0, TENSOR_LIST(0, 0, 0, b, 0, c), TENSOR_LIST(d), 0);
1356
1
  ccv_nnc_tensor_t* td = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1357
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(d), TENSOR_LIST(td), 0);
1358
1
  REQUIRE_TENSOR_EQ(td, hd, "GPU computed output should be the same as CPU computed ones");
1359
1
  ccv_nnc_tensor_free(a);
1360
1
  ccv_nnc_tensor_free(b);
1361
1
  ccv_nnc_tensor_free(c);
1362
1
  ccv_nnc_tensor_free(d);
1363
1
  ccv_nnc_tensor_free(ha);
1364
1
  ccv_nnc_tensor_free(hb);
1365
1
  ccv_nnc_tensor_free(hc);
1366
1
  ccv_nnc_tensor_free(hd);
1367
1
  ccv_nnc_tensor_free(td);
1368
1
}
1369
1370
TEST_CASE("mse mean loss forward")
1371
1
{
1372
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_MSE_FORWARD, CCV_NNC_BACKEND_GPU_REF));
1373
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1374
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1375
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
1376
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1377
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1378
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1379
1
  dsfmt_t dsfmt;
1380
1
  dsfmt_init_gen_rand(&dsfmt, 0);
1381
1
  int i;
1382
1.00k
  for (i = 0; i < 1000; i++)
1383
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
1384
1.00k
  for (i = 0; i < 1000; i++)
1385
1.00k
    hb->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
1386
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
1387
1
  ccv_nnc_cmd_exec(CMD_MSE_FORWARD(CCV_NNC_MSE_REDUCE_MEAN), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
1388
1
  ccv_nnc_cmd_exec(CMD_MSE_FORWARD(CCV_NNC_MSE_REDUCE_MEAN), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
1389
1
  ccv_nnc_tensor_t* tc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1390
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(tc), 0);
1391
1
  REQUIRE_TENSOR_EQ(tc, hc, "GPU computed output should be the same as CPU computed ones");
1392
1
  ccv_nnc_tensor_free(a);
1393
1
  ccv_nnc_tensor_free(b);
1394
1
  ccv_nnc_tensor_free(c);
1395
1
  ccv_nnc_tensor_free(ha);
1396
1
  ccv_nnc_tensor_free(hb);
1397
1
  ccv_nnc_tensor_free(hc);
1398
1
  ccv_nnc_tensor_free(tc);
1399
1
}
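Note (reading aid): the MSE tests exercise both reduction modes. Assuming the textbook definitions — ccv's exact scaling is not restated here — CCV_NNC_MSE_REDUCE_MEAN divides the per-row sum of squared differences by the row length while CCV_NNC_MSE_REDUCE_SUM does not; a sketch:

/* Reference-only sketch of per-row mean squared error with the two
 * reductions the tests exercise; mean_reduce selects mean vs. plain sum. */
static void mse_forward_sketch(const float* a, const float* b, float* loss,
                               int rows, int cols, int mean_reduce)
{
  int i, j;
  for (i = 0; i < rows; i++)
  {
    float sum = 0;
    for (j = 0; j < cols; j++)
    {
      const float d = a[i * cols + j] - b[i * cols + j];
      sum += d * d;
    }
    loss[i] = mean_reduce ? sum / cols : sum;
  }
}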
1400
1401
TEST_CASE("mse mean loss backward")
1402
1
{
1403
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_MSE_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
1404
1
    ccv_nnc_cmd_ok(CCV_NNC_MSE_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
1405
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1406
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1407
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
1408
1
  ccv_nnc_tensor_t* da = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1409
1
  ccv_nnc_tensor_t* db = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1410
1
  ccv_nnc_tensor_t* g = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
1411
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1412
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1413
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1414
1
  ccv_nnc_tensor_t* hda = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1415
1
  ccv_nnc_tensor_t* hdb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1416
1
  ccv_nnc_tensor_t* hg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1417
1
  dsfmt_t dsfmt;
1418
1
  dsfmt_init_gen_rand(&dsfmt, 0);
1419
1
  int i;
1420
1.00k
  for (i = 0; i < 1000; i++)
1421
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
1422
1.00k
  for (i = 0; i < 1000; i++)
1423
1.00k
    hb->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
1424
11
  for (i = 0; i < 10; i++)
1425
10
    hg->data.f32[i] = 1;
1426
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb, hg), TENSOR_LIST(a, b, g), 0);
1427
1
  ccv_nnc_cmd_exec(CMD_MSE_FORWARD(CCV_NNC_MSE_REDUCE_MEAN), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
1428
1
  ccv_nnc_cmd_exec(CMD_MSE_BACKWARD(CCV_NNC_MSE_REDUCE_MEAN), ccv_nnc_no_hint, 0, TENSOR_LIST(hg, ha, hb), TENSOR_LIST(hda, hdb), 0);
1429
1
  ccv_nnc_cmd_exec(CMD_MSE_FORWARD(CCV_NNC_MSE_REDUCE_MEAN), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
1430
1
  ccv_nnc_cmd_exec(CMD_MSE_BACKWARD(CCV_NNC_MSE_REDUCE_MEAN), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, b), TENSOR_LIST(da, db), 0);
1431
1
  ccv_nnc_tensor_t* tda = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1432
1
  ccv_nnc_tensor_t* tdb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1433
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(da, db), TENSOR_LIST(tda, tdb), 0);
1434
1
  REQUIRE_TENSOR_EQ(tda, hda, "GPU computed output should be the same as CPU computed ones");
1435
1
  REQUIRE_TENSOR_EQ(tdb, hdb, "GPU computed output should be the same as CPU computed ones");
1436
1
  ccv_nnc_tensor_free(a);
1437
1
  ccv_nnc_tensor_free(b);
1438
1
  ccv_nnc_tensor_free(c);
1439
1
  ccv_nnc_tensor_free(da);
1440
1
  ccv_nnc_tensor_free(db);
1441
1
  ccv_nnc_tensor_free(g);
1442
1
  ccv_nnc_tensor_free(ha);
1443
1
  ccv_nnc_tensor_free(hb);
1444
1
  ccv_nnc_tensor_free(hc);
1445
1
  ccv_nnc_tensor_free(hda);
1446
1
  ccv_nnc_tensor_free(hdb);
1447
1
  ccv_nnc_tensor_free(hg);
1448
1
  ccv_nnc_tensor_free(tda);
1449
1
  ccv_nnc_tensor_free(tdb);
1450
1
}
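Note (reading aid): the corresponding backward gradient, under the same textbook assumption, is 2 * (a - b) scaled by the upstream gradient (and divided by the row length for the mean reduction), flowing into the two inputs with opposite signs; a hedged sketch:

/* Reference-only sketch of the MSE gradients w.r.t. both inputs. */
static void mse_backward_sketch(const float* g, const float* a, const float* b,
                                float* da, float* db, int rows, int cols,
                                int mean_reduce)
{
  int i, j;
  for (i = 0; i < rows; i++)
    for (j = 0; j < cols; j++)
    {
      float d = 2 * (a[i * cols + j] - b[i * cols + j]) * g[i];
      if (mean_reduce)
        d /= cols;
      da[i * cols + j] = d;
      db[i * cols + j] = -d;
    }
}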
1451
1452
TEST_CASE("mse sum loss forward")
1453
1
{
1454
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_MSE_FORWARD, CCV_NNC_BACKEND_GPU_REF));
1455
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1456
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1457
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
1458
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1459
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1460
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1461
1
  dsfmt_t dsfmt;
1462
1
  dsfmt_init_gen_rand(&dsfmt, 0);
1463
1
  int i;
1464
1.00k
  for (i = 0; i < 1000; i++)
1465
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
1466
1.00k
  for (i = 0; i < 1000; i++)
1467
1.00k
    hb->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
1468
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(a, b), 0);
1469
1
  ccv_nnc_cmd_exec(CMD_MSE_FORWARD(CCV_NNC_MSE_REDUCE_SUM), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
1470
1
  ccv_nnc_cmd_exec(CMD_MSE_FORWARD(CCV_NNC_MSE_REDUCE_SUM), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
1471
1
  ccv_nnc_tensor_t* tc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1472
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(c), TENSOR_LIST(tc), 0);
1473
1
  REQUIRE_TENSOR_EQ(tc, hc, "GPU computed output should be the same as CPU computed ones");
1474
1
  ccv_nnc_tensor_free(a);
1475
1
  ccv_nnc_tensor_free(b);
1476
1
  ccv_nnc_tensor_free(c);
1477
1
  ccv_nnc_tensor_free(ha);
1478
1
  ccv_nnc_tensor_free(hb);
1479
1
  ccv_nnc_tensor_free(hc);
1480
1
  ccv_nnc_tensor_free(tc);
1481
1
}
1482
1483
TEST_CASE("mse sum loss backward")
1484
1
{
1485
1
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_MSE_FORWARD, CCV_NNC_BACKEND_GPU_REF) &&
1486
1
    ccv_nnc_cmd_ok(CCV_NNC_MSE_BACKWARD, CCV_NNC_BACKEND_GPU_REF));
1487
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1488
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1489
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
1490
1
  ccv_nnc_tensor_t* da = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1491
1
  ccv_nnc_tensor_t* db = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 100), 0);
1492
1
  ccv_nnc_tensor_t* g = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10), 0);
1493
1
  ccv_nnc_tensor_t* ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1494
1
  ccv_nnc_tensor_t* hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1495
1
  ccv_nnc_tensor_t* hc = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1496
1
  ccv_nnc_tensor_t* hda = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1497
1
  ccv_nnc_tensor_t* hdb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1498
1
  ccv_nnc_tensor_t* hg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10), 0);
1499
1
  dsfmt_t dsfmt;
1500
1
  dsfmt_init_gen_rand(&dsfmt, 0);
1501
1
  int i;
1502
1.00k
  for (i = 0; i < 1000; i++)
1503
1.00k
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
1504
1.00k
  for (i = 0; i < 1000; i++)
1505
1.00k
    hb->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
1506
11
  for (i = 0; i < 10; i++)
1507
10
    hg->data.f32[i] = 1;
1508
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb, hg), TENSOR_LIST(a, b, g), 0);
1509
1
  ccv_nnc_cmd_exec(CMD_MSE_FORWARD(CCV_NNC_MSE_REDUCE_SUM), ccv_nnc_no_hint, 0, TENSOR_LIST(ha, hb), TENSOR_LIST(hc), 0);
1510
1
  ccv_nnc_cmd_exec(CMD_MSE_BACKWARD(CCV_NNC_MSE_REDUCE_SUM), ccv_nnc_no_hint, 0, TENSOR_LIST(hg, ha, hb), TENSOR_LIST(hda, hdb), 0);
1511
1
  ccv_nnc_cmd_exec(CMD_MSE_FORWARD(CCV_NNC_MSE_REDUCE_SUM), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
1512
1
  ccv_nnc_cmd_exec(CMD_MSE_BACKWARD(CCV_NNC_MSE_REDUCE_SUM), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, b), TENSOR_LIST(da, db), 0);
1513
1
  ccv_nnc_tensor_t* tda = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1514
1
  ccv_nnc_tensor_t* tdb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 100), 0);
1515
1
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(da, db), TENSOR_LIST(tda, tdb), 0);
1516
1
  REQUIRE_TENSOR_EQ(tda, hda, "GPU computed output should be the same as CPU computed ones");
1517
1
  REQUIRE_TENSOR_EQ(tdb, hdb, "GPU computed output should be the same as CPU computed ones");
1518
1
  ccv_nnc_tensor_free(a);
1519
1
  ccv_nnc_tensor_free(b);
1520
1
  ccv_nnc_tensor_free(c);
1521
1
  ccv_nnc_tensor_free(da);
1522
1
  ccv_nnc_tensor_free(db);
1523
1
  ccv_nnc_tensor_free(g);
1524
1
  ccv_nnc_tensor_free(ha);
1525
1
  ccv_nnc_tensor_free(hb);
1526
1
  ccv_nnc_tensor_free(hc);
1527
1
  ccv_nnc_tensor_free(hda);
1528
1
  ccv_nnc_tensor_free(hdb);
1529
1
  ccv_nnc_tensor_free(hg);
1530
1
  ccv_nnc_tensor_free(tda);
1531
1
  ccv_nnc_tensor_free(tdb);
1532
1
}
1533
1534
#include "case_main.h"