Coverage Report

Created: 2025-02-24 17:43

/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/reduce.tests.c
Line  Count  Source
   1         #include "case.h"
   2         #include "ccv_case.h"
   3         #include "ccv_nnc_case.h"
   4         #include <ccv.h>
   5         #include <nnc/ccv_nnc.h>
   6         #include <nnc/ccv_nnc_easy.h>
   7         #include <3rdparty/dsfmt/dSFMT.h>
   8
   9         TEST_SETUP()
  10         {
  11           ccv_nnc_init();
  12         }
  13
  14         TEST_CASE("reduce sum forward")
  15         {
  16           GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_SUM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_SUM_FORWARD, CCV_NNC_BACKEND_MPS));
  17           ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
  18           ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
  19           ha->data.f32[0] = 1;
  20           ha->data.f32[1] = 2;
  21           ha->data.f32[2] = 3;
  22           ha->data.f32[3] = 4;
  23           ha->data.f32[4] = 5;
  24           ha->data.f32[5] = 6;
  25           ccv_nnc_cmd_exec(CMD_REDUCE_SUM_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
  26           ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
  27           ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
  28           ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  29           ccv_nnc_cmd_exec(CMD_REDUCE_SUM_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
  30           ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
  31           ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
  32           REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
  33           ccv_nnc_tensor_free(ha);
  34           ccv_nnc_tensor_free(hb);
  35           ccv_nnc_tensor_free(a);
  36           ccv_nnc_tensor_free(b);
  37           ccv_nnc_tensor_free(bt);
  38         }
  39
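
The values the test above checks can be worked out by hand: CMD_REDUCE_SUM_FORWARD(0) collapses the 2x3 input to a length-3 vector, which is consistent with summing over axis 0, and REQUIRE_TENSOR_EQ only compares the CPU result (hb) against the GPU result copied back (bt). As a side reference, a minimal standalone C sketch of that reduction with the expected numbers (plain C, independent of the ccv API; the axis interpretation is inferred from the shapes above):

    /* Reference reduction for "reduce sum forward": sum the row-major
     * 2x3 input over axis 0. */
    #include <stdio.h>

    int main(void)
    {
        const float a[2][3] = { { 1, 2, 3 }, { 4, 5, 6 } };
        float b[3] = { 0, 0, 0 };
        int i, j;
        for (i = 0; i < 2; i++)
            for (j = 0; j < 3; j++)
                b[j] += a[i][j]; /* collapse axis 0 */
        printf("%g %g %g\n", b[0], b[1], b[2]); /* prints: 5 7 9 */
        return 0;
    }
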
  40         TEST_CASE("reduce sum forward noop")
  41      1  {
  42      1    GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_SUM_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_SUM_FORWARD, CCV_NNC_BACKEND_MPS));
  43      1    ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
  44      1    ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
  45      1    ha->data.f32[0] = 1;
  46      1    ha->data.f32[1] = 2;
  47      1    ha->data.f32[2] = 3;
  48      1    ha->data.f32[3] = 4;
  49      1    ha->data.f32[4] = 5;
  50      1    ha->data.f32[5] = 6;
  51      1    ccv_nnc_cmd_exec(CMD_REDUCE_SUM_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
  52      1    ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1), 0);
  53      1    ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1), 0);
  54      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  55      1    ccv_nnc_cmd_exec(CMD_REDUCE_SUM_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
  56      1    ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
  57      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
  58      1    REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
  59      1    ccv_nnc_tensor_free(ha);
  60      1    ccv_nnc_tensor_free(hb);
  61      1    ccv_nnc_tensor_free(a);
  62      1    ccv_nnc_tensor_free(b);
  63      1    ccv_nnc_tensor_free(bt);
  64      1  }
  65
  66         TEST_CASE("reduce sum backward")
  67      1  {
  68      1    GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_SUM_BACKWARD, CCV_NNC_BACKEND_GPU_CUDNN));
  69      1    ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
  70      1    ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
  71      1    hb->data.f32[0] = 1;
  72      1    hb->data.f32[1] = 2;
  73      1    hb->data.f32[2] = 3;
  74      1    ccv_nnc_cmd_exec(CMD_REDUCE_SUM_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(hb), TENSOR_LIST(ha), 0);
  75      1    ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
  76      1    ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
  77      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(hb), TENSOR_LIST(b), 0);
  78      1    ccv_nnc_cmd_exec(CMD_REDUCE_SUM_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(a), 0);
  79      1    ccv_nnc_tensor_t* const at = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
  80      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(at), 0);
  81      1    REQUIRE_TENSOR_EQ(ha, at, "result should be equal");
  82      1    ccv_nnc_tensor_free(ha);
  83      1    ccv_nnc_tensor_free(hb);
  84      1    ccv_nnc_tensor_free(a);
  85      1    ccv_nnc_tensor_free(b);
  86      1    ccv_nnc_tensor_free(at);
  87      1  }
  88
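
In the backward test the data flows the other way: the length-3 gradient hb is expanded back to the 2x3 shape of ha. A minimal standalone sketch of that expansion, assuming the standard gradient of a sum (every element of the collapsed axis receives the output gradient unchanged):

    /* Reference gradient for "reduce sum backward": broadcast the incoming
     * length-3 gradient along the collapsed axis. */
    #include <stdio.h>

    int main(void)
    {
        const float g[3] = { 1, 2, 3 }; /* gradient w.r.t. the reduced output */
        float da[2][3];
        int i, j;
        for (i = 0; i < 2; i++)
            for (j = 0; j < 3; j++)
                da[i][j] = g[j]; /* d(sum)/da[i][j] = 1, so da = broadcast(g) */
        for (i = 0; i < 2; i++)
            printf("%g %g %g\n", da[i][0], da[i][1], da[i][2]); /* both rows: 1 2 3 */
        return 0;
    }
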
  89         TEST_CASE("reduce mean forward")
  90      1  {
  91      1    GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_MEAN_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_MEAN_FORWARD, CCV_NNC_BACKEND_MPS));
  92      1    ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
  93      1    ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
  94      1    ha->data.f32[0] = 1;
  95      1    ha->data.f32[1] = 2;
  96      1    ha->data.f32[2] = 3;
  97      1    ha->data.f32[3] = 4;
  98      1    ha->data.f32[4] = 5;
  99      1    ha->data.f32[5] = 6;
 100      1    ccv_nnc_cmd_exec(CMD_REDUCE_MEAN_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
 101      1    ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
 102      1    ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
 103      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
 104      1    ccv_nnc_cmd_exec(CMD_REDUCE_MEAN_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
 105      1    ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
 106      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
 107      1    REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
 108      1    ccv_nnc_tensor_free(ha);
 109      1    ccv_nnc_tensor_free(hb);
 110      1    ccv_nnc_tensor_free(a);
 111      1    ccv_nnc_tensor_free(b);
 112      1    ccv_nnc_tensor_free(bt);
 113      1  }
 114
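
Same setup as the sum test, but averaging. A standalone sketch of the expected values, again assuming the argument 0 names the axis that is collapsed (2x3 in, 3 out):

    /* Reference reduction for "reduce mean forward": average the 2x3 input
     * over axis 0. */
    #include <stdio.h>

    int main(void)
    {
        const float a[2][3] = { { 1, 2, 3 }, { 4, 5, 6 } };
        float b[3] = { 0, 0, 0 };
        int i, j;
        for (i = 0; i < 2; i++)
            for (j = 0; j < 3; j++)
                b[j] += a[i][j] / 2; /* divide by the collapsed extent */
        printf("%g %g %g\n", b[0], b[1], b[2]); /* prints: 2.5 3.5 4.5 */
        return 0;
    }
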
 115         TEST_CASE("reduce mean forward noop")
 116      1  {
 117      1    GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_MEAN_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_MEAN_FORWARD, CCV_NNC_BACKEND_MPS));
 118      1    ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
 119      1    ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
 120      1    ha->data.f32[0] = 1;
 121      1    ha->data.f32[1] = 2;
 122      1    ha->data.f32[2] = 3;
 123      1    ha->data.f32[3] = 4;
 124      1    ha->data.f32[4] = 5;
 125      1    ha->data.f32[5] = 6;
 126      1    ccv_nnc_cmd_exec(CMD_REDUCE_MEAN_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
 127      1    ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1), 0);
 128      1    ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1), 0);
 129      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
 130      1    ccv_nnc_cmd_exec(CMD_REDUCE_MEAN_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
 131      1    ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
 132      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
 133      1    REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
 134      1    ccv_nnc_tensor_free(ha);
 135      1    ccv_nnc_tensor_free(hb);
 136      1    ccv_nnc_tensor_free(a);
 137      1    ccv_nnc_tensor_free(b);
 138      1    ccv_nnc_tensor_free(bt);
 139      1  }
 140
 141         TEST_CASE("reduce mean backward")
 142      1  {
 143      1    GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_MEAN_BACKWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_MEAN_BACKWARD, CCV_NNC_BACKEND_MPS));
 144      1    ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
 145      1    ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
 146      1    hb->data.f32[0] = 1;
 147      1    hb->data.f32[1] = 2;
 148      1    hb->data.f32[2] = 3;
 149      1    ccv_nnc_cmd_exec(CMD_REDUCE_MEAN_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(hb), TENSOR_LIST(ha), 0);
 150      1    ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
 151      1    ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
 152      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(hb), TENSOR_LIST(b), 0);
 153      1    ccv_nnc_cmd_exec(CMD_REDUCE_MEAN_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(a), 0);
 154      1    ccv_nnc_tensor_t* const at = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
 155      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(at), 0);
 156      1    REQUIRE_TENSOR_EQ(ha, at, "result should be equal");
 157      1    ccv_nnc_tensor_free(ha);
 158      1    ccv_nnc_tensor_free(hb);
 159      1    ccv_nnc_tensor_free(a);
 160      1    ccv_nnc_tensor_free(b);
 161      1    ccv_nnc_tensor_free(at);
 162      1  }
 163
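
A standalone sketch of the expected gradient for the test above, assuming the standard backward rule for a mean (each element of the collapsed axis receives the output gradient divided by the axis extent, here 2):

    /* Reference gradient for "reduce mean backward". */
    #include <stdio.h>

    int main(void)
    {
        const float g[3] = { 1, 2, 3 }; /* gradient w.r.t. the reduced output */
        float da[2][3];
        int i, j;
        for (i = 0; i < 2; i++)
            for (j = 0; j < 3; j++)
                da[i][j] = g[j] / 2; /* extent of the reduced axis is 2 */
        for (i = 0; i < 2; i++)
            printf("%g %g %g\n", da[i][0], da[i][1], da[i][2]); /* both rows: 0.5 1 1.5 */
        return 0;
    }
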
 164         TEST_CASE("reduce max forward")
 165      1  {
 166      1    GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_MAX_FORWARD, CCV_NNC_BACKEND_MPS));
 167      0    ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
 168      0    ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
 169      0    ha->data.f32[0] = 1;
 170      0    ha->data.f32[1] = 2;
 171      0    ha->data.f32[2] = 3;
 172      0    ha->data.f32[3] = 4;
 173      0    ha->data.f32[4] = 5;
 174      0    ha->data.f32[5] = 6;
 175      0    ccv_nnc_cmd_exec(CMD_REDUCE_MAX_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
 176      0    ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
 177      0    ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
 178      0    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
 179      0    ccv_nnc_cmd_exec(CMD_REDUCE_MAX_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
 180      0    ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
 181      0    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
 182      0    REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
 183      0    ccv_nnc_tensor_free(ha);
 184      0    ccv_nnc_tensor_free(hb);
 185      0    ccv_nnc_tensor_free(a);
 186      0    ccv_nnc_tensor_free(b);
 187      0    ccv_nnc_tensor_free(bt);
 188      0  }
 189
 190         TEST_CASE("reduce min forward")
 191      1  {
 192      1    GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_MIN_FORWARD, CCV_NNC_BACKEND_MPS));
 193      0    ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
 194      0    ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
 195      0    ha->data.f32[0] = 1;
 196      0    ha->data.f32[1] = 2;
 197      0    ha->data.f32[2] = 3;
 198      0    ha->data.f32[3] = 4;
 199      0    ha->data.f32[4] = 5;
 200      0    ha->data.f32[5] = 6;
 201      0    ccv_nnc_cmd_exec(CMD_REDUCE_MIN_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
 202      0    ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
 203      0    ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
 204      0    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
 205      0    ccv_nnc_cmd_exec(CMD_REDUCE_MIN_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
 206      0    ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
 207      0    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
 208      0    REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
 209      0    ccv_nnc_tensor_free(ha);
 210      0    ccv_nnc_tensor_free(hb);
 211      0    ccv_nnc_tensor_free(a);
 212      0    ccv_nnc_tensor_free(b);
 213      0    ccv_nnc_tensor_free(bt);
 214      0  }
 215
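
The two tests above are gated on the MPS backend only and their bodies show zero counts in this run. For reference, a standalone sketch of the values they would check, i.e. elementwise max and min over axis 0 of the same 2x3 input:

    /* Reference for "reduce max forward" / "reduce min forward". */
    #include <stdio.h>

    int main(void)
    {
        const float a[2][3] = { { 1, 2, 3 }, { 4, 5, 6 } };
        float bmax[3], bmin[3];
        int i, j;
        for (j = 0; j < 3; j++) {
            bmax[j] = a[0][j];
            bmin[j] = a[0][j];
            for (i = 1; i < 2; i++) {
                if (a[i][j] > bmax[j])
                    bmax[j] = a[i][j];
                if (a[i][j] < bmin[j])
                    bmin[j] = a[i][j];
            }
        }
        printf("max: %g %g %g\n", bmax[0], bmax[1], bmax[2]); /* 4 5 6 */
        printf("min: %g %g %g\n", bmin[0], bmin[1], bmin[2]); /* 1 2 3 */
        return 0;
    }
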
 216         TEST_CASE("argmin with float")
 217         {
 218           GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_ARGMIN_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_ARGMIN_FORWARD, CCV_NNC_BACKEND_MPS));
 219           dsfmt_t dsfmt;
 220           dsfmt_init_gen_rand(&dsfmt, 0);
 221           ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 3, 5, 3), 0);
 222           ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 10, 1, 5, 3), 0);
 223           int i;
 224           for (i = 0; i < 10 * 3 * 5 * 3; i++)
 225             ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 226           ccv_nnc_cmd_exec(CMD_ARGMIN_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
 227           ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 3, 5, 3), 0);
 228           ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 10, 1, 5, 3), 0);
 229           ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
 230           ccv_nnc_cmd_exec(CMD_ARGMIN_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
 231           ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 10, 1, 5, 3), 0);
 232           ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
 233           REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
 234           ccv_nnc_tensor_free(ha);
 235           ccv_nnc_tensor_free(hb);
 236           ccv_nnc_tensor_free(a);
 237           ccv_nnc_tensor_free(b);
 238           ccv_nnc_tensor_free(bt);
 239         }
 240
 241         TEST_CASE("argmax with float")
 242      1  {
 243      1    GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_ARGMAX_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_ARGMAX_FORWARD, CCV_NNC_BACKEND_MPS));
 244      1    dsfmt_t dsfmt;
 245      1    dsfmt_init_gen_rand(&dsfmt, 0);
 246      1    ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10, 3, 5, 3), 0);
 247      1    ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 10, 1, 5, 3), 0);
 248      1    int i;
 249    451    for (i = 0; i < 10 * 3 * 5 * 3; i++)
 250    450      ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
 251      1    ccv_nnc_cmd_exec(CMD_ARGMAX_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
 252      1    ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10, 3, 5, 3), 0);
 253      1    ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 10, 1, 5, 3), 0);
 254      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
 255      1    ccv_nnc_cmd_exec(CMD_ARGMAX_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
 256      1    ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 10, 1, 5, 3), 0);
 257      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
 258      1    REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
 259      1    ccv_nnc_tensor_free(ha);
 260      1    ccv_nnc_tensor_free(hb);
 261      1    ccv_nnc_tensor_free(a);
 262      1    ccv_nnc_tensor_free(b);
 263      1    ccv_nnc_tensor_free(bt);
 264      1  }
 265
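
The argmin and argmax tests reduce axis 1 of a random 10x3x5x3 tensor to a 10x1x5x3 tensor of int32 indices. A standalone sketch of that reference computation (the dSFMT random fill is replaced by an arbitrary deterministic pattern; tie-breaking here keeps the first extremum, which may or may not match the library kernels):

    /* Reference argmax along axis 1 of a row-major 10x3x5x3 tensor. */
    #include <stdio.h>

    #define N 10
    #define C 3
    #define H 5
    #define W 3

    int main(void)
    {
        float a[N][C][H][W];
        int b[N][H][W];
        int n, c, h, w;
        /* arbitrary deterministic fill standing in for dsfmt_genrand_open_close */
        for (n = 0; n < N; n++)
            for (c = 0; c < C; c++)
                for (h = 0; h < H; h++)
                    for (w = 0; w < W; w++)
                        a[n][c][h][w] = (float)((n * 31 + c * 17 + h * 7 + w * 3) % 11);
        for (n = 0; n < N; n++)
            for (h = 0; h < H; h++)
                for (w = 0; w < W; w++) {
                    int best = 0;
                    for (c = 1; c < C; c++)
                        if (a[n][c][h][w] > a[n][best][h][w])
                            best = c; /* first maximum wins on ties */
                    b[n][h][w] = best;
                }
        printf("b[0][0][0] = %d\n", b[0][0][0]);
        return 0;
    }
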
 266         TEST_CASE("reduce norm2 forward")
 267         {
 268           GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_NORM2_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_NORM2_FORWARD, CCV_NNC_BACKEND_MPS));
 269           ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
 270           ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
 271           ha->data.f32[0] = 1;
 272           ha->data.f32[1] = 2;
 273           ha->data.f32[2] = 3;
 274           ha->data.f32[3] = 4;
 275           ha->data.f32[4] = 5;
 276           ha->data.f32[5] = 6;
 277           ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
 278           ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
 279           ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
 280           ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
 281           ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
 282           ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
 283           ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
 284           REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
 285           ccv_nnc_tensor_free(ha);
 286           ccv_nnc_tensor_free(hb);
 287           ccv_nnc_tensor_free(a);
 288           ccv_nnc_tensor_free(b);
 289           ccv_nnc_tensor_free(bt);
 290         }
 291
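
A standalone sketch of the values the norm2 test compares, assuming the command computes the Euclidean norm over the collapsed axis (consistent with the 2x3 -> 3 shapes above); compile with -lm:

    /* Reference for "reduce norm2 forward": per-column L2 norm over axis 0. */
    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        const float a[2][3] = { { 1, 2, 3 }, { 4, 5, 6 } };
        float b[3];
        int i, j;
        for (j = 0; j < 3; j++) {
            float s = 0;
            for (i = 0; i < 2; i++)
                s += a[i][j] * a[i][j];
            b[j] = sqrtf(s);
        }
        /* expected: sqrt(17), sqrt(29), sqrt(45) ~= 4.123 5.385 6.708 */
        printf("%g %g %g\n", b[0], b[1], b[2]);
        return 0;
    }
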
 292         TEST_CASE("reduce norm2 forward noop")
 293      1  {
 294      1    GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_NORM2_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_NORM2_FORWARD, CCV_NNC_BACKEND_MPS));
 295      1    ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
 296      1    ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
 297      1    ha->data.f32[0] = 1;
 298      1    ha->data.f32[1] = 2;
 299      1    ha->data.f32[2] = 3;
 300      1    ha->data.f32[3] = 4;
 301      1    ha->data.f32[4] = 5;
 302      1    ha->data.f32[5] = 6;
 303      1    ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
 304      1    ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1), 0);
 305      1    ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 6, 1), 0);
 306      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
 307      1    ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
 308      1    ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 6, 1), 0);
 309      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
 310      1    REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
 311      1    ccv_nnc_tensor_free(ha);
 312      1    ccv_nnc_tensor_free(hb);
 313      1    ccv_nnc_tensor_free(a);
 314      1    ccv_nnc_tensor_free(b);
 315      1    ccv_nnc_tensor_free(bt);
 316      1  }
 317
 318         TEST_CASE("reduce norm2 backward")
 319      1  {
 320      1    GUARD_ELSE_RETURN((ccv_nnc_cmd_ok(CCV_NNC_REDUCE_NORM2_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_NORM2_FORWARD, CCV_NNC_BACKEND_MPS)) &&
 321      1      (ccv_nnc_cmd_ok(CCV_NNC_REDUCE_NORM2_BACKWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_NORM2_FORWARD, CCV_NNC_BACKEND_MPS)));
 322      1    ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
 323      1    ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
 324      1    ha->data.f32[0] = 1;
 325      1    ha->data.f32[1] = 2;
 326      1    ha->data.f32[2] = 3;
 327      1    ha->data.f32[3] = 4;
 328      1    ha->data.f32[4] = 5;
 329      1    ha->data.f32[5] = 6;
 330      1    ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
 331      1    ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
 332      1    ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
 333      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
 334      1    ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
 335      1    ccv_nnc_tensor_t* const hh = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
 336      1    ccv_nnc_tensor_t* const hg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3), 0);
 337      1    hg->data.f32[0] = 1;
 338      1    hg->data.f32[1] = 2;
 339      1    hg->data.f32[2] = 3;
 340      1    ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(hg, ha, hb), TENSOR_LIST(hh), 0);
 341      1    ccv_nnc_tensor_t* const h = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
 342      1    ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 3), 0);
 343      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(hg), TENSOR_LIST(g), 0);
 344      1    ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_BACKWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(g, a, b), TENSOR_LIST(h), 0);
 345      1    ccv_nnc_tensor_t* const ht = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
 346      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(h), TENSOR_LIST(ht), 0);
 347      1    REQUIRE_TENSOR_EQ(hh, ht, "result should be equal");
 348      1    ccv_nnc_tensor_free(hh);
 349      1    ccv_nnc_tensor_free(hg);
 350      1    ccv_nnc_tensor_free(h);
 351      1    ccv_nnc_tensor_free(g);
 352      1    ccv_nnc_tensor_free(ha);
 353      1    ccv_nnc_tensor_free(hb);
 354      1    ccv_nnc_tensor_free(a);
 355      1    ccv_nnc_tensor_free(b);
 356      1    ccv_nnc_tensor_free(ht);
 357      1  }
 358
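
The backward test feeds the forward inputs and outputs (a, b) plus an output gradient g back into CMD_REDUCE_NORM2_BACKWARD. A standalone sketch of the gradient it should propagate, assuming the standard derivative of the L2 norm, h[i][j] = g[j] * a[i][j] / b[j] (compile with -lm):

    /* Reference gradient for "reduce norm2 backward". */
    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        const float a[2][3] = { { 1, 2, 3 }, { 4, 5, 6 } };
        const float g[3] = { 1, 2, 3 };
        float b[3], h[2][3];
        int i, j;
        for (j = 0; j < 3; j++) {
            float s = 0;
            for (i = 0; i < 2; i++)
                s += a[i][j] * a[i][j];
            b[j] = sqrtf(s); /* forward pass, matching the test setup */
        }
        for (i = 0; i < 2; i++)
            for (j = 0; j < 3; j++)
                h[i][j] = g[j] * a[i][j] / b[j]; /* chain rule through the norm */
        for (i = 0; i < 2; i++)
            printf("%g %g %g\n", h[i][0], h[i][1], h[i][2]);
        return 0;
    }
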
 359         TEST_CASE("reduce isnan float")
 360      1  {
 361      1    GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_ISNAN_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_ISNAN_FORWARD, CCV_NNC_BACKEND_MPS));
 362      1    ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
 363      1    ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 3), 0);
 364      1    ha->data.f32[0] = NAN;
 365      1    ha->data.f32[1] = 2;
 366      1    ha->data.f32[2] = 3;
 367      1    ha->data.f32[3] = 4;
 368      1    ha->data.f32[4] = 5;
 369      1    ha->data.f32[5] = 6;
 370      1    ccv_nnc_cmd_exec(CMD_REDUCE_ISNAN_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
 371      1    ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
 372      1    ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 3), 0);
 373      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
 374      1    ccv_nnc_cmd_exec(CMD_REDUCE_ISNAN_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
 375      1    ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 3), 0);
 376      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
 377      1    REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
 378      1    ccv_nnc_tensor_free(ha);
 379      1    ccv_nnc_tensor_free(hb);
 380      1    ccv_nnc_tensor_free(a);
 381      1    ccv_nnc_tensor_free(b);
 382      1    ccv_nnc_tensor_free(bt);
 383      1  }
 384
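
A standalone sketch of the isnan reduction above, assuming it is a logical-OR of isnan over the collapsed axis written into the int32 output (consistent with the 32F 2x3 input and 32S 3 output):

    /* Reference for "reduce isnan float": per-column NaN flag over axis 0. */
    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        const float a[2][3] = { { NAN, 2, 3 }, { 4, 5, 6 } };
        int b[3];
        int i, j;
        for (j = 0; j < 3; j++) {
            b[j] = 0;
            for (i = 0; i < 2; i++)
                if (isnan(a[i][j]))
                    b[j] = 1; /* sticky flag: one NaN taints the whole column */
        }
        printf("%d %d %d\n", b[0], b[1], b[2]); /* expected: 1 0 0 */
        return 0;
    }
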
 385         TEST_CASE("reduce isnan in half precision")
 386      1  {
 387      1    GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_REDUCE_ISNAN_FORWARD, CCV_NNC_BACKEND_GPU_CUDNN) || ccv_nnc_cmd_ok(CCV_NNC_REDUCE_ISNAN_FORWARD, CCV_NNC_BACKEND_MPS));
 388      1    ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
 389      1    ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 1), 0);
 390      1    ha->data.f32[0] = NAN;
 391      1    ha->data.f32[1] = 2;
 392      1    ha->data.f32[2] = 3;
 393      1    ha->data.f32[3] = 4;
 394      1    ha->data.f32[4] = 5;
 395      1    ha->data.f32[5] = 6;
 396      1    ccv_nnc_cmd_exec(CMD_REDUCE_ISNAN_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(hb), 0);
 397      1    ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 2, 3), 0);
 398      1    ccv_nnc_tensor_t* const a16 = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 2, 3), 0);
 399      1    ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 1), 0);
 400      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
 401      1    ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(a16), 0);
 402      1    ccv_nnc_cmd_exec(CMD_REDUCE_ISNAN_FORWARD(0), ccv_nnc_no_hint, 0, TENSOR_LIST(a16), TENSOR_LIST(b), 0);
 403      1    ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 1), 0);
 404      1    ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b), TENSOR_LIST(bt), 0);
 405      1    REQUIRE_TENSOR_EQ(hb, bt, "result should be equal");
 406      1    ccv_nnc_tensor_free(ha);
 407      1    ccv_nnc_tensor_free(hb);
 408      1    ccv_nnc_tensor_free(a);
 409      1    ccv_nnc_tensor_free(a16);
 410      1    ccv_nnc_tensor_free(b);
 411      1    ccv_nnc_tensor_free(bt);
 412      1  }
 413
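
The half-precision variant collapses the whole 2x3 tensor into a single int32 flag (the output is declared as 32S, 1) after converting the input to 16F on the GPU. A standalone sketch of the expected flag; the underlying assumption is that the one NaN remains NaN after the float-to-half conversion, which IEEE 754 format conversions preserve:

    /* Reference for "reduce isnan in half precision": one flag for the whole tensor. */
    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        const float a[6] = { NAN, 2, 3, 4, 5, 6 };
        int any_nan = 0;
        int i;
        for (i = 0; i < 6; i++)
            if (isnan(a[i]))
                any_nan = 1;
        printf("%d\n", any_nan); /* expected: 1 */
        return 0;
    }
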
 414         #include "case_main.h"