Coverage Report

Created: 2025-05-07 17:36

/home/liu/actions-runner/_work/ccv/ccv/test/int/nnc/partition.tests.c
#include "case.h"
#include "ccv_case.h"
#include "ccv_nnc_case.h"
#include <ccv.h>
#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>
#include <3rdparty/dsfmt/dSFMT.h>

TEST_SETUP()
{
  ccv_nnc_init();
}

TEST_CASE("partition a 1d tensor")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  // Fill the host input with deterministic pseudo-random values.
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10000), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 1), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 1), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 1), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10000), 0);
  // Run the partition on the GPU.
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 0, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  // Run the same partition on the CPU as the reference.
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 1), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 0, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  // Copy the GPU results back to the host; values and indices must match the reference exactly.
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}
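
Every test below repeats this pattern with different ranks, axes, element types, and counts. Judging by the test names and the output shapes, CMD_PARTITION_FORWARD(count, axis, descending) keeps count extreme values per slice along the given axis, together with their positions. As a rough mental model only (inferred from these tests, not taken from the library's documentation), a plain-C reference for a single row could look like the following hypothetical helper:

#include <stdlib.h>

/* Hypothetical reference: pick the k smallest (or, with descending set, the k
   largest) values from row[0..n) and record their positions. A simple O(k*n)
   selection illustrating the assumed semantics, not ccv's implementation. */
static void partition_ref(const float* row, int n, int k, int descending, float* out_values, int* out_indices)
{
  char* const taken = (char*)calloc(n, 1);
  int i, j;
  for (i = 0; i < k; i++)
  {
    int best = -1;
    for (j = 0; j < n; j++)
      if (!taken[j] && (best < 0 || (descending ? row[j] > row[best] : row[j] < row[best])))
        best = j;
    taken[best] = 1;
    out_values[i] = row[best];
    out_indices[i] = best;
  }
  free(taken);
}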

TEST_CASE("partition a 1d tensor, descending")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 10000), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 1), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 1), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 1), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 10000), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 0, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 1), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 0, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 2d tensor, top 1, last axis")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 100), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 1), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 100, 1), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 1), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 100, 100), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 1), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 2d tensor, top 1, last axis, descending")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 100), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 1), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 100, 1), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 1), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 100, 100), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 1), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 2d tensor, top 2, last axis")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 100), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 2), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 100, 2), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 2), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 100, 100), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 2), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 2d tensor, top 2, last axis, descending")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 100), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 2), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 100, 2), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 2), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 100, 100), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 2), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 3d tensor, top 1, middle axis")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 10, 10), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 100, 10, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1, 10), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 3d tensor, top 1, middle axis, descending")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 10, 10), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 100, 10, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1, 10), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 3d tensor, top 2, middle axis")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 10, 10), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 100, 10, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2, 10), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 3d tensor, top 2, middle axis, descending")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 10, 10), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32F, 100, 10, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2, 10), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 2d tensor, top 1, last axis, int")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 100), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.i32[i] = (int)dsfmt_genrand_uint32(&dsfmt) >> 8;
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 1), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 1), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 100), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}
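
The integer variants seed the input with (int)dsfmt_genrand_uint32(&dsfmt) >> 8 rather than a raw 32-bit word, which keeps magnitudes within roughly 24 bits. Both the unsigned-to-signed cast and the right shift of a negative int rely on the common two's-complement, arithmetic-shift behavior; strictly speaking C leaves those implementation-defined. A tiny standalone check of the resulting range:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
  /* Extremes of (int)u >> 8: values land in roughly [-2^23, 2^23). */
  uint32_t lo = 0u, hi = 0x7fffffffu, neg = 0xffffffffu;
  printf("%d %d %d\n", (int)lo >> 8, (int)hi >> 8, (int)neg >> 8);
  /* Prints "0 8388607 -1" on platforms with arithmetic right shift. */
  return 0;
}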

TEST_CASE("partition a 2d tensor, top 1, last axis, int, descending")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 100), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.i32[i] = (int)dsfmt_genrand_uint32(&dsfmt) >> 8;
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 1), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 1), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 100), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 2d tensor, top 2, last axis, int")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 100), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.i32[i] = (int)dsfmt_genrand_uint32(&dsfmt) >> 8;
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 2), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 2), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 100), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 2d tensor, top 2, last axis, int, descending")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 100), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.i32[i] = (int)dsfmt_genrand_uint32(&dsfmt) >> 8;
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 2), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 2), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 100), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 3d tensor, top 1, middle axis, int")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 10, 10), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.i32[i] = (int)dsfmt_genrand_uint32(&dsfmt) >> 8;
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 10, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1, 10), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 3d tensor, top 1, middle axis, int, descending")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 10, 10), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.i32[i] = (int)dsfmt_genrand_uint32(&dsfmt) >> 8;
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 10, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1, 10), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 3d tensor, top 2, middle axis, int")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 10, 10), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.i32[i] = (int)dsfmt_genrand_uint32(&dsfmt) >> 8;
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 10, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2, 10), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 3d tensor, top 2, middle axis, int, descending")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 10, 10), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.i32[i] = (int)dsfmt_genrand_uint32(&dsfmt) >> 8;
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 10, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2, 10), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hb, bt, "should be equal");
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 2d tensor, top 1, last axis, half")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 100), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  // Round-trip the input through FP16 so the FP32 reference below operates on the same quantized values as the FP16 GPU kernel.
  ccv_nnc_tensor_t* const ha16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 100), 0);
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(ha16), 0);
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha16), TENSOR_LIST(ha), 0);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 1), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 1), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 1), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 100), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha16), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 1), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  // Only the indices are compared: hb holds FP16 values while the reference bt holds FP32 values.
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(ha16);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}
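
The half-precision tests round-trip the input through FP16 (ha to ha16 and back) before anything runs, so the FP32 reference pass on the CPU sees exactly the values the FP16 GPU pass sees; they then compare only the index outputs, since hb holds FP16 values while bt stays FP32. A minimal sketch of the quantization effect that motivates this, assuming a compiler that provides the _Float16 extension (recent GCC or Clang):

#include <stdio.h>

int main(void)
{
  float x = 0.1f;
  float y = (float)(_Float16)x; /* quantize to half precision, widen back */
  /* The two differ: FP16 has an 11-bit significand, so ordering decisions must
     be made on quantized values to match what a half-precision kernel sees. */
  printf("%.9f vs %.9f\n", x, y);
  return 0;
}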

TEST_CASE("partition a 2d tensor, top 1, last axis, half, descending")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 100), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  ccv_nnc_tensor_t* const ha16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 100), 0);
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(ha16), 0);
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha16), TENSOR_LIST(ha), 0);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 1), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 1), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 1), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 100), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha16), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 1), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(ha16);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 2d tensor, top 2, last axis, half")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 100), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  ccv_nnc_tensor_t* const ha16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 100), 0);
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(ha16), 0);
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha16), TENSOR_LIST(ha), 0);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 2), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 2), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 2), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 100), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha16), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 2), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(ha16);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 2d tensor, top 2, last axis, half, descending")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 10), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 1000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt) + ((float)i / 500.0);
  ccv_nnc_tensor_t* const ha16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(ha16), 0);
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha16), TENSOR_LIST(ha), 0);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 2), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 2), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 2), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha16), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 2), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(ha16);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 3d tensor, top 1, middle axis, half")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 10, 10), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  ccv_nnc_tensor_t* const ha16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 10, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(ha16), 0);
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha16), TENSOR_LIST(ha), 0);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 10, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha16), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1, 10), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(ha16);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 3d tensor, top 1, middle axis, half, descending")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 3, 10), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 3000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  ccv_nnc_tensor_t* const ha16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 3, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(ha16), 0);
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha16), TENSOR_LIST(ha), 0);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 3, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha16), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 1, 10), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 1, 10), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(1, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(ha16);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 3d tensor, top 2, middle axis, half")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 10, 10), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 10000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  ccv_nnc_tensor_t* const ha16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 10, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(ha16), 0);
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha16), TENSOR_LIST(ha), 0);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 10, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha16), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2, 10), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 0), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(ha16);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

TEST_CASE("partition a 3d tensor, top 2, middle axis, half, descending")
{
  GUARD_ELSE_RETURN(ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_GPU_REF) || ccv_nnc_cmd_ok(CCV_NNC_PARTITION_FORWARD, CCV_NNC_BACKEND_MPS));
  ccv_nnc_tensor_t* const ha = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 3, 10), 0);
  dsfmt_t dsfmt;
  dsfmt_init_gen_rand(&dsfmt, 0);
  int i;
  for (i = 0; i < 3000; i++)
    ha->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
  ccv_nnc_tensor_t* const ha16 = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 3, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(ha16), 0);
  ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha16), TENSOR_LIST(ha), 0);
  ccv_nnc_tensor_t* const hb = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(16F, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const hindices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 32S, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, GPU_TENSOR_NHWC(000, 16F, 100, 3, 10), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(ha16), TENSOR_LIST(a), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b, indices), 0);
  ccv_nnc_tensor_t* const bt = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 100, 2, 10), 0);
  ccv_nnc_tensor_t* const indicest = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 100, 2, 10), 0);
  ccv_nnc_cmd_exec(CMD_PARTITION_FORWARD(2, 1, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(ha), TENSOR_LIST(bt, indicest), 0);
  ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(b, indices), TENSOR_LIST(hb, hindices), 0);
  REQUIRE_TENSOR_EQ(hindices, indicest, "should be equal");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(indices);
  ccv_nnc_tensor_free(ha);
  ccv_nnc_tensor_free(ha16);
  ccv_nnc_tensor_free(hb);
  ccv_nnc_tensor_free(hindices);
  ccv_nnc_tensor_free(bt);
  ccv_nnc_tensor_free(indicest);
}

#include "case_main.h"