Coverage Report

Created: 2024-08-19 11:27

/home/liu/actions-runner/_work/ccv/ccv/test/unit/nnc/forward.tests.c
Line
Count
Source
1
#include "case.h"
2
#include "ccv_case.h"
3
#include "ccv_nnc_case.h"
4
#include <ccv.h>
5
#include <nnc/ccv_nnc.h>
6
#include <nnc/ccv_nnc_easy.h>
7
#include "3rdparty/dsfmt/dSFMT.h"
8
9
// Test setup hook for this file: initializes the NNC library through
// ccv_nnc_init().
TEST_SETUP() {
  ccv_nnc_init();
}
13
14
// Forward convolution of a 225x185x3 input filled with ones against four
// 11x11x3 filters filled with ones (bias 0). The 55x45x4 output is compared
// with a hand-built matrix holding 363 (= 11 * 11 * 3) in the interior,
// 330 (= 11 * 10 * 3) along the edges and 300 (= 10 * 10 * 3) at the corners.
TEST_CASE("convolutional network of 11x11 on 225x185 with uniform weights")
{
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 225, 185, 3), 0);
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 55, 45, 4), 0);
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(1, 4, 11, 11, 3);
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 11, 11, 3), 0);
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
  // Fill the inputs: unit weights, unit image, zero bias.
  int k;
  for (k = 0; k < 11 * 11 * 3 * 4; k++)
  {
    w->data.f32[k] = 1;
  }
  for (k = 0; k < 225 * 185 * 3; k++)
  {
    a->data.f32[k] = 1;
  }
  for (k = 0; k < 4; k++)
  {
    bias->data.f32[k] = 0;
  }
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
  // Build the reference output; all four channels of a pixel share one value.
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(55, 45, CCV_32F | 4, 0, 0);
  int row, col;
  for (row = 0; row < 55; row++)
  {
    const int on_row_edge = (row == 0 || row == 54);
    for (col = 0; col < 45; col++)
    {
      const int on_col_edge = (col == 0 || col == 44);
      int expected;
      if (on_row_edge && on_col_edge)
        expected = 300; // corner
      else if (on_row_edge || on_col_edge)
        expected = 330; // edge, but not a corner
      else
        expected = 363; // interior
      for (k = 0; k < 4; k++)
        c->data.f32[(row * 45 + col) * 4 + k] = expected;
    }
  }
  REQUIRE_MATRIX_EQ(b, c, "55x45 matrix should be exactly a matrix fill 363, with 300 on the corner and 330 on the border");
  ccv_matrix_free(c);
  ccv_nnc_tensor_free(bias);
  ccv_nnc_tensor_free(w);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(a);
}
44
45
// Forward convolution of a 17x27x1 all-ones input against four all-ones
// filters created with CMD_CONVOLUTION_FORWARD(1, 4, 5, 3, 1) (bias 0).
// The 17x27x4 output is compared with a reference that is 15 in the
// interior and 6, 8, 9, 10 or 12 on the two outermost rows / outermost
// columns, as selected below.
TEST_CASE("convolutional network of 5x3 on 17x27 with uniform weights")
{
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 17, 27, 1), 0);
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 17, 27, 4), 0);
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(1, 4, 5, 3, 1);
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 5, 3, 1), 0);
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
  // Fill the inputs: unit weights, unit image, zero bias.
  int k;
  for (k = 0; k < 5 * 3 * 4; k++)
  {
    w->data.f32[k] = 1;
  }
  for (k = 0; k < 17 * 27; k++)
  {
    a->data.f32[k] = 1;
  }
  for (k = 0; k < 4; k++)
  {
    bias->data.f32[k] = 0;
  }
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
  // Build the reference output; the value depends only on (row, col), so it
  // is picked once per pixel and then written to all four channels.
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(17, 27, CCV_32F | 4, 0, 0);
  int row, col;
  for (row = 0; row < 17; row++)
  {
    const int outer_row = (row == 0 || row == 16);
    const int second_row = (row == 1 || row == 15);
    for (col = 0; col < 27; col++)
    {
      const int outer_col = (col == 0 || col == 26);
      int expected;
      if (outer_row && outer_col)
        expected = 6;
      else if (second_row && outer_col)
        expected = 8;
      else if (outer_row)
        expected = 9;
      else if (outer_col)
        expected = 10;
      else if (second_row)
        expected = 12;
      else
        expected = 15;
      for (k = 0; k < 4; k++)
        c->data.f32[(row * 27 + col) * 4 + k] = expected;
    }
  }
  REQUIRE_MATRIX_EQ(b, c, "17x27 matrix should be exactly a matrix fill 15, with 6, 8 on the corner and 9, 10, 12 on the border");
  ccv_matrix_free(c);
  ccv_nnc_tensor_free(bias);
  ccv_nnc_tensor_free(w);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(a);
}
88
89
// Forward convolution of a 225x185x1 input holding the ramp 1, 2, 3, ...
// against four identical 11x11 filters whose taps hold 1..121 (bias 0).
// The expected 55x45x4 output is built piecewise: the first output row
// (y = 0), the last output row (y = 54), then rows 1..53, each split into
// a left pixel, the interior pixels and a right pixel. Both tensors are
// scaled by 1e-7 before comparison so the values fall within the matcher's
// tolerance.
TEST_CASE("convolutional network of 11x11 on 225x185 with non-uniform weights")
{
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 225, 185, 1), 0);
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 55, 45, 4), 0);
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(1, 4, 11, 11, 1);
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 11, 11, 1), 0);
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
  // configure the inlets: every filter gets taps 1..121, the input gets 1..225*185.
  int i, x, y;
  for (x = 0; x < 4; x++)
    for (i = 0; i < 11 * 11; i++)
      w->data.f32[x * 11 * 11 + i] = i + 1;
  for (i = 0; i < 225 * 185; i++)
    a->data.f32[i] = i + 1;
  for (i = 0; i < 4; i++)
    bias->data.f32[i] = 0;
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(55, 45, CCV_32F | 4, 0, 0);
  float sum = 0;
  // first output row (y = 0), left pixel (x = 0)
  for (y = 0; y < 10; y++)
    for (x = 0; x < 10; x++)
      sum += ((y + 1) * 11 + x + 2) * (y * 185 + x + 1);
  for (i = 0; i < 4; i++)
    c->data.f32[i] = sum;
  // first output row, interior pixels (x = 1..43): base value plus a
  // per-pixel linear increment
  sum = 0;
  for (y = 0; y < 10; y++)
    for (x = 0; x < 11; x++)
      sum += ((y + 1) * 11 + x + 1) * (y * 185 + (x + 3) + 1);
  for (x = 1; x < 44; x++)
    for (i = 0; i < 4; i++)
      c->data.f32[x * 4 + i] = sum + (x - 1) * 4 * (11 * 11 + 12) * 11 * 10 / 2;
  // first output row, right pixel (x = 44)
  sum = 0;
  for (y = 0; y < 10; y++)
    for (x = 0; x < 10; x++)
      sum += ((y + 1) * 11 + x + 1) * (y * 185 + (x + 175) + 1);
  for (i = 0; i < 4; i++)
    c->data.f32[44 * 4 + i] = sum;
  // last output row (y = 54), left pixel (x = 0)
  sum = 0;
  for (y = 0; y < 10; y++)
    for (x = 0; x < 10; x++)
      sum += (y * 11 + x + 2) * ((y + 215) * 185 + x + 1);
  for (i = 0; i < 4; i++)
    c->data.f32[54 * 45 * 4 + i] = sum;
  // last output row, interior pixels (x = 1..43)
  sum = 0;
  for (y = 0; y < 10; y++)
    for (x = 0; x < 11; x++)
      sum += (y * 11 + x + 1) * ((y + 215) * 185 + (x + 3) + 1);
  for (x = 1; x < 44; x++)
    for (i = 0; i < 4; i++)
      c->data.f32[(54 * 45 + x) * 4 + i] = sum + (x - 1) * 4 * (10 * 11 + 1) * 11 * 10 / 2;
  // last output row, right pixel (x = 44)
  sum = 0;
  for (y = 0; y < 10; y++)
    for (x = 0; x < 10; x++)
      sum += (y * 11 + x + 1) * ((y + 215) * 185 + (x + 175) + 1);
  for (i = 0; i < 4; i++)
    c->data.f32[(54 * 45 + 44) * 4 + i] = sum;
  // rows 1..53: border[0] is the left pixel, border[1] the right pixel and
  // sum the first interior pixel of row 1; all three are advanced by a
  // constant step after each row.
  float border[] = {
    0, 0
  };
  for (y = 0; y < 11; y++)
    for (x = 0; x < 10; x++)
      border[0] += (y * 11 + x + 2) * ((y + 3) * 185 + x + 1);
  for (y = 0; y < 11; y++)
    for (x = 0; x < 10; x++)
      border[1] += (y * 11 + x + 1) * ((y + 3) * 185 + (x + 175) + 1);
  sum = 0;
  for (y = 0; y < 11; y++)
    for (x = 0; x < 11; x++)
      sum += (y * 11 + x + 1) * ((y + 3) * 185 + (x + 3) + 1);
  for (y = 1; y < 54; y++)
  {
    for (i = 0; i < 4; i++)
      c->data.f32[y * 45 * 4 + i] = border[0];
    for (x = 1; x < 44; x++)
      for (i = 0; i < 4; i++)
        c->data.f32[(y * 45 + x) * 4 + i] = sum + (x - 1) * 4 * (11 * 11 + 1) * 11 * 11 / 2;
    for (i = 0; i < 4; i++)
      c->data.f32[(y * 45 + 44) * 4 + i] = border[1];
    sum += 185 * 4 * (11 * 11 + 1) * 11 * 11 / 2;
    border[0] += 185 * 4 * ((11 * 11 + 1) * 11 * 11 / 2 - (10 * 11 + 1 + 1) * 11 / 2);
    border[1] += 185 * 4 * ((11 * 11 + 1) * 11 * 11 / 2 - (11 * 11 + 11) * 11 / 2);
  }
  // regularize the output so it is within the tolerance
  for (i = 0; i < 55 * 45 * 4; i++)
    c->data.f32[i] = c->data.f32[i] * 1e-7, b->data.f32[i] = b->data.f32[i] * 1e-7;
  // The compared tensors are 55x45, so the message says so.
  REQUIRE_MATRIX_EQ(b, c, "55x45 matrix should be exactly the same");
  ccv_matrix_free(c);
  ccv_nnc_tensor_free(bias);
  ccv_nnc_tensor_free(w);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(a);
}
184
185
// Forward convolution of a 27x27x1 input holding the ramp 1, 2, 3, ...
// against four identical filters whose 15 taps hold 1..15 (bias 0),
// created with CMD_CONVOLUTION_FORWARD(1, 4, 3, 5, 1). The expected
// 27x27x4 output is assembled piecewise: output row 0, output row 26,
// then rows 1..25. Within each row the two leftmost and two rightmost
// pixels get their own values and pixels 2..24 follow a linear formula.
TEST_CASE("convolutional network of 3x5 on 27x27 with non-uniform weights")
{
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0);
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 4), 0);
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(1, 4, 3, 5, 1);
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 3, 5, 1), 0);
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
  // Fill the inputs: taps 1..15 per filter, input ramp 1..729, zero bias.
  int ch, col, row, f, k;
  for (f = 0; f < 4; f++)
  {
    for (k = 0; k < 3 * 5; k++)
      w->data.f32[f * 3 * 5 + k] = k + 1;
  }
  for (k = 0; k < 27 * 27; k++)
  {
    a->data.f32[k] = k + 1;
  }
  for (ch = 0; ch < 4; ch++)
  {
    bias->data.f32[ch] = 0;
  }
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(27, 27, CCV_32F | 4, 0, 0);
  float* const out = c->data.f32;
  // ---- output row 0 ----
  // pixel 0
  float sum = 0;
  for (row = 0; row < 2; row++)
  {
    for (col = 0; col < 3; col++)
      sum += ((row + 1) * 5 + col + 3) * (row * 27 + col + 1);
  }
  for (ch = 0; ch < 4; ch++)
  {
    out[ch] = sum;
  }
  // pixel 1
  sum = 0;
  for (row = 0; row < 2; row++)
  {
    for (col = 0; col < 4; col++)
      sum += ((row + 1) * 5 + col + 2) * (row * 27 + col + 1);
  }
  for (ch = 0; ch < 4; ch++)
  {
    out[4 + ch] = sum;
  }
  // pixels 2..24
  sum = 0;
  for (row = 0; row < 2; row++)
  {
    for (col = 0; col < 5; col++)
      sum += ((row + 1) * 5 + col + 1) * (row * 27 + col + 1);
  }
  for (col = 2; col < 25; col++)
  {
    for (ch = 0; ch < 4; ch++)
      out[col * 4 + ch] = sum + (col - 2) * 21 * 10 / 2;
  }
  // pixel 25
  sum = 0;
  for (row = 0; row < 2; row++)
  {
    for (col = 0; col < 4; col++)
      sum += ((row + 1) * 5 + col + 1) * (row * 27 + col + 24);
  }
  for (ch = 0; ch < 4; ch++)
  {
    out[25 * 4 + ch] = sum;
  }
  // pixel 26
  sum = 0;
  for (row = 0; row < 2; row++)
  {
    for (col = 0; col < 3; col++)
      sum += ((row + 1) * 5 + col + 1) * (row * 27 + col + 25);
  }
  for (ch = 0; ch < 4; ch++)
  {
    out[26 * 4 + ch] = sum;
  }
  // ---- output row 26 ----
  // pixel 0
  sum = 0;
  for (row = 0; row < 2; row++)
  {
    for (col = 0; col < 3; col++)
      sum += (row * 5 + col + 3) * ((row + 25) * 27 + col + 1);
  }
  for (ch = 0; ch < 4; ch++)
  {
    out[27 * 26 * 4 + ch] = sum;
  }
  // pixel 1
  sum = 0;
  for (row = 0; row < 2; row++)
  {
    for (col = 0; col < 4; col++)
      sum += (row * 5 + col + 2) * ((row + 25) * 27 + col + 1);
  }
  for (ch = 0; ch < 4; ch++)
  {
    out[(27 * 26 + 1) * 4 + ch] = sum;
  }
  // pixels 2..24
  sum = 0;
  for (row = 0; row < 2; row++)
  {
    for (col = 0; col < 5; col++)
      sum += (row * 5 + col + 1) * ((row + 25) * 27 + col + 1);
  }
  for (col = 2; col < 25; col++)
  {
    for (ch = 0; ch < 4; ch++)
      out[(27 * 26 + col) * 4 + ch] = sum + (col - 2) * 11 * 10 / 2;
  }
  // pixel 25
  sum = 0;
  for (row = 0; row < 2; row++)
  {
    for (col = 0; col < 4; col++)
      sum += (row * 5 + col + 1) * ((row + 25) * 27 + col + 24);
  }
  for (ch = 0; ch < 4; ch++)
  {
    out[(27 * 26 + 25) * 4 + ch] = sum;
  }
  // pixel 26
  sum = 0;
  for (row = 0; row < 2; row++)
  {
    for (col = 0; col < 3; col++)
      sum += (row * 5 + col + 1) * ((row + 25) * 27 + col + 25);
  }
  for (ch = 0; ch < 4; ch++)
  {
    out[(27 * 26 + 26) * 4 + ch] = sum;
  }
  // ---- output rows 1..25 ----
  // border[0..3] are the row-1 values for pixels 0, 1, 25 and 26; sum is the
  // row-1 value for pixel 2. Later rows add a term linear in (row - 1).
  float border[] = {
    0, 0, 0, 0
  };
  for (row = 0; row < 3; row++)
  {
    for (col = 0; col < 3; col++)
      border[0] += (row * 5 + col + 3) * (row * 27 + col + 1);
  }
  for (row = 0; row < 3; row++)
  {
    for (col = 0; col < 4; col++)
      border[1] += (row * 5 + col + 2) * (row * 27 + col + 1);
  }
  for (row = 0; row < 3; row++)
  {
    for (col = 0; col < 4; col++)
      border[2] += (row * 5 + col + 1) * (row * 27 + col + 24);
  }
  for (row = 0; row < 3; row++)
  {
    for (col = 0; col < 3; col++)
      border[3] += (row * 5 + col + 1) * (row * 27 + col + 25);
  }
  sum = 0;
  for (row = 0; row < 3; row++)
  {
    for (col = 0; col < 5; col++)
      sum += (row * 5 + col + 1) * (row * 27 + col + 1);
  }
  for (row = 1; row < 26; row++)
  {
    for (ch = 0; ch < 4; ch++)
    {
      out[row * 27 * 4 + ch] = border[0] + (row - 1) * 27 * (3 + 4 + 5 + 8 + 9 + 10 + 13 + 14 + 15);
      out[(row * 27 + 1) * 4 + ch] = border[1] + (row - 1) * 27 * (2 + 3 + 4 + 5 + 7 + 8 + 9 + 10 + 12 + 13 + 14 + 15);
      for (col = 2; col < 25; col++)
      {
        out[(row * 27 + col) * 4 + ch] = sum + ((row - 1) * 27 + col - 2) * 16 * 15 / 2;
      }
      out[(row * 27 + 25) * 4 + ch] = border[2] + (row - 1) * 27 * (1 + 2 + 3 + 4 + 6 + 7 + 8 + 9 + 11 + 12 + 13 + 14);
      out[(row * 27 + 26) * 4 + ch] = border[3] + (row - 1) * 27 * (1 + 2 + 3 + 6 + 7 + 8 + 11 + 12 + 13);
    }
  }
  REQUIRE_MATRIX_EQ(b, c, "27x27 matrix should be exactly the same");
  ccv_matrix_free(c);
  ccv_nnc_tensor_free(bias);
  ccv_nnc_tensor_free(w);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(a);
}
306
307
// Runs the same forward convolution twice on random input and weights:
// once with an explicit all-zero bias tensor (result in bg) and once with
// the bias omitted from the input list (result in b), then requires the
// two outputs to match.
TEST_CASE("convolution with no bias")
{
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0);
  ccv_nnc_tensor_t* bg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 4), 0);
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(1, 4, 3, 5, 1);
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, bg->info);
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 3, 5, 1), 0);
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
  // Fixed seed (1); the input is drawn before the weights.
  dsfmt_t dsfmt;
  int k;
  dsfmt_init_gen_rand(&dsfmt, 1);
  float* const input = a->data.f32;
  for (k = 0; k < 27 * 27; k++)
  {
    input[k] = dsfmt_genrand_open_close(&dsfmt);
  }
  float* const weights = w->data.f32;
  for (k = 0; k < 4 * 3 * 5; k++)
  {
    weights[k] = dsfmt_genrand_open_close(&dsfmt);
  }
  for (k = 0; k < 4; k++)
  {
    bias->data.f32[k] = 0;
  }
  // Reference run: explicit zero bias.
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(bg), 0);
  // Run under test: no bias tensor passed.
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 4), 0);
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w), TENSOR_LIST(b), 0);
  REQUIRE_MATRIX_EQ(b, bg, "convolution with no bias should equal to with bias = 0");
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(bg);
  ccv_nnc_tensor_free(w);
  ccv_nnc_tensor_free(bias);
}
334
335
// Max pooling (CMD_MAX_POOL_FORWARD(3, 3)) from a 55x55x1 input holding
// the ramp 1, 2, 3, ... to a 27x27x1 output. The expected value at output
// (y, x) is 113 + 110 * y + 2 * x.
TEST_CASE("maximum pool network of 55x55 with window of 3x3 and stride of 2")
{
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 55, 55, 1), 0);
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0);
  ccv_nnc_cmd_t cmd = CMD_MAX_POOL_FORWARD(3, 3);
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
  // Fill the input with 1..55*55.
  int k;
  for (k = 0; k < 55 * 55; k++)
  {
    a->data.f32[k] = k + 1;
  }
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
  // Build the reference output one row at a time.
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(27, 27, CCV_32F | CCV_C1, 0, 0);
  int row, col;
  for (row = 0; row < 27; row++)
  {
    float* const expected_row = c->data.f32 + row * 27;
    for (col = 0; col < 27; col++)
      expected_row[col] = 113 + row * 110 + col * 2;
  }
  REQUIRE_MATRIX_EQ(b, c, "max pool network output should be exactly the same");
  ccv_matrix_free(c);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(a);
}
356
357
// Max pooling (CMD_MAX_POOL_FORWARD(3, 3)) from a 57x57x1 input holding
// the ramp 1, 2, 3, ... to a 19x19x1 output. The expected value at output
// (y, x) is 117 + 171 * y + 3 * x.
TEST_CASE("maximum pool network of 57x57 with window of 3x3 and stride of 3")
{
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 57, 57, 1), 0);
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 19, 19, 1), 0);
  ccv_nnc_cmd_t cmd = CMD_MAX_POOL_FORWARD(3, 3);
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
  // Fill the input with 1..57*57.
  int k;
  for (k = 0; k < 57 * 57; k++)
  {
    a->data.f32[k] = k + 1;
  }
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
  // Build the reference output one row at a time.
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(19, 19, CCV_32F | CCV_C1, 0, 0);
  int row, col;
  for (row = 0; row < 19; row++)
  {
    float* const expected_row = c->data.f32 + row * 19;
    for (col = 0; col < 19; col++)
      expected_row[col] = 117 + row * 171 + col * 3;
  }
  REQUIRE_MATRIX_EQ(b, c, "max pool network output should be exactly the same");
  ccv_matrix_free(c);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(a);
}
378
379
// Max pooling (CMD_MAX_POOL_FORWARD(2, 2)) from a 54x54x1 input holding
// the ramp 1, 2, 3, ... to a 27x27x1 output. The expected value at output
// (y, x) is 56 + 108 * y + 2 * x.
TEST_CASE("maximum pool network of 54x54 with window of 2x2 and stride of 2")
{
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 54, 54, 1), 0);
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0);
  ccv_nnc_cmd_t cmd = CMD_MAX_POOL_FORWARD(2, 2);
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
  // Fill the input with 1..54*54.
  int k;
  for (k = 0; k < 54 * 54; k++)
  {
    a->data.f32[k] = k + 1;
  }
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
  // Build the reference output one row at a time.
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(27, 27, CCV_32F | CCV_C1, 0, 0);
  int row, col;
  for (row = 0; row < 27; row++)
  {
    float* const expected_row = c->data.f32 + row * 27;
    for (col = 0; col < 27; col++)
      expected_row[col] = 56 + row * 108 + col * 2;
  }
  REQUIRE_MATRIX_EQ(b, c, "max pool network output should be exactly the same");
  ccv_matrix_free(c);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(a);
}
400
401
// Average pooling (CMD_AVERAGE_POOL_FORWARD(3, 3)) from a 55x55x1 input
// holding the ramp 1, 2, 3, ... to a 27x27x1 output. The expected value at
// output (y, x) is 57 + 110 * y + 2 * x.
TEST_CASE("average pool network of 55x55 with window of 3x3 and stride of 2")
{
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 55, 55, 1), 0);
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0);
  ccv_nnc_cmd_t cmd = CMD_AVERAGE_POOL_FORWARD(3, 3);
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
  // Fill the input with 1..55*55.
  int k;
  for (k = 0; k < 55 * 55; k++)
  {
    a->data.f32[k] = k + 1;
  }
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
  // Build the reference output one row at a time.
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(27, 27, CCV_32F | CCV_C1, 0, 0);
  int row, col;
  for (row = 0; row < 27; row++)
  {
    float* const expected_row = c->data.f32 + row * 27;
    for (col = 0; col < 27; col++)
      expected_row[col] = 57 + row * 110 + col * 2;
  }
  REQUIRE_MATRIX_EQ(b, c, "average pool network output should be exactly the same");
  ccv_matrix_free(c);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(a);
}
422
423
// Average pooling (CMD_AVERAGE_POOL_FORWARD(3, 3)) from a 57x57x1 input
// holding the ramp 1, 2, 3, ... to a 19x19x1 output. The expected value at
// output (y, x) is 59 + 171 * y + 3 * x.
TEST_CASE("average pool network of 57x57 with window of 3x3 and stride of 3")
{
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 57, 57, 1), 0);
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 19, 19, 1), 0);
  ccv_nnc_cmd_t cmd = CMD_AVERAGE_POOL_FORWARD(3, 3);
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
  // Fill the input with 1..57*57.
  int k;
  for (k = 0; k < 57 * 57; k++)
  {
    a->data.f32[k] = k + 1;
  }
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
  // Build the reference output one row at a time.
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(19, 19, CCV_32F | CCV_C1, 0, 0);
  int row, col;
  for (row = 0; row < 19; row++)
  {
    float* const expected_row = c->data.f32 + row * 19;
    for (col = 0; col < 19; col++)
      expected_row[col] = 59 + row * 171 + col * 3;
  }
  REQUIRE_MATRIX_EQ(b, c, "average pool network output should be exactly the same");
  ccv_matrix_free(c);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(a);
}
444
445
// Average pooling (CMD_AVERAGE_POOL_FORWARD(2, 2)) from a 54x54x1 input
// holding the ramp 1, 2, 3, ... to a 27x27x1 output. The expected value at
// output (y, x) is 28.5 + 108 * y + 2 * x.
TEST_CASE("average pool network of 54x54 with window of 2x2 and stride of 2")
{
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 54, 54, 1), 0);
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0);
  ccv_nnc_cmd_t cmd = CMD_AVERAGE_POOL_FORWARD(2, 2);
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
  // Fill the input with 1..54*54.
  int k;
  for (k = 0; k < 54 * 54; k++)
  {
    a->data.f32[k] = k + 1;
  }
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
  // Build the reference output one row at a time.
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(27, 27, CCV_32F | CCV_C1, 0, 0);
  int row, col;
  for (row = 0; row < 27; row++)
  {
    float* const expected_row = c->data.f32 + row * 27;
    for (col = 0; col < 27; col++)
      expected_row[col] = 28.5 + row * 108 + col * 2;
  }
  REQUIRE_MATRIX_EQ(b, c, "average pool network output should be exactly the same");
  ccv_matrix_free(c);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(a);
}
466
467
// Transposed convolution (CMD_CONVOLUTION_TRANSPOSE_FORWARD(1, 1, 0, 3, 3, 2))
// of a 2x2x2 all-ones NHWC input with weights 0..17 and bias 1. The hint is
// forced to stride 1 with zero border, and the 4x4x1 output is compared
// with a hard-coded table.
TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights")
{
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2, 2), 0);
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 4, 1), 0);
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(1, 1, 0, 3, 3, 2);
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
  // Override the automatic hint on both axes.
  int axis;
  for (axis = 0; axis < 2; axis++)
  {
    hint.stride.dim[axis] = 1;
    hint.border.begin[axis] = 0;
    hint.border.end[axis] = 0;
  }
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3, 3, 1), 0);
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
  // Fill the inputs: weights 0..17, unit input, bias 1.
  int k;
  for (k = 0; k < 3 * 3 * 2; k++)
  {
    w->data.f32[k] = k;
  }
  for (k = 0; k < 2 * 2 * 2; k++)
  {
    a->data.f32[k] = 1;
  }
  bias->data.f32[0] = 1;
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
  // Expected 4x4x1 output; c wraps this array.
  float expected[] = {
    10., 21., 25., 14.,
    25., 53., 61., 33.,
    37., 77., 85., 45.,
    22., 45., 49., 26.
  };
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(expected, CPU_TENSOR_NHWC(32F, 4, 4, 1), 0);
  REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same");
  ccv_nnc_tensor_free(c);
  ccv_nnc_tensor_free(bias);
  ccv_nnc_tensor_free(w);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(a);
}
504
505
// Transposed convolution (CMD_CONVOLUTION_TRANSPOSE_FORWARD(2, 2, 0, 3, 3, 2))
// of a 2x2x2 all-ones NHWC input with weights 0..17 and bias 1 on both
// output channels. The hint is forced to stride 1 with zero border, and the
// 4x4x2 output is compared with a hard-coded table.
TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights and group of 2")
{
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2, 2), 0);
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 4, 2), 0);
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(2, 2, 0, 3, 3, 2);
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
  // Override the automatic hint on both axes.
  int axis;
  for (axis = 0; axis < 2; axis++)
  {
    hint.stride.dim[axis] = 1;
    hint.border.begin[axis] = 0;
    hint.border.end[axis] = 0;
  }
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3, 3, 1), 0);
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0);
  // Fill the inputs: weights 0..17, unit input, bias 1.
  int k;
  for (k = 0; k < 3 * 3 * 2; k++)
  {
    w->data.f32[k] = k;
  }
  for (k = 0; k < 2 * 2 * 2; k++)
  {
    a->data.f32[k] = 1;
  }
  for (k = 0; k < 2; k++)
  {
    bias->data.f32[k] = 1;
  }
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
  // Expected 4x4x2 output (two interleaved channel values per pixel);
  // c wraps this array.
  float expected[] = {
    1., 10.,  2., 20.,  4., 22.,  3., 12.,
    4., 22.,  9., 45., 13., 49.,  8., 26.,
    10., 28., 21., 57., 25., 61., 14., 32.,
    7., 16., 14., 32., 16., 34.,  9., 18.
  };
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(expected, CPU_TENSOR_NHWC(32F, 4, 4, 2), 0);
  REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same");
  ccv_nnc_tensor_free(c);
  ccv_nnc_tensor_free(bias);
  ccv_nnc_tensor_free(w);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(a);
}
542
543
// Transposed convolution (CMD_CONVOLUTION_TRANSPOSE_FORWARD(2, 2, 0, 3, 3, 2))
// of a 2x2x2 all-ones NHWC input with weights 0..17 and bias 1 on both
// output channels. The hint is forced to stride 2 with zero border, and the
// 5x5x2 output is compared with a hard-coded table.
TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights and group of 2, stride of 2")
{
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2, 2), 0);
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 5, 2), 0);
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(2, 2, 0, 3, 3, 2);
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
  // Override the automatic hint on both axes.
  int axis;
  for (axis = 0; axis < 2; axis++)
  {
    hint.stride.dim[axis] = 2;
    hint.border.begin[axis] = 0;
    hint.border.end[axis] = 0;
  }
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3, 3, 1), 0);
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0);
  // Fill the inputs: weights 0..17, unit input, bias 1.
  int k;
  for (k = 0; k < 3 * 3 * 2; k++)
  {
    w->data.f32[k] = k;
  }
  for (k = 0; k < 2 * 2 * 2; k++)
  {
    a->data.f32[k] = 1;
  }
  for (k = 0; k < 2; k++)
  {
    bias->data.f32[k] = 1;
  }
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
  // Expected 5x5x2 output (two interleaved channel values per pixel);
  // c wraps this array.
  float expected[] = {
    1., 10.,  2., 11.,  3., 21.,  2., 11.,  3., 12.,
    4., 13.,  5., 14.,  9., 27.,  5., 14.,  6., 15.,
    7., 25.,  9., 27., 17., 53.,  9., 27., 11., 29.,
    4., 13.,  5., 14.,  9., 27.,  5., 14.,  6., 15.,
    7., 16.,  8., 17., 15., 33.,  8., 17.,  9., 18.
  };
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(expected, CPU_TENSOR_NHWC(32F, 5, 5, 2), 0);
  REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same");
  ccv_nnc_tensor_free(c);
  ccv_nnc_tensor_free(bias);
  ccv_nnc_tensor_free(w);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(a);
}
581
582
// NCHW variant: transposed convolution
// (CMD_CONVOLUTION_TRANSPOSE_FORWARD(1, 1, 0, 3, 3, 2)) of a 2x2x2 all-ones
// input with weights 0..17 and bias 1. The hint is forced to stride 1 with
// zero border, and the 1x4x4 output is compared with a hard-coded table.
TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights, NCHW")
{
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 2, 2), 0);
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1, 4, 4), 0);
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(1, 1, 0, 3, 3, 2);
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
  // Override the automatic hint on both axes.
  int axis;
  for (axis = 0; axis < 2; axis++)
  {
    hint.stride.dim[axis] = 1;
    hint.border.begin[axis] = 0;
    hint.border.end[axis] = 0;
  }
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 1, 3, 3), 0);
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
  // Fill the inputs: weights 0..17, unit input, bias 1.
  int k;
  for (k = 0; k < 3 * 3 * 2; k++)
  {
    w->data.f32[k] = k;
  }
  for (k = 0; k < 2 * 2 * 2; k++)
  {
    a->data.f32[k] = 1;
  }
  bias->data.f32[0] = 1;
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
  // Expected 1x4x4 output; c wraps this array.
  float expected[] = {
    10., 21., 25., 14.,
    25., 53., 61., 33.,
    37., 77., 85., 45.,
    22., 45., 49., 26.
  };
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(expected, CPU_TENSOR_NCHW(32F, 1, 4, 4), 0);
  REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same");
  ccv_nnc_tensor_free(c);
  ccv_nnc_tensor_free(bias);
  ccv_nnc_tensor_free(w);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(a);
}
619
620
// NCHW variant: transposed convolution
// (CMD_CONVOLUTION_TRANSPOSE_FORWARD(2, 2, 0, 3, 3, 2)) of a 2x2x2 all-ones
// input with weights 0..17 and bias 1 on both output channels. The hint is
// forced to stride 1 with zero border, and the 2x4x4 output is compared
// with a hard-coded table.
TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights and group of 2, NCHW")
{
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 2, 2), 0);
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 4, 4), 0);
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(2, 2, 0, 3, 3, 2);
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
  // Override the automatic hint on both axes.
  int axis;
  for (axis = 0; axis < 2; axis++)
  {
    hint.stride.dim[axis] = 1;
    hint.border.begin[axis] = 0;
    hint.border.end[axis] = 0;
  }
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 1, 3, 3), 0);
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2), 0);
  // Fill the inputs: weights 0..17, unit input, bias 1.
  int k;
  for (k = 0; k < 3 * 3 * 2; k++)
  {
    w->data.f32[k] = k;
  }
  for (k = 0; k < 2 * 2 * 2; k++)
  {
    a->data.f32[k] = 1;
  }
  for (k = 0; k < 2; k++)
  {
    bias->data.f32[k] = 1;
  }
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
  // Expected 2x4x4 output (first 4x4 plane, then second); c wraps this array.
  float expected[] = {
    1.,  2.,  4.,  3.,
    4.,  9., 13.,  8.,
    10., 21., 25., 14.,
    7., 14., 16.,  9.,
    10., 20., 22., 12.,
    22., 45., 49., 26.,
    28., 57., 61., 32.,
    16., 32., 34., 18.
  };
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(expected, CPU_TENSOR_NCHW(32F, 2, 4, 4), 0);
  REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same");
  ccv_nnc_tensor_free(c);
  ccv_nnc_tensor_free(bias);
  ccv_nnc_tensor_free(w);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(a);
}
661
662
// NCHW variant: transposed convolution
// (CMD_CONVOLUTION_TRANSPOSE_FORWARD(2, 2, 0, 3, 3, 2)) of a 2x2x2 all-ones
// input with weights 0..17 and bias 1 on both output channels. The hint is
// forced to stride 2 with zero border, and the 2x5x5 output is compared
// with a hard-coded table.
TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights and group of 2, stride of 2, NCHW")
{
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 2, 2), 0);
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 5, 5), 0);
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(2, 2, 0, 3, 3, 2);
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
  // Override the automatic hint on both axes.
  int axis;
  for (axis = 0; axis < 2; axis++)
  {
    hint.stride.dim[axis] = 2;
    hint.border.begin[axis] = 0;
    hint.border.end[axis] = 0;
  }
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 1, 3, 3), 0);
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2), 0);
  // Fill the inputs: weights 0..17, unit input, bias 1.
  int k;
  for (k = 0; k < 3 * 3 * 2; k++)
  {
    w->data.f32[k] = k;
  }
  for (k = 0; k < 2 * 2 * 2; k++)
  {
    a->data.f32[k] = 1;
  }
  for (k = 0; k < 2; k++)
  {
    bias->data.f32[k] = 1;
  }
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
  // Expected 2x5x5 output (first 5x5 plane, then second); c wraps this array.
  float expected[] = {
    1.,  2.,  3.,  2.,  3.,
    4.,  5.,  9.,  5.,  6.,
    7.,  9., 17.,  9., 11.,
    4.,  5.,  9.,  5.,  6.,
    7.,  8., 15.,  8.,  9.,
    10., 11., 21., 11., 12.,
    13., 14., 27., 14., 15.,
    25., 27., 53., 27., 29.,
    13., 14., 27., 14., 15.,
    16., 17., 33., 17., 18.
  };
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(expected, CPU_TENSOR_NCHW(32F, 2, 5, 5), 0);
  REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same");
  ccv_nnc_tensor_free(c);
  ccv_nnc_tensor_free(bias);
  ccv_nnc_tensor_free(w);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(a);
}
705
706
#include "case_main.h"