Coverage Report

Created: 2025-02-24 17:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/liu/actions-runner/_work/ccv/ccv/test/unit/nnc/forward.tests.c
Line
Count
Source
1
#include "case.h"
2
#include "ccv_case.h"
3
#include "ccv_nnc_case.h"
4
#include <ccv.h>
5
#include <nnc/ccv_nnc.h>
6
#include <nnc/ccv_nnc_easy.h>
7
#include "3rdparty/dsfmt/dSFMT.h"
8
9
TEST_SETUP()
10
{
11
  ccv_nnc_init();
12
}
13
14
TEST_CASE("convolutional network of 11x11 on 225x185 with uniform weights")
15
1
{
16
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 225, 185, 3), 0);
17
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 55, 45, 4), 0);
18
1
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(1, 4, 11, 11, 3);
19
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
20
1
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 11, 11, 3), 0);
21
1
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
22
  // configure the inlets.
23
1
  int i;
24
1.45k
  for (i = 0; i < 11 * 11 * 3 * 4; 
i++1.45k
)
25
1.45k
    w->data.f32[i] = 1;
26
124k
  for (i = 0; i < 225 * 185 * 3; 
i++124k
)
27
124k
    a->data.f32[i] = 1;
28
5
  for (i = 0; i < 4; 
i++4
)
29
4
    bias->data.f32[i] = 0;
30
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
31
1
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(55, 45, CCV_32F | 4, 0, 0);
32
1
  int x, y;
33
56
  for (y = 0; y < 55; 
y++55
)
34
2.53k
    
for (x = 0; 55
x < 45;
x++2.47k
)
35
12.3k
      
for (i = 0; 2.47k
i < 4;
i++9.90k
)
36
9.90k
      c->data.f32[(y * 45 + x) * 4 + i] = ((x == 0 && 
y == 0220
) ||
(9.89k
x == 09.89k
&&
y == 54216
) ||
(9.89k
x == 449.89k
&&
y == 0220
) ||
(9.88k
x == 449.88k
&&
y == 54216
)) ?
30016
:
(9.88k
(9.88k
x == 09.88k
||
y == 09.67k
||
x == 449.50k
||
y == 549.28k
) ?
330768
:
3639.11k
);
37
1
  REQUIRE_MATRIX_EQ(b, c, "55x45 matrix should be exactly a matrix fill 363, with 300 on the corner and 330 on the border");
38
1
  ccv_matrix_free(c);
39
1
  ccv_nnc_tensor_free(bias);
40
1
  ccv_nnc_tensor_free(w);
41
1
  ccv_nnc_tensor_free(b);
42
1
  ccv_nnc_tensor_free(a);
43
1
}
44
45
TEST_CASE("convolutional network of 5x3 on 17x27 with uniform weights")
46
1
{
47
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 17, 27, 1), 0);
48
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 17, 27, 4), 0);
49
1
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(1, 4, 5, 3, 1);
50
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
51
1
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 5, 3, 1), 0);
52
1
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
53
  // configure the inlets.
54
1
  int i;
55
61
  for (i = 0; i < 5 * 3 * 4; 
i++60
)
56
60
    w->data.f32[i] = 1;
57
460
  for (i = 0; i < 17 * 27; 
i++459
)
58
459
    a->data.f32[i] = 1;
59
5
  for (i = 0; i < 4; 
i++4
)
60
4
    bias->data.f32[i] = 0;
61
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
62
1
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(17, 27, CCV_32F | 4, 0, 0);
63
1
  int x, y;
64
18
  for (y = 0; y < 17; 
y++17
)
65
476
    
for (x = 0; 17
x < 27;
x++459
)
66
2.29k
      
for (i = 0; 459
i < 4;
i++1.83k
)
67
1.83k
      {
68
1.83k
        if ((x == 0 && 
y == 068
) ||
(1.83k
x == 01.83k
&&
y == 1664
) ||
(1.82k
x == 261.82k
&&
y == 068
) ||
(1.82k
x == 261.82k
&&
y == 1664
))
69
16
          c->data.f32[(y * 27 + x) * 4 + i] = 6;
70
1.82k
        else if ((x == 0 && 
y == 160
) ||
(1.81k
x == 261.81k
&&
y == 160
) ||
(1.81k
x == 01.81k
&&
y == 1556
) ||
(1.80k
x == 261.80k
&&
y == 1556
))
71
16
          c->data.f32[(y * 27 + x) * 4 + i] = 8;
72
1.80k
        else if (y == 0 || 
y == 161.70k
)
73
200
          c->data.f32[(y * 27 + x) * 4 + i] = 9;
74
1.60k
        else if (x == 0 || 
x == 261.55k
)
75
104
          c->data.f32[(y * 27 + x) * 4 + i] = 10;
76
1.50k
        else if (y == 1 || 
y == 151.40k
)
77
200
          c->data.f32[(y * 27 + x) * 4 + i] = 12;
78
1.30k
        else
79
1.30k
          c->data.f32[(y * 27 + x) * 4 + i] = 15;
80
1.83k
      }
81
1
  REQUIRE_MATRIX_EQ(b, c, "17x27 matrix should be exactly a matrix fill 15, with 6, 8 on the corner and 9, 10, 12 on the border");
82
1
  ccv_matrix_free(c);
83
1
  ccv_nnc_tensor_free(bias);
84
1
  ccv_nnc_tensor_free(w);
85
1
  ccv_nnc_tensor_free(b);
86
1
  ccv_nnc_tensor_free(a);
87
1
}
88
89
TEST_CASE("convolutional network of 11x11x11 on 33x225x185 with uniform weights")
90
1
{
91
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 33, 225, 185, 3), 0);
92
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 55, 45, 4), 0);
93
1
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(1, 4, 11, 11, 11, 3);
94
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
95
1
  hint.stride.dim[0] = 4;
96
1
  hint.border.begin[0] = 1;
97
1
  hint.border.end[0] = 1;
98
1
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 11, 11, 11, 3), 0);
99
1
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
100
  // configure the inlets.
101
1
  int i;
102
15.9k
  for (i = 0; i < 11 * 11 * 11 * 3 * 4; 
i++15.9k
)
103
15.9k
    w->data.f32[i] = 1;
104
4.12M
  for (i = 0; i < 33 * 225 * 185 * 3; 
i++4.12M
)
105
4.12M
    a->data.f32[i] = 1;
106
5
  for (i = 0; i < 4; 
i++4
)
107
4
    bias->data.f32[i] = 0;
108
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
109
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 7, 55, 45, 4), 0);
110
1
  int x, y, z;
111
8
  for (z = 0; z < 7; 
z++7
)
112
392
    
for (y = 0; 7
y < 55;
y++385
)
113
17.7k
      
for (x = 0; 385
x < 45;
x++17.3k
)
114
86.6k
        
for (i = 0; 17.3k
i < 4;
i++69.3k
)
115
69.3k
          c->data.f32[((z * 55 + y) * 45 + x) * 4 + i] = ((x == 0 && 
y == 01.54k
&&
z == 028
) ||
(69.2k
x == 069.2k
&&
y == 01.53k
&&
z == 624
) ||
(69.2k
x == 069.2k
&&
y == 541.53k
&&
z == 028
) ||
(69.2k
x == 069.2k
&&
y == 541.52k
&&
z == 624
) ||
(69.2k
x == 4469.2k
&&
y == 01.54k
&&
z == 028
) ||
(69.2k
x == 4469.2k
&&
y == 01.53k
&&
z == 624
) ||
(69.2k
x == 4469.2k
&&
y == 541.53k
&&
z == 028
) ||
(69.2k
x == 4469.2k
&&
y == 541.52k
&&
z == 624
)) ?
300032
:
(69.2k
(69.2k
(69.2k
x == 069.2k
&&
y == 01.52k
) ||
(69.2k
x == 069.2k
&&
z == 01.50k
) ||
(69.0k
y == 069.0k
&&
z == 01.22k
) ||
(68.8k
x == 068.8k
&&
y == 541.29k
) ||
(68.8k
x == 068.8k
&&
z == 61.27k
) ||
(68.6k
y == 068.6k
&&
z == 61.05k
) ||
(68.4k
x == 4468.4k
&&
y == 01.52k
) ||
(68.4k
x == 4468.4k
&&
z == 01.50k
) ||
(68.2k
y == 5468.2k
&&
z == 01.22k
) ||
(68.0k
x == 4468.0k
&&
y == 541.29k
) ||
(68.0k
x == 4468.0k
&&
z == 61.27k
) ||
(67.8k
y == 5467.8k
&&
z == 61.03k
)) ?
33001.61k
:
(67.6k
(67.6k
x == 067.6k
||
x == 4466.5k
||
y == 065.5k
||
y == 5464.6k
||
z == 063.8k
||
z == 654.6k
) ?
363022.0k
:
399345.5k
));
116
1
  REQUIRE_TENSOR_EQ(b, c, "7x55x45 matrix should be exactly a matrix fill 3993, with 3000 on the corner and 3300 on the ridge and 3630 on the surface");
117
1
  ccv_nnc_tensor_free(c);
118
1
  ccv_nnc_tensor_free(bias);
119
1
  ccv_nnc_tensor_free(w);
120
1
  ccv_nnc_tensor_free(b);
121
1
  ccv_nnc_tensor_free(a);
122
1
}
123
124
TEST_CASE("convolutional network of 11x11 on 225x185 with non-uniform weights")
125
1
{
126
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 225, 185, 1), 0);
127
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 55, 45, 4), 0);
128
1
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(1, 4, 11, 11, 1);
129
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
130
1
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 11, 11, 1), 0);
131
1
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
132
  // configure the inlets.
133
1
  int i, x, y;
134
5
  for (x = 0; x < 4; 
x++4
)
135
488
    
for (i = 0; 4
i < 11 * 11;
i++484
)
136
484
      w->data.f32[x * 11 * 11 + i] = i + 1;
137
41.6k
  for (i = 0; i < 225 * 185; 
i++41.6k
)
138
41.6k
    a->data.f32[i] = i + 1;
139
5
  for (i = 0; i < 4; 
i++4
)
140
4
    bias->data.f32[i] = 0;
141
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
142
1
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(55, 45, CCV_32F | 4, 0, 0);
143
1
  float sum = 0;
144
  // first column
145
11
  for (y = 0; y < 10; 
y++10
)
146
110
    
for (x = 0; 10
x < 10;
x++100
)
147
100
      sum += ((y + 1) * 11 + x + 2) * (y * 185 + x + 1);
148
5
  for (i = 0; i < 4; 
i++4
)
149
4
    c->data.f32[i] = sum;
150
1
  sum = 0;
151
11
  for (y = 0; y < 10; 
y++10
)
152
120
    
for (x = 0; 10
x < 11;
x++110
)
153
110
      sum += ((y + 1) * 11 + x + 1) * (y * 185 + (x + 3) + 1);
154
44
  for (x = 1; x < 44; 
x++43
)
155
215
    
for (i = 0; 43
i < 4;
i++172
)
156
172
      c->data.f32[x * 4 + i] = sum + (x - 1) * 4 * (11 * 11 + 12) * 11 * 10 / 2;
157
1
  sum = 0;
158
11
  for (y = 0; y < 10; 
y++10
)
159
110
    
for (x = 0; 10
x < 10;
x++100
)
160
100
      sum += ((y + 1) * 11 + x + 1) * (y * 185 + (x + 175) + 1);
161
5
  for (i = 0; i < 4; 
i++4
)
162
4
    c->data.f32[44 * 4 + i] = sum;
163
  // last column
164
1
  sum = 0;
165
11
  for (y = 0; y < 10; 
y++10
)
166
110
    
for (x = 0; 10
x < 10;
x++100
)
167
100
      sum += (y * 11 + x + 2) * ((y + 215) * 185 + x + 1);
168
5
  for (i = 0; i < 4; 
i++4
)
169
4
    c->data.f32[54 * 45 * 4 + i] = sum;
170
1
  sum = 0;
171
11
  for (y = 0; y < 10; 
y++10
)
172
120
    
for (x = 0; 10
x < 11;
x++110
)
173
110
      sum += (y * 11 + x + 1) * ((y + 215) * 185 + (x + 3) + 1);
174
44
  for (x = 1; x < 44; 
x++43
)
175
215
    
for (i = 0; 43
i < 4;
i++172
)
176
172
      c->data.f32[(54 * 45 + x) * 4 + i] = sum + (x - 1) * 4 * (10 * 11 + 1) * 11 * 10 / 2;
177
1
  sum = 0;
178
11
  for (y = 0; y < 10; 
y++10
)
179
110
    
for (x = 0; 10
x < 10;
x++100
)
180
100
      sum += (y * 11 + x + 1) * ((y + 215) * 185 + (x + 175) + 1);
181
5
  for (i = 0; i < 4; 
i++4
)
182
4
    c->data.f32[(54 * 45 + 44) * 4 + i] = sum;
183
1
  float border[] = {
184
1
    0, 0
185
1
  };
186
12
  for (y = 0; y < 11; 
y++11
)
187
121
    
for (x = 0; 11
x < 10;
x++110
)
188
110
      border[0] += (y * 11 + x + 2) * ((y + 3) * 185 + x + 1);
189
12
  for (y = 0; y < 11; 
y++11
)
190
121
    
for (x = 0; 11
x < 10;
x++110
)
191
110
      border[1] += (y * 11 + x + 1) * ((y + 3) * 185 + (x + 175) + 1);
192
1
  sum = 0;
193
12
  for (y = 0; y < 11; 
y++11
)
194
132
    
for (x = 0; 11
x < 11;
x++121
)
195
121
      sum += (y * 11 + x + 1) * ((y + 3) * 185 + (x + 3) + 1);
196
54
  for (y = 1; y < 54; 
y++53
)
197
53
  {
198
265
    for (i = 0; i < 4; 
i++212
)
199
212
      c->data.f32[y * 45 * 4 + i] = border[0];
200
2.33k
    for (x = 1; x < 44; 
x++2.27k
)
201
11.3k
      
for (i = 0; 2.27k
i < 4;
i++9.11k
)
202
9.11k
        c->data.f32[(y * 45 + x) * 4 + i] = sum + (x - 1) * 4 * (11 * 11 + 1) * 11 * 11 / 2;
203
265
    for (i = 0; i < 4; 
i++212
)
204
212
      c->data.f32[(y * 45 + 44) * 4 + i] = border[1];
205
53
    sum += 185 * 4 * (11 * 11 + 1) * 11 * 11 / 2;
206
53
    border[0] += 185 * 4 * ((11 * 11 + 1) * 11 * 11 / 2 - (10 * 11 + 1 + 1) * 11 / 2);
207
53
    border[1] += 185 * 4 * ((11 * 11 + 1) * 11 * 11 / 2 - (11 * 11 + 11) * 11 / 2);
208
53
  }
209
  // regularize the output so it is within the tolerance
210
9.90k
  for (i = 0; i < 55 * 45 * 4; 
i++9.90k
)
211
9.90k
    c->data.f32[i] = c->data.f32[i] * 1e-7, b->data.f32[i] = b->data.f32[i] * 1e-7;
212
1
  REQUIRE_MATRIX_EQ(b, c, "55x55 matrix should be exactly the same");
213
1
  ccv_matrix_free(c);
214
1
  ccv_nnc_tensor_free(bias);
215
1
  ccv_nnc_tensor_free(w);
216
1
  ccv_nnc_tensor_free(b);
217
1
  ccv_nnc_tensor_free(a);
218
1
}
219
220
TEST_CASE("convolutional network of 3x5 on 27x27 with non-uniform weights")
221
1
{
222
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0);
223
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 4), 0);
224
1
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(1, 4, 3, 5, 1);
225
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
226
1
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 3, 5, 1), 0);
227
1
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
228
  // configure the inlets.
229
1
  int i, x, y;
230
5
  for (x = 0; x < 4; 
x++4
)
231
64
    
for (i = 0; 4
i < 3 * 5;
i++60
)
232
60
      w->data.f32[x * 3 * 5 + i] = i + 1;
233
730
  for (i = 0; i < 27 * 27; 
i++729
)
234
729
    a->data.f32[i] = i + 1;
235
5
  for (i = 0; i < 4; 
i++4
)
236
4
    bias->data.f32[i] = 0;
237
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
238
1
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(27, 27, CCV_32F | 4, 0, 0);
239
  // the first column
240
1
  float sum = 0;
241
3
  for (y = 0; y < 2; 
y++2
)
242
8
    
for (x = 0; 2
x < 3;
x++6
)
243
6
      sum += ((y + 1) * 5 + x + 3) * (y * 27 + x + 1);
244
5
  for (i = 0; i < 4; 
i++4
)
245
4
    c->data.f32[i] = sum;
246
1
  sum = 0;
247
3
  for (y = 0; y < 2; 
y++2
)
248
10
    
for (x = 0; 2
x < 4;
x++8
)
249
8
      sum += ((y + 1) * 5 + x + 2) * (y * 27 + x + 1);
250
5
  for (i = 0; i < 4; 
i++4
)
251
4
    c->data.f32[4 + i] = sum;
252
1
  sum = 0;
253
3
  for (y = 0; y < 2; 
y++2
)
254
12
    
for (x = 0; 2
x < 5;
x++10
)
255
10
      sum += ((y + 1) * 5 + x + 1) * (y * 27 + x + 1);
256
24
  for (x = 2; x < 25; 
x++23
)
257
115
    
for (i = 0; 23
i < 4;
i++92
)
258
92
      c->data.f32[x * 4 + i] = sum + (x - 2) * 21 * 10 / 2;
259
1
  sum = 0;
260
3
  for (y = 0; y < 2; 
y++2
)
261
10
    
for (x = 0; 2
x < 4;
x++8
)
262
8
      sum += ((y + 1) * 5 + x + 1) * (y * 27 + x + 24);
263
5
  for (i = 0; i < 4; 
i++4
)
264
4
    c->data.f32[25 * 4 + i] = sum;
265
1
  sum = 0;
266
3
  for (y = 0; y < 2; 
y++2
)
267
8
    
for (x = 0; 2
x < 3;
x++6
)
268
6
      sum += ((y + 1) * 5 + x + 1) * (y * 27 + x + 25);
269
5
  for (i = 0; i < 4; 
i++4
)
270
4
    c->data.f32[26 * 4 + i] = sum;
271
  // the last column
272
1
  sum = 0;
273
3
  for (y = 0; y < 2; 
y++2
)
274
8
    
for (x = 0; 2
x < 3;
x++6
)
275
6
      sum += (y * 5 + x + 3) * ((y + 25) * 27 + x + 1);
276
5
  for (i = 0; i < 4; 
i++4
)
277
4
    c->data.f32[27 * 26 * 4 + i] = sum;
278
1
  sum = 0;
279
3
  for (y = 0; y < 2; 
y++2
)
280
10
    
for (x = 0; 2
x < 4;
x++8
)
281
8
      sum += (y * 5 + x + 2) * ((y + 25) * 27 + x + 1);
282
5
  for (i = 0; i < 4; 
i++4
)
283
4
    c->data.f32[(27 * 26 + 1) * 4 + i] = sum;
284
1
  sum = 0;
285
3
  for (y = 0; y < 2; 
y++2
)
286
12
    
for (x = 0; 2
x < 5;
x++10
)
287
10
      sum += (y * 5 + x + 1) * ((y + 25) * 27 + x + 1);
288
24
  for (x = 2; x < 25; 
x++23
)
289
115
    
for (i = 0; 23
i < 4;
i++92
)
290
92
      c->data.f32[(27 * 26 + x) * 4 + i] = sum + (x - 2) * 11 * 10 / 2;
291
1
  sum = 0;
292
3
  for (y = 0; y < 2; 
y++2
)
293
10
    
for (x = 0; 2
x < 4;
x++8
)
294
8
      sum += (y * 5 + x + 1) * ((y + 25) * 27 + x + 24);
295
5
  for (i = 0; i < 4; 
i++4
)
296
4
    c->data.f32[(27 * 26 + 25) * 4 + i] = sum;
297
1
  sum = 0;
298
3
  for (y = 0; y < 2; 
y++2
)
299
8
    
for (x = 0; 2
x < 3;
x++6
)
300
6
      sum += (y * 5 + x + 1) * ((y + 25) * 27 + x + 25);
301
5
  for (i = 0; i < 4; 
i++4
)
302
4
    c->data.f32[(27 * 26 + 26) * 4 + i] = sum;
303
1
  float border[] = {
304
1
    0, 0, 0, 0
305
1
  };
306
4
  for (y = 0; y < 3; 
y++3
)
307
12
    
for (x = 0; 3
x < 3;
x++9
)
308
9
      border[0] += (y * 5 + x + 3) * (y * 27 + x + 1);
309
4
  for (y = 0; y < 3; 
y++3
)
310
15
    
for (x = 0; 3
x < 4;
x++12
)
311
12
      border[1] += (y * 5 + x + 2) * (y * 27 + x + 1);
312
4
  for (y = 0; y < 3; 
y++3
)
313
15
    
for (x = 0; 3
x < 4;
x++12
)
314
12
      border[2] += (y * 5 + x + 1) * (y * 27 + x + 24);
315
4
  for (y = 0; y < 3; 
y++3
)
316
12
    
for (x = 0; 3
x < 3;
x++9
)
317
9
      border[3] += (y * 5 + x + 1) * (y * 27 + x + 25);
318
1
  sum = 0;
319
4
  for (y = 0; y < 3; 
y++3
)
320
18
    
for (x = 0; 3
x < 5;
x++15
)
321
15
      sum += (y * 5 + x + 1) * (y * 27 + x + 1);
322
26
  for (y = 1; y < 26; 
y++25
)
323
25
  {
324
125
    for (i = 0; i < 4; 
i++100
)
325
100
    {
326
100
      c->data.f32[y * 27 * 4 + i] = border[0] + (y - 1) * 27 * (3 + 4 + 5 + 8 + 9 + 10 + 13 + 14 + 15);
327
100
      c->data.f32[(y * 27 + 1) * 4 + i] = border[1] + (y - 1) * 27 * (2 + 3 + 4 + 5 + 7 + 8 + 9 + 10 + 12 + 13 + 14 + 15);
328
2.40k
      for (x = 2; x < 25; 
x++2.30k
)
329
2.30k
        c->data.f32[(y * 27 + x) * 4 + i] = sum + ((y - 1) * 27 + x - 2) * 16 * 15 / 2;
330
100
      c->data.f32[(y * 27 + 25) * 4 + i] = border[2] + (y - 1) * 27 * (1 + 2 + 3 + 4 + 6 + 7 + 8 + 9 + 11 + 12 + 13 + 14);
331
100
      c->data.f32[(y * 27 + 26) * 4 + i] = border[3] + (y - 1) * 27 * (1 + 2 + 3 + 6 + 7 + 8 + 11 + 12 + 13);
332
100
    }
333
25
  }
334
1
  REQUIRE_MATRIX_EQ(b, c, "27x27 matrix should be exactly the same");
335
1
  ccv_matrix_free(c);
336
1
  ccv_nnc_tensor_free(bias);
337
1
  ccv_nnc_tensor_free(w);
338
1
  ccv_nnc_tensor_free(b);
339
1
  ccv_nnc_tensor_free(a);
340
1
}
341
342
TEST_CASE("convolution with no bias")
343
1
{
344
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0);
345
1
  ccv_nnc_tensor_t* bg = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 4), 0);
346
1
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(1, 4, 3, 5, 1);
347
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, bg->info);
348
1
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 3, 5, 1), 0);
349
1
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4), 0);
350
1
  dsfmt_t dsfmt;
351
1
  int i;
352
1
  dsfmt_init_gen_rand(&dsfmt, 1);
353
730
  for (i = 0; i < 27 * 27; 
i++729
)
354
729
    a->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
355
61
  for (i = 0; i < 4 * 3 * 5; 
i++60
)
356
60
    w->data.f32[i] = dsfmt_genrand_open_close(&dsfmt);
357
5
  for (i = 0; i < 4; 
i++4
)
358
4
    bias->data.f32[i] = 0;
359
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(bg), 0);
360
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 4), 0);
361
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w), TENSOR_LIST(b), 0);
362
1
  REQUIRE_MATRIX_EQ(b, bg, "convolution with no bias should equal to with bias = 0");
363
1
  ccv_nnc_tensor_free(a);
364
1
  ccv_nnc_tensor_free(b);
365
1
  ccv_nnc_tensor_free(bg);
366
1
  ccv_nnc_tensor_free(w);
367
1
  ccv_nnc_tensor_free(bias);
368
1
}
369
370
TEST_CASE("maximum pool network of 55x55 with window of 3x3 and stride of 2")
371
1
{
372
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 55, 55, 1), 0);
373
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0);
374
1
  ccv_nnc_cmd_t cmd = CMD_MAX_POOL_FORWARD(3, 3);
375
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
376
  // configure the inlets.
377
1
  int i;
378
3.02k
  for (i = 0; i < 55 * 55; 
i++3.02k
)
379
3.02k
    a->data.f32[i] = i + 1;
380
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
381
1
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(27, 27, CCV_32F | CCV_C1, 0, 0);
382
1
  int x, y;
383
28
  for (y = 0; y < 27; 
y++27
)
384
756
    
for (x = 0; 27
x < 27;
x++729
)
385
729
      c->data.f32[y * 27 + x] = 113 + y * 110 + x * 2;
386
1
  REQUIRE_MATRIX_EQ(b, c, "max pool network output should be exactly the same");
387
1
  ccv_matrix_free(c);
388
1
  ccv_nnc_tensor_free(b);
389
1
  ccv_nnc_tensor_free(a);
390
1
}
391
392
TEST_CASE("maximum pool network of 57x57 with window of 3x3 and stride of 3")
393
1
{
394
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 57, 57, 1), 0);
395
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 19, 19, 1), 0);
396
1
  ccv_nnc_cmd_t cmd = CMD_MAX_POOL_FORWARD(3, 3);
397
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
398
  // configure the inlets.
399
1
  int i;
400
3.25k
  for (i = 0; i < 57 * 57; 
i++3.24k
)
401
3.24k
    a->data.f32[i] = i + 1;
402
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
403
1
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(19, 19, CCV_32F | CCV_C1, 0, 0);
404
1
  int x, y;
405
20
  for (y = 0; y < 19; 
y++19
)
406
380
    
for (x = 0; 19
x < 19;
x++361
)
407
361
      c->data.f32[y * 19 + x] = 117 + y * 171 + x * 3;
408
1
  REQUIRE_MATRIX_EQ(b, c, "max pool network output should be exactly the same");
409
1
  ccv_matrix_free(c);
410
1
  ccv_nnc_tensor_free(b);
411
1
  ccv_nnc_tensor_free(a);
412
1
}
413
414
TEST_CASE("maximum pool network of 54x54 with window of 2x2 and stride of 2")
415
1
{
416
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 54, 54, 1), 0);
417
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0);
418
1
  ccv_nnc_cmd_t cmd = CMD_MAX_POOL_FORWARD(2, 2);
419
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
420
  // configure the inlets.
421
1
  int i;
422
2.91k
  for (i = 0; i < 54 * 54; 
i++2.91k
)
423
2.91k
    a->data.f32[i] = i + 1;
424
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
425
1
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(27, 27, CCV_32F | CCV_C1, 0, 0);
426
1
  int x, y;
427
28
  for (y = 0; y < 27; 
y++27
)
428
756
    
for (x = 0; 27
x < 27;
x++729
)
429
729
      c->data.f32[y * 27 + x] = 56 + y * 108 + x * 2;
430
1
  REQUIRE_MATRIX_EQ(b, c, "max pool network output should be exactly the same");
431
1
  ccv_matrix_free(c);
432
1
  ccv_nnc_tensor_free(b);
433
1
  ccv_nnc_tensor_free(a);
434
1
}
435
436
TEST_CASE("average pool network of 55x55 with window of 3x3 and stride of 2")
437
1
{
438
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 55, 55, 1), 0);
439
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0);
440
1
  ccv_nnc_cmd_t cmd = CMD_AVERAGE_POOL_FORWARD(3, 3);
441
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
442
  // configure the inlets.
443
1
  int i;
444
3.02k
  for (i = 0; i < 55 * 55; 
i++3.02k
)
445
3.02k
    a->data.f32[i] = i + 1;
446
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
447
1
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(27, 27, CCV_32F | CCV_C1, 0, 0);
448
1
  int x, y;
449
28
  for (y = 0; y < 27; 
y++27
)
450
756
    
for (x = 0; 27
x < 27;
x++729
)
451
729
      c->data.f32[y * 27 + x] = 57 + y * 110 + x * 2;
452
1
  REQUIRE_MATRIX_EQ(b, c, "average pool network output should be exactly the same");
453
1
  ccv_matrix_free(c);
454
1
  ccv_nnc_tensor_free(b);
455
1
  ccv_nnc_tensor_free(a);
456
1
}
457
458
TEST_CASE("average pool network of 57x57 with window of 3x3 and stride of 3")
459
1
{
460
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 57, 57, 1), 0);
461
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 19, 19, 1), 0);
462
1
  ccv_nnc_cmd_t cmd = CMD_AVERAGE_POOL_FORWARD(3, 3);
463
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
464
  // configure the inlets.
465
1
  int i;
466
3.25k
  for (i = 0; i < 57 * 57; 
i++3.24k
)
467
3.24k
    a->data.f32[i] = i + 1;
468
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
469
1
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(19, 19, CCV_32F | CCV_C1, 0, 0);
470
1
  int x, y;
471
20
  for (y = 0; y < 19; 
y++19
)
472
380
    
for (x = 0; 19
x < 19;
x++361
)
473
361
      c->data.f32[y * 19 + x] = 59 + y * 171 + x * 3;
474
1
  REQUIRE_MATRIX_EQ(b, c, "average pool network output should be exactly the same");
475
1
  ccv_matrix_free(c);
476
1
  ccv_nnc_tensor_free(b);
477
1
  ccv_nnc_tensor_free(a);
478
1
}
479
480
TEST_CASE("average pool network of 54x54 with window of 2x2 and stride of 2")
481
1
{
482
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 54, 54, 1), 0);
483
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 27, 27, 1), 0);
484
1
  ccv_nnc_cmd_t cmd = CMD_AVERAGE_POOL_FORWARD(2, 2);
485
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
486
  // configure the inlets.
487
1
  int i;
488
2.91k
  for (i = 0; i < 54 * 54; 
i++2.91k
)
489
2.91k
    a->data.f32[i] = i + 1;
490
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
491
1
  ccv_dense_matrix_t* c = ccv_dense_matrix_new(27, 27, CCV_32F | CCV_C1, 0, 0);
492
1
  int x, y;
493
28
  for (y = 0; y < 27; 
y++27
)
494
756
    
for (x = 0; 27
x < 27;
x++729
)
495
729
      c->data.f32[y * 27 + x] = 28.5 + y * 108 + x * 2;
496
1
  REQUIRE_MATRIX_EQ(b, c, "average pool network output should be exactly the same");
497
1
  ccv_matrix_free(c);
498
1
  ccv_nnc_tensor_free(b);
499
1
  ccv_nnc_tensor_free(a);
500
1
}
501
502
TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights")
503
1
{
504
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2, 2), 0);
505
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 4, 1), 0);
506
1
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(1, 1, 0, 3, 3, 2);
507
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
508
1
  hint.stride.dim[0] = 1;
509
1
  hint.stride.dim[1] = 1;
510
1
  hint.border.begin[0] = 0;
511
1
  hint.border.begin[1] = 0;
512
1
  hint.border.end[0] = 0;
513
1
  hint.border.end[1] = 0;
514
1
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3, 3, 1), 0);
515
1
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1), 0);
516
  // configure the inlets.
517
1
  int i;
518
19
  for (i = 0; i < 3 * 3 * 2; 
i++18
)
519
18
    w->data.f32[i] = i;
520
9
  for (i = 0; i < 2 * 2 * 2; 
i++8
)
521
8
    a->data.f32[i] = 1;
522
2
  for (i = 0; i < 1; 
i++1
)
523
1
    bias->data.f32[i] = 1;
524
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
525
1
  float cp[] = {
526
1
    10., 21., 25., 14.,
527
1
    25., 53., 61., 33.,
528
1
    37., 77., 85., 45.,
529
1
    22., 45., 49., 26.
530
1
  };
531
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(cp, CPU_TENSOR_NHWC(32F, 4, 4, 1), 0);
532
1
  REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same");
533
1
  ccv_nnc_tensor_free(c);
534
1
  ccv_nnc_tensor_free(bias);
535
1
  ccv_nnc_tensor_free(w);
536
1
  ccv_nnc_tensor_free(b);
537
1
  ccv_nnc_tensor_free(a);
538
1
}
539
540
TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights and group of 2")
541
1
{
542
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2, 2), 0);
543
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 4, 2), 0);
544
1
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(2, 2, 0, 3, 3, 2);
545
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
546
1
  hint.stride.dim[0] = 1;
547
1
  hint.stride.dim[1] = 1;
548
1
  hint.border.begin[0] = 0;
549
1
  hint.border.begin[1] = 0;
550
1
  hint.border.end[0] = 0;
551
1
  hint.border.end[1] = 0;
552
1
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3, 3, 1), 0);
553
1
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0);
554
  // configure the inlets.
555
1
  int i;
556
19
  for (i = 0; i < 3 * 3 * 2; 
i++18
)
557
18
    w->data.f32[i] = i;
558
9
  for (i = 0; i < 2 * 2 * 2; 
i++8
)
559
8
    a->data.f32[i] = 1;
560
3
  for (i = 0; i < 2; 
i++2
)
561
2
    bias->data.f32[i] = 1;
562
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
563
1
  float cp[] = {
564
1
    1., 10.,  2., 20.,  4., 22.,  3., 12.,
565
1
    4., 22.,  9., 45., 13., 49.,  8., 26.,
566
1
    10., 28., 21., 57., 25., 61., 14., 32.,
567
1
    7., 16., 14., 32., 16., 34.,  9., 18.
568
1
  };
569
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(cp, CPU_TENSOR_NHWC(32F, 4, 4, 2), 0);
570
1
  REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same");
571
1
  ccv_nnc_tensor_free(c);
572
1
  ccv_nnc_tensor_free(bias);
573
1
  ccv_nnc_tensor_free(w);
574
1
  ccv_nnc_tensor_free(b);
575
1
  ccv_nnc_tensor_free(a);
576
1
}
577
578
TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights and group of 2, stride of 2")
579
1
{
580
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 2, 2), 0);
581
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 5, 2), 0);
582
1
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(2, 2, 0, 3, 3, 2);
583
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
584
1
  hint.stride.dim[0] = 2;
585
1
  hint.stride.dim[1] = 2;
586
1
  hint.border.begin[0] = 0;
587
1
  hint.border.begin[1] = 0;
588
1
  hint.border.end[0] = 0;
589
1
  hint.border.end[1] = 0;
590
1
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3, 3, 1), 0);
591
1
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0);
592
  // configure the inlets.
593
1
  int i;
594
19
  for (i = 0; i < 3 * 3 * 2; 
i++18
)
595
18
    w->data.f32[i] = i;
596
9
  for (i = 0; i < 2 * 2 * 2; 
i++8
)
597
8
    a->data.f32[i] = 1;
598
3
  for (i = 0; i < 2; 
i++2
)
599
2
    bias->data.f32[i] = 1;
600
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
601
1
  float cp[] = {
602
1
    1., 10.,  2., 11.,  3., 21.,  2., 11.,  3., 12.,
603
1
    4., 13.,  5., 14.,  9., 27.,  5., 14.,  6., 15.,
604
1
    7., 25.,  9., 27., 17., 53.,  9., 27., 11., 29.,
605
1
    4., 13.,  5., 14.,  9., 27.,  5., 14.,  6., 15.,
606
1
    7., 16.,  8., 17., 15., 33.,  8., 17.,  9., 18.
607
1
  };
608
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(cp, CPU_TENSOR_NHWC(32F, 5, 5, 2), 0);
609
1
  REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same");
610
1
  ccv_nnc_tensor_free(c);
611
1
  ccv_nnc_tensor_free(bias);
612
1
  ccv_nnc_tensor_free(w);
613
1
  ccv_nnc_tensor_free(b);
614
1
  ccv_nnc_tensor_free(a);
615
1
}
616
617
TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights, NCHW")
618
1
{
619
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 2, 2), 0);
620
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1, 4, 4), 0);
621
1
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(1, 1, 0, 3, 3, 2);
622
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
623
1
  hint.stride.dim[0] = 1;
624
1
  hint.stride.dim[1] = 1;
625
1
  hint.border.begin[0] = 0;
626
1
  hint.border.begin[1] = 0;
627
1
  hint.border.end[0] = 0;
628
1
  hint.border.end[1] = 0;
629
1
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 1, 3, 3), 0);
630
1
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 1), 0);
631
  // configure the inlets.
632
1
  int i;
633
19
  for (i = 0; i < 3 * 3 * 2; 
i++18
)
634
18
    w->data.f32[i] = i;
635
9
  for (i = 0; i < 2 * 2 * 2; 
i++8
)
636
8
    a->data.f32[i] = 1;
637
2
  for (i = 0; i < 1; 
i++1
)
638
1
    bias->data.f32[i] = 1;
639
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
640
1
  float cp[] = {
641
1
    10., 21., 25., 14.,
642
1
    25., 53., 61., 33.,
643
1
    37., 77., 85., 45.,
644
1
    22., 45., 49., 26.
645
1
  };
646
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(cp, CPU_TENSOR_NCHW(32F, 1, 4, 4), 0);
647
1
  REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same");
648
1
  ccv_nnc_tensor_free(c);
649
1
  ccv_nnc_tensor_free(bias);
650
1
  ccv_nnc_tensor_free(w);
651
1
  ccv_nnc_tensor_free(b);
652
1
  ccv_nnc_tensor_free(a);
653
1
}
654
655
TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights and group of 2, NCHW")
656
1
{
657
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 2, 2), 0);
658
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 4, 4), 0);
659
1
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(2, 2, 0, 3, 3, 2);
660
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
661
1
  hint.stride.dim[0] = 1;
662
1
  hint.stride.dim[1] = 1;
663
1
  hint.border.begin[0] = 0;
664
1
  hint.border.begin[1] = 0;
665
1
  hint.border.end[0] = 0;
666
1
  hint.border.end[1] = 0;
667
1
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 1, 3, 3), 0);
668
1
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2), 0);
669
  // configure the inlets.
670
1
  int i;
671
19
  for (i = 0; i < 3 * 3 * 2; 
i++18
)
672
18
    w->data.f32[i] = i;
673
9
  for (i = 0; i < 2 * 2 * 2; 
i++8
)
674
8
    a->data.f32[i] = 1;
675
3
  for (i = 0; i < 2; 
i++2
)
676
2
    bias->data.f32[i] = 1;
677
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
678
1
  float cp[] = {
679
1
    1.,  2.,  4.,  3.,
680
1
    4.,  9., 13.,  8.,
681
1
    10., 21., 25., 14.,
682
1
    7., 14., 16.,  9.,
683
1
    10., 20., 22., 12.,
684
1
    22., 45., 49., 26.,
685
1
    28., 57., 61., 32.,
686
1
    16., 32., 34., 18.
687
1
  };
688
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(cp, CPU_TENSOR_NCHW(32F, 2, 4, 4), 0);
689
1
  REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same");
690
1
  ccv_nnc_tensor_free(c);
691
1
  ccv_nnc_tensor_free(bias);
692
1
  ccv_nnc_tensor_free(w);
693
1
  ccv_nnc_tensor_free(b);
694
1
  ccv_nnc_tensor_free(a);
695
1
}
696
697
TEST_CASE("convolution transpose of 3x3 on 2x2 with given weights and group of 2, stride of 2, NCHW")
698
1
{
699
1
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 2, 2), 0);
700
1
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 5, 5), 0);
701
1
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(2, 2, 0, 3, 3, 2);
702
1
  ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a->info, b->info);
703
1
  hint.stride.dim[0] = 2;
704
1
  hint.stride.dim[1] = 2;
705
1
  hint.border.begin[0] = 0;
706
1
  hint.border.begin[1] = 0;
707
1
  hint.border.end[0] = 0;
708
1
  hint.border.end[1] = 0;
709
1
  ccv_nnc_tensor_t* w = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2, 1, 3, 3), 0);
710
1
  ccv_nnc_tensor_t* bias = ccv_nnc_tensor_new(0, CPU_TENSOR_NCHW(32F, 2), 0);
711
  // configure the inlets.
712
1
  int i;
713
19
  for (i = 0; i < 3 * 3 * 2; 
i++18
)
714
18
    w->data.f32[i] = i;
715
9
  for (i = 0; i < 2 * 2 * 2; 
i++8
)
716
8
    a->data.f32[i] = 1;
717
3
  for (i = 0; i < 2; 
i++2
)
718
2
    bias->data.f32[i] = 1;
719
1
  ccv_nnc_cmd_exec(cmd, hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
720
1
  float cp[] = {
721
1
    1.,  2.,  3.,  2.,  3.,
722
1
    4.,  5.,  9.,  5.,  6.,
723
1
    7.,  9., 17.,  9., 11.,
724
1
    4.,  5.,  9.,  5.,  6.,
725
1
    7.,  8., 15.,  8.,  9.,
726
1
    10., 11., 21., 11., 12.,
727
1
    13., 14., 27., 14., 15.,
728
1
    25., 27., 53., 27., 29.,
729
1
    13., 14., 27., 14., 15.,
730
1
    16., 17., 33., 17., 18.
731
1
  };
732
1
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(cp, CPU_TENSOR_NCHW(32F, 2, 5, 5), 0);
733
1
  REQUIRE_TENSOR_EQ(b, c, "convolution transpose output should be exactly the same");
734
1
  ccv_nnc_tensor_free(c);
735
1
  ccv_nnc_tensor_free(bias);
736
1
  ccv_nnc_tensor_free(w);
737
1
  ccv_nnc_tensor_free(b);
738
1
  ccv_nnc_tensor_free(a);
739
1
}
740
741
#include "case_main.h"