Bug Summary

File: ccv_convnet.c
Warning: line 974, column 25
4th function call argument is an uninitialized value.

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ccv_convnet.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model static -mthread-model posix -menable-no-infs -menable-no-nans -menable-unsafe-fp-math -fno-signed-zeros -mreassociate -freciprocal-math -fno-trapping-math -ffp-contract=fast -ffast-math -ffinite-math-only -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -target-feature +sse2 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -resource-dir /usr/local/lib/clang/8.0.0 -I . 
-I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_UCONTEXT -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D USE_DISPATCH -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -I /usr/local/include -internal-isystem /usr/local/include -internal-isystem /usr/local/lib/clang/8.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -fdebug-compilation-dir /home/liu/buildslave/linux-x64-runtests/build/lib -ferror-limit 19 -fmessage-length 0 -fblocks -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -o /home/liu/buildslave/public_html/analyze/2019-07-03-215927-77989-1 -x c ccv_convnet.c -faddrsig
1#include "ccv.h"
2#include "ccv_internal.h"
3#if defined(HAVE_SSE21)
4#include <xmmintrin.h>
5#elif defined(HAVE_NEON)
6#include <arm_neon.h>
7#endif
8#ifdef HAVE_GSL1
9#include <gsl/gsl_rng.h>
10#include <gsl/gsl_randist.h>
11#endif
12#ifdef USE_OPENMP
13#include <omp.h>
14#endif
15#ifdef USE_DISPATCH1
16#include <dispatch/dispatch.h>
17#endif
18#ifdef HAVE_CUDA1
19#include "cuda/cwc.h"
20#endif
21#include "3rdparty/sqlite3/sqlite3.h"
22#include "inc/ccv_convnet_internal.h"
23
24#ifndef CASE_TESTS
25
26ccv_convnet_t* ccv_convnet_new(int use_cwc_accel, ccv_size_t input, ccv_convnet_layer_param_t params[], int count)
27{
28 ccv_convnet_t* convnet = (ccv_convnet_t*)ccmallocmalloc(sizeof(ccv_convnet_t) + sizeof(ccv_convnet_layer_t) * count + sizeof(ccv_dense_matrix_t*) * count * 2);
29 convnet->use_cwc_accel = use_cwc_accel;
30#ifdef HAVE_GSL1
31 gsl_rng_env_setup();
32 gsl_rng* rng = gsl_rng_alloc(gsl_rng_default);
33 gsl_rng_set(rng, (unsigned long int)convnet);
34#endif
35 convnet->reserved = 0;
36 convnet->layers = (ccv_convnet_layer_t*)(convnet + 1);
37 convnet->acts = (ccv_dense_matrix_t**)(convnet->layers + count);
38 memset(convnet->acts, 0, sizeof(ccv_dense_matrix_t*) * count);
39 convnet->denoms = (ccv_dense_matrix_t**)(convnet->acts + count);
40 memset(convnet->denoms, 0, sizeof(ccv_dense_matrix_t*) * count);
41 convnet->count = count;
42 convnet->input = input;
43 convnet->rows = params[0].input.matrix.rows;
44 convnet->cols = params[0].input.matrix.cols;
45 convnet->channels = params[0].input.matrix.channels;
46 convnet->mean_activity = ccv_dense_matrix_new(convnet->input.height, convnet->input.width, convnet->channels | CCV_32F, 0, 0);
47 ccv_zero(convnet->mean_activity);
48 ccv_convnet_layer_t* layers = convnet->layers;
49 int i, j;
50 for (i = 0; i < count; i++)
51 {
52 layers[i].type = params[i].type;
53 layers[i].input = params[i].input;
54 layers[i].net = params[i].output;
55 layers[i].reserved = 0;
56 switch (params[i].type)
57 {
58 case CCV_CONVNET_CONVOLUTIONAL:
59 assert(params[i].input.matrix.channels % params[i].input.matrix.partition == 0)((void) sizeof ((params[i].input.matrix.channels % params[i].
input.matrix.partition == 0) ? 1 : 0), __extension__ ({ if (params
[i].input.matrix.channels % params[i].input.matrix.partition ==
0) ; else __assert_fail ("params[i].input.matrix.channels % params[i].input.matrix.partition == 0"
, "ccv_convnet.c", 59, __extension__ __PRETTY_FUNCTION__); })
)
;
60 assert(params[i].output.convolutional.count % params[i].output.convolutional.partition == 0)((void) sizeof ((params[i].output.convolutional.count % params
[i].output.convolutional.partition == 0) ? 1 : 0), __extension__
({ if (params[i].output.convolutional.count % params[i].output
.convolutional.partition == 0) ; else __assert_fail ("params[i].output.convolutional.count % params[i].output.convolutional.partition == 0"
, "ccv_convnet.c", 60, __extension__ __PRETTY_FUNCTION__); })
)
;
61 assert(params[i].output.convolutional.partition % params[i].input.matrix.partition == 0)((void) sizeof ((params[i].output.convolutional.partition % params
[i].input.matrix.partition == 0) ? 1 : 0), __extension__ ({ if
(params[i].output.convolutional.partition % params[i].input.
matrix.partition == 0) ; else __assert_fail ("params[i].output.convolutional.partition % params[i].input.matrix.partition == 0"
, "ccv_convnet.c", 61, __extension__ __PRETTY_FUNCTION__); })
)
;
62 assert(params[i].output.convolutional.partition >= params[i].input.matrix.partition)((void) sizeof ((params[i].output.convolutional.partition >=
params[i].input.matrix.partition) ? 1 : 0), __extension__ ({
if (params[i].output.convolutional.partition >= params[i]
.input.matrix.partition) ; else __assert_fail ("params[i].output.convolutional.partition >= params[i].input.matrix.partition"
, "ccv_convnet.c", 62, __extension__ __PRETTY_FUNCTION__); })
)
;
63 layers[i].wnum = params[i].output.convolutional.rows * params[i].output.convolutional.cols * params[i].output.convolutional.channels / params[i].input.matrix.partition * params[i].output.convolutional.count;
64 layers[i].w = (float*)ccmallocmalloc(sizeof(float) * (layers[i].wnum + params[i].output.convolutional.count));
65 layers[i].bias = layers[i].w + layers[i].wnum;
66#ifdef HAVE_GSL1
67 for (j = 0; j < layers[i].wnum; j++)
68 layers[i].w[j] = (gsl_rng_uniform_pos(rng) * 2 - 1) * params[i].glorot / sqrtf(params[i].output.convolutional.rows * params[i].output.convolutional.cols * params[i].output.convolutional.channels / params[i].input.matrix.partition + params[i].output.convolutional.count);
69#else
70 for (j = 0; j < layers[i].wnum; j++)
71 layers[i].w[j] = 0;
72#endif
73 for (j = 0; j < params[i].output.convolutional.count; j++)
74 layers[i].bias[j] = params[i].bias;
75 break;
76 case CCV_CONVNET_FULL_CONNECT:
77 layers[i].wnum = params[i].input.node.count * params[i].output.full_connect.count;
78 layers[i].w = (float*)ccmallocmalloc(sizeof(float) * (layers[i].wnum + params[i].output.full_connect.count));
79 layers[i].bias = layers[i].w + layers[i].wnum;
80#ifdef HAVE_GSL1
81 for (j = 0; j < layers[i].wnum; j++)
82 layers[i].w[j] = (gsl_rng_uniform_pos(rng) * 2 - 1) * params[i].glorot / sqrtf(params[i].input.node.count + params[i].output.full_connect.count);
83#else
84 for (j = 0; j < layers[i].wnum; j++)
85 layers[i].w[j] = 0;
86#endif
87 for (j = 0; j < params[i].output.full_connect.count; j++)
88 layers[i].bias[j] = params[i].bias;
89 break;
90 default:
91 layers[i].wnum = 0;
92 layers[i].w = 0;
93 layers[i].bias = 0;
94 break;
95 }
96 }
97#ifdef HAVE_GSL1
98 gsl_rng_free(rng);
99#endif
100 return convnet;
101}
102
103int ccv_convnet_verify(ccv_convnet_t* convnet, int output)
104{
105 int i, out_rows, out_cols, out_partition, out_channels;
106 if (convnet->count < 1)
107 return -1;
108 // the last layer has to be full connect
109 if (convnet->layers[convnet->count - 1].type != CCV_CONVNET_FULL_CONNECT)
110 return -1;
111 // you cannot enable relu on the last layer
112 if (convnet->layers[convnet->count - 1].net.full_connect.relu)
113 return -1;
114 out_channels = 3;
115 for (i = 0; i < convnet->count; i++)
116 {
117 ccv_convnet_layer_t* layer = convnet->layers + i;
118 if (i > 0 && (out_rows != layer->input.matrix.rows || out_cols != layer->input.matrix.cols))
119 return -1;
120 // the input channels should be equal to the previous output channels, skip this check for full connect as it is meaningless
121 if (out_channels != layer->input.matrix.channels && layer->type != CCV_CONVNET_FULL_CONNECT)
122 return -1;
123 ccv_convnet_make_output(layer, layer->input.matrix.rows, layer->input.matrix.cols, &out_rows, &out_cols, &out_partition);
124 if (layer->type == CCV_CONVNET_CONVOLUTIONAL)
125 {
126 // check to see if the input matrix channel is equal to the expected input of the convolutional layer filters
127 if (layer->input.matrix.channels != layer->net.convolutional.channels)
128 return -1;
129 // if this layer is convolutional layer, its filter output should equal to next layer's channel input
130 out_channels = layer->net.convolutional.count;
131 }
132 }
133 if (out_rows * out_cols != output)
134 return -1;
135 int count = 0;
136 for (i = 0; i < convnet->count; i++)
137 {
138 ccv_convnet_layer_t* layer = convnet->layers + i;
139 if (layer->type == CCV_CONVNET_FULL_CONNECT)
140 {
141 count = i;
142 break;
143 }
144 }
145 // all the layers after the first full connect layer should only be full connect layer
146 for (i = count; i < convnet->count; i++)
147 if (convnet->layers[i].type != CCV_CONVNET_FULL_CONNECT ||
148 convnet->layers[i].input.matrix.rows * convnet->layers[i].input.matrix.cols * convnet->layers[i].input.matrix.channels != convnet->layers[i].input.node.count)
149 return -1;
150 return 0;
151}
152
153#endif
154
155#if defined(HAVE_SSE21) || defined(HAVE_NEON)
156
157static void _ccv_convnet_layer_simd_alloc_reserved(ccv_convnet_layer_t* layer)
158{
159 if (layer->reserved)
160 return;
161 int partition = layer->input.matrix.partition;
162 int ch = layer->net.convolutional.channels;
163 int count = layer->net.convolutional.count;
164 int kernel_rows = layer->net.convolutional.rows;
165 int kernel_cols = layer->net.convolutional.cols;
166 int ch_per_partition = ch / partition;
167 int count_per_4 = count / 4;
168 float* simd_w = (float*)ccmallocmalloc(sizeof(float) * layer->wnum);
169 int i, j, k, c;
170 for (k = 0; k < count_per_4; k++)
171 for (i = 0; i < kernel_rows * kernel_cols; i++)
172 for (j = 0; j < ch_per_partition; j++)
173 for (c = 0; c < 4; c++)
174 simd_w[(k * kernel_rows * kernel_cols * ch_per_partition + i * ch_per_partition + j) * 4 + c] = layer->w[(k * 4 + c) * kernel_rows * kernel_cols * ch_per_partition + i * ch_per_partition + j];
175 layer->reserved = simd_w;
176}
177
178#endif
179
/* View a layer's `reserved` pointer as the float buffer used by the SIMD kernels. */
#define SIMD(x) ((float*)((x)->reserved))
181
182#if defined(HAVE_SSE21)
183static inline void _ccv_convnet_convolutional_forward_propagate_sse2(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t* db, int rows, int cols, int ch, int count, int strides, int border, int kernel_rows, int kernel_cols, int ch_per_partition, int count_per_partition)
184{
185 assert(SIMD(layer))((void) sizeof ((((float*)((layer)->reserved))) ? 1 : 0), __extension__
({ if (((float*)((layer)->reserved))) ; else __assert_fail
("SIMD(layer)", "ccv_convnet.c", 185, __extension__ __PRETTY_FUNCTION__
); }))
;
186#define main_for(block) \
187 parallel_for(k, (count >> 2)){ int k; for ((k) = 0; (k) < ((count >> 2)); (k)++) { { \
188 int i, j, x, y, c; \
189 int p = k * 4 / count_per_partition; \
190 float* ap = a->data.f32 + p * ch_per_partition; \
191 float* bp = db->data.f32 + k * 4; \
192 float* layer_w = SIMD(layer)((float*)((layer)->reserved)) + k * 4 * kernel_rows * kernel_cols * ch_per_partition; \
193 float bias[4] __attribute__ ((__aligned__(16))); \
194 memcpy(bias, layer->bias + k * 4, sizeof(float) * 4); \
195 /* 4 accumulators */ \
196 __m128 z4 = _mm_setzero_ps(); \
197 for (i = 0; i < db->rows; i++) \
198 { \
199 int comy = ccv_max(i * strides - border, 0)({ typeof (i * strides - border) _a = (i * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (i * strides - border); \
200 int maxy = kernel_rows - comy - (i * strides + kernel_rows - ccv_min(a->rows + border, i * strides + kernel_rows)({ typeof (a->rows + border) _a = (a->rows + border); typeof
(i * strides + kernel_rows) _b = (i * strides + kernel_rows)
; (_a < _b) ? _a : _b; })
); \
201 comy *= ch_per_partition * kernel_cols; \
202 for (j = 0; j < db->cols; j++) \
203 { \
204 __m128 v40 = _mm_load_ps(bias); \
205 __m128 v41 = _mm_setzero_ps(); \
206 __m128 v42 = _mm_setzero_ps(); \
207 __m128 v43 = _mm_setzero_ps(); \
208 int comx = ccv_max(j * strides - border, 0)({ typeof (j * strides - border) _a = (j * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (j * strides - border); \
209 int maxx = kernel_cols - comx - (j * strides + kernel_cols - ccv_min(a->cols + border, j * strides + kernel_cols)({ typeof (a->cols + border) _a = (a->cols + border); typeof
(j * strides + kernel_cols) _b = (j * strides + kernel_cols)
; (_a < _b) ? _a : _b; })
); \
210 float* w = layer_w + (comx * ch_per_partition + comy) * 4; \
211 float* apz = ap + ccv_max(j * strides - border, 0)({ typeof (j * strides - border) _a = (j * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
* ch; \
212 /* when we have border, we simply do zero padding */ \
213 for (y = 0; y < maxy; y++) \
214 { \
215 /* special casing for these cases to speed up SIMD computation */ \
216 for (x = 0; x < maxx; x++) \
217 { \
218 c = 0; \
219 for (; c < ch_per_partition - 3; c += 4) \
220 { \
221 __m128 apz4 = _mm_loadu_ps(apz + x * ch + c); \
222 __m128 w40 = _mm_loadu_ps(w + (x * ch_per_partition + c) * 4); \
223 __m128 w41 = _mm_loadu_ps(w + (x * ch_per_partition + c + 1) * 4); \
224 __m128 w42 = _mm_loadu_ps(w + (x * ch_per_partition + c + 2) * 4); \
225 __m128 w43 = _mm_loadu_ps(w + (x * ch_per_partition + c + 3) * 4); \
226 __m128 apz40 = _mm_shuffle_ps(apz4, apz4, 0x00)(__m128)__builtin_ia32_shufps((__v4sf)(__m128)(apz4), (__v4sf
)(__m128)(apz4), (int)(0x00))
; \
227 __m128 apz41 = _mm_shuffle_ps(apz4, apz4, 0x55)(__m128)__builtin_ia32_shufps((__v4sf)(__m128)(apz4), (__v4sf
)(__m128)(apz4), (int)(0x55))
; \
228 __m128 apz42 = _mm_shuffle_ps(apz4, apz4, 0xAA)(__m128)__builtin_ia32_shufps((__v4sf)(__m128)(apz4), (__v4sf
)(__m128)(apz4), (int)(0xAA))
; \
229 __m128 apz43 = _mm_shuffle_ps(apz4, apz4, 0xFF)(__m128)__builtin_ia32_shufps((__v4sf)(__m128)(apz4), (__v4sf
)(__m128)(apz4), (int)(0xFF))
; \
230 v40 =_mm_add_ps(_mm_mul_ps(w40, apz40), v40); \
231 v41 =_mm_add_ps(_mm_mul_ps(w41, apz41), v41); \
232 v42 =_mm_add_ps(_mm_mul_ps(w42, apz42), v42); \
233 v43 =_mm_add_ps(_mm_mul_ps(w43, apz43), v43); \
234 } \
235 block /* insert executions for tail partition */ \
236 } \
237 w += kernel_cols * ch_per_partition * 4; \
238 apz += a->cols * ch; \
239 } \
240 __m128 v4 = _mm_max_ps(z4, _mm_add_ps(_mm_add_ps(v40, v41), _mm_add_ps(v42, v43))); \
241 _mm_storeu_ps(bp + j * count, v4); /* ReLU */ \
242 } \
243 bp += db->cols * count; \
244 ap += a->cols * ch * (ccv_max((i + 1) * strides - border, 0)({ typeof ((i + 1) * strides - border) _a = ((i + 1) * strides
- border); typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- ccv_max(i * strides - border, 0)({ typeof (i * strides - border) _a = (i * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
); \
245 } \
246 } parallel_endfor} }
247 if (ch_per_partition % 4 == 0)
248 {
249 main_for();
250 } else if (ch_per_partition % 4 == 3) { // unroll the last for-loops
251#define block \
252 __m128 apz40 = _mm_load1_ps(apz + x * ch + c); \
253 __m128 apz41 = _mm_load1_ps(apz + x * ch + c + 1); \
254 __m128 apz42 = _mm_load1_ps(apz + x * ch + c + 2); \
255 __m128 w40 = _mm_loadu_ps(w + (x * ch_per_partition + c) * 4); \
256 __m128 w41 = _mm_loadu_ps(w + (x * ch_per_partition + c + 1) * 4); \
257 __m128 w42 = _mm_loadu_ps(w + (x * ch_per_partition + c + 2) * 4); \
258 v40 = _mm_add_ps(_mm_mul_ps(w40, apz40), v40); \
259 v41 = _mm_add_ps(_mm_mul_ps(w41, apz41), v41); \
260 v42 = _mm_add_ps(_mm_mul_ps(w42, apz42), v42);
261 main_for(block);
262#undef block
263 } else if (ch_per_partition % 4 == 2) { // unroll the last for-loops
264#define block \
265 __m128 apz40 = _mm_load1_ps(apz + x * ch + c); \
266 __m128 apz41 = _mm_load1_ps(apz + x * ch + c + 1); \
267 __m128 w40 = _mm_loadu_ps(w + (x * ch_per_partition + c) * 4); \
268 __m128 w41 = _mm_loadu_ps(w + (x * ch_per_partition + c + 1) * 4); \
269 v40 = _mm_add_ps(_mm_mul_ps(w40, apz40), v40); \
270 v41 = _mm_add_ps(_mm_mul_ps(w41, apz41), v41);
271 main_for(block);
272#undef block
273 } else {
274#define block \
275 __m128 apz4 = _mm_load1_ps(apz + x * ch + c); \
276 __m128 w4 = _mm_loadu_ps(w + (x * ch_per_partition + c) * 4); \
277 v40 = _mm_add_ps(_mm_mul_ps(w4, apz4), v40);
278 main_for(block);
279#undef block
280 }
281#undef main_for
282}
283#elif defined(HAVE_NEON)
284static inline void _ccv_convnet_convolutional_forward_propagate_neon(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t* db, int rows, int cols, int ch, int count, int strides, int border, int kernel_rows, int kernel_cols, int ch_per_partition, int count_per_partition)
285{
286 assert(SIMD(layer))((void) sizeof ((((float*)((layer)->reserved))) ? 1 : 0), __extension__
({ if (((float*)((layer)->reserved))) ; else __assert_fail
("SIMD(layer)", "ccv_convnet.c", 286, __extension__ __PRETTY_FUNCTION__
); }))
;
287#define main_for(block) \
288 parallel_for(k, (count >> 2)){ int k; for ((k) = 0; (k) < ((count >> 2)); (k)++) { { \
289 int i, j, x, y, c; \
290 int p = k * 4 / count_per_partition; \
291 float* ap = a->data.f32 + p * ch_per_partition; \
292 float* bp = db->data.f32 + k * 4; \
293 float* layer_w = SIMD(layer)((float*)((layer)->reserved)) + k * 4 * kernel_rows * kernel_cols * ch_per_partition; \
294 float bias[4] __attribute__ ((__aligned__(16))); \
295 memcpy(bias, layer->bias + k * 4, sizeof(float) * 4); \
296 float32x4_t z4 = vmovq_n_f32(0); \
297 for (i = 0; i < db->rows; i++) \
298 { \
299 int comy = ccv_max(i * strides - border, 0)({ typeof (i * strides - border) _a = (i * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (i * strides - border); \
300 int maxy = kernel_rows - comy - (i * strides + kernel_rows - ccv_min(a->rows + border, i * strides + kernel_rows)({ typeof (a->rows + border) _a = (a->rows + border); typeof
(i * strides + kernel_rows) _b = (i * strides + kernel_rows)
; (_a < _b) ? _a : _b; })
); \
301 comy *= ch_per_partition * kernel_cols; \
302 for (j = 0; j < db->cols; j++) \
303 { \
304 float32x4_t v40 = vld1q_f32(bias); \
305 float32x4_t v41 = vmovq_n_f32(0); \
306 int comx = ccv_max(j * strides - border, 0)({ typeof (j * strides - border) _a = (j * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (j * strides - border); \
307 int maxx = kernel_cols - comx - (j * strides + kernel_cols - ccv_min(a->cols + border, j * strides + kernel_cols)({ typeof (a->cols + border) _a = (a->cols + border); typeof
(j * strides + kernel_cols) _b = (j * strides + kernel_cols)
; (_a < _b) ? _a : _b; })
); \
308 float* w = layer_w + (comx * ch_per_partition + comy) * 4; \
309 float* apz = ap + ccv_max(j * strides - border, 0)({ typeof (j * strides - border) _a = (j * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
* ch; \
310 /* when we have border, we simply do zero padding */ \
311 for (y = 0; y < maxy; y++) \
312 { \
313 for (x = 0; x < maxx; x++) \
314 { \
315 c = 0; \
316 for (; c < ch_per_partition - 1; c += 2) \
317 { \
318 float32x2_t apz4 = vld1_f32(apz + x * ch + c); \
319 float32x4_t apz40 = vdupq_lane_f32(apz4, 0); \
320 float32x4_t apz41 = vdupq_lane_f32(apz4, 1); \
321 float32x4_t w40 = vld1q_f32(w + (x * ch_per_partition + c) * 4); \
322 float32x4_t w41 = vld1q_f32(w + (x * ch_per_partition + c + 1) * 4); \
323 v40 = vmlaq_f32(v40, w40, apz40); \
324 v41 = vmlaq_f32(v41, w41, apz41); \
325 } \
326 block /* insert executions for tail partition */ \
327 } \
328 w += kernel_cols * ch_per_partition * 4; \
329 apz += a->cols * ch; \
330 } \
331 float32x4_t v4 = vmaxq_f32(z4, vaddq_f32(v40, v41)); \
332 vst1q_f32(bp + j * count, v4); /* ReLU */ \
333 } \
334 bp += db->cols * count; \
335 ap += a->cols * ch * (ccv_max((i + 1) * strides - border, 0)({ typeof ((i + 1) * strides - border) _a = ((i + 1) * strides
- border); typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- ccv_max(i * strides - border, 0)({ typeof (i * strides - border) _a = (i * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
); \
336 } \
337 } parallel_endfor} }
338 if (ch_per_partition % 2 == 0)
339 {
340 main_for();
341 } else { // unroll the last for-loops
342#define block \
343 float32x4_t apz4 = vmovq_n_f32(apz[x * ch + c]); \
344 float32x4_t w4 = vld1q_f32(w + (x * ch_per_partition + c) * 4); \
345 v40 = vmlaq_f32(v40, w4, apz4);
346 main_for(block);
347#undef block
348 }
349#undef main_for
350}
351#else
352static inline void _ccv_convnet_convolutional_forward_propagate_fallback(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t* db, int rows, int cols, int ch, int count, int strides, int border, int kernel_rows, int kernel_cols, int ch_per_partition, int count_per_partition)
353{
354 parallel_for(k, count){ int k; for ((k) = 0; (k) < (count); (k)++) { {
355 int i, j, x, y, c;
356 int p = k / count_per_partition;
357 float* ap = a->data.f32 + p * ch_per_partition;
358 float* bp = db->data.f32 + k;
359 float* layer_w = layer->w + k * kernel_rows * kernel_cols * ch_per_partition;
360 float bias = layer->bias[k];
361 for (i = 0; i < db->rows; i++)
362 {
363 int comy = ccv_max(i * strides - border, 0)({ typeof (i * strides - border) _a = (i * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (i * strides - border);
364 int maxy = kernel_rows - comy - (i * strides + kernel_rows - ccv_min(a->rows + border, i * strides + kernel_rows)({ typeof (a->rows + border) _a = (a->rows + border); typeof
(i * strides + kernel_rows) _b = (i * strides + kernel_rows)
; (_a < _b) ? _a : _b; })
);
365 comy *= ch_per_partition * kernel_cols;
366 for (j = 0; j < db->cols; j++)
367 {
368 float v = bias;
369 int comx = ccv_max(j * strides - border, 0)({ typeof (j * strides - border) _a = (j * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (j * strides - border);
370 int maxx = kernel_cols - comx - (j * strides + kernel_cols - ccv_min(a->cols + border, j * strides + kernel_cols)({ typeof (a->cols + border) _a = (a->cols + border); typeof
(j * strides + kernel_cols) _b = (j * strides + kernel_cols)
; (_a < _b) ? _a : _b; })
);
371 float* w = layer_w + comx * ch_per_partition + comy;
372 float* apz = ap + ccv_max(j * strides - border, 0)({ typeof (j * strides - border) _a = (j * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
* ch;
373 // when we have border, we simply do zero padding
374 for (y = 0; y < maxy; y++)
375 {
376 for (x = 0; x < maxx; x++)
377 for (c = 0; c < ch_per_partition; c++)
378 v += w[x * ch_per_partition + c] * apz[x * ch + c];
379 w += kernel_cols * ch_per_partition;
380 apz += a->cols * ch;
381 }
382 bp[j * count] = ccv_max(0, v)({ typeof (0) _a = (0); typeof (v) _b = (v); (_a > _b) ? _a
: _b; })
; // ReLU
383 }
384 bp += db->cols * count;
385 ap += a->cols * ch * (ccv_max((i + 1) * strides - border, 0)({ typeof ((i + 1) * strides - border) _a = ((i + 1) * strides
- border); typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- ccv_max(i * strides - border, 0)({ typeof (i * strides - border) _a = (i * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
);
386 }
387 } parallel_endfor} }
388}
389#endif
390
391static void _ccv_convnet_convolutional_forward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t** b)
392{
393 int rows, cols, partition;
394 ccv_convnet_make_output(layer, a->rows, a->cols, &rows, &cols, &partition);
395 int ch = layer->net.convolutional.channels;
396 int count = layer->net.convolutional.count;
397 int strides = layer->net.convolutional.strides;
398 int border = layer->net.convolutional.border;
399 int kernel_rows = layer->net.convolutional.rows;
400 int kernel_cols = layer->net.convolutional.cols;
401 int type = CCV_32F | count;
402 assert(CCV_GET_CHANNEL(a->type) == ch)((void) sizeof ((((a->type) & 0xFFF) == ch) ? 1 : 0), __extension__
({ if (((a->type) & 0xFFF) == ch) ; else __assert_fail
("CCV_GET_CHANNEL(a->type) == ch", "ccv_convnet.c", 402, __extension__
__PRETTY_FUNCTION__); }))
;
403 assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F)((void) sizeof ((((a->type) & 0xFF000) == CCV_32F) ? 1
: 0), __extension__ ({ if (((a->type) & 0xFF000) == CCV_32F
) ; else __assert_fail ("CCV_GET_DATA_TYPE(a->type) == CCV_32F"
, "ccv_convnet.c", 403, __extension__ __PRETTY_FUNCTION__); }
))
;
404 ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, rows, cols, type, type, 0);
405 int ch_per_partition = ch / partition;
406 int count_per_partition = count / partition;
407 assert(count_per_partition % 4 == 0)((void) sizeof ((count_per_partition % 4 == 0) ? 1 : 0), __extension__
({ if (count_per_partition % 4 == 0) ; else __assert_fail ("count_per_partition % 4 == 0"
, "ccv_convnet.c", 407, __extension__ __PRETTY_FUNCTION__); }
))
;
408#if defined(HAVE_SSE21) || defined(HAVE_NEON)
409 _ccv_convnet_layer_simd_alloc_reserved(layer);
410#endif
411#if defined(HAVE_SSE21)
412 _ccv_convnet_convolutional_forward_propagate_sse2(layer, a, db, rows, cols, ch, count, strides, border, kernel_rows, kernel_cols, ch_per_partition, count_per_partition);
413#elif defined(HAVE_NEON)
414 _ccv_convnet_convolutional_forward_propagate_neon(layer, a, db, rows, cols, ch, count, strides, border, kernel_rows, kernel_cols, ch_per_partition, count_per_partition);
415#else
416 _ccv_convnet_convolutional_forward_propagate_fallback(layer, a, db, rows, cols, ch, count, strides, border, kernel_rows, kernel_cols, ch_per_partition, count_per_partition);
417#endif
418}
419
420static void _ccv_convnet_full_connect_forward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t** b)
421{
422 assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F)((void) sizeof ((((a->type) & 0xFF000) == CCV_32F) ? 1
: 0), __extension__ ({ if (((a->type) & 0xFF000) == CCV_32F
) ; else __assert_fail ("CCV_GET_DATA_TYPE(a->type) == CCV_32F"
, "ccv_convnet.c", 422, __extension__ __PRETTY_FUNCTION__); }
))
;
423 ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, layer->net.full_connect.count, 1, CCV_32F | CCV_C1, CCV_32F | CCV_C1, 0);
424 int ch = CCV_GET_CHANNEL(a->type)((a->type) & 0xFFF);
425 int rows = a->rows, cols = a->cols;
426 // reshape a for gemm
427 assert(a->step == a->cols * CCV_GET_DATA_TYPE_SIZE(a->type) * ch)((void) sizeof ((a->step == a->cols * _ccv_get_data_type_size
[((a->type) & 0xFF000) >> 12] * ch) ? 1 : 0), __extension__
({ if (a->step == a->cols * _ccv_get_data_type_size[((
a->type) & 0xFF000) >> 12] * ch) ; else __assert_fail
("a->step == a->cols * CCV_GET_DATA_TYPE_SIZE(a->type) * ch"
, "ccv_convnet.c", 427, __extension__ __PRETTY_FUNCTION__); }
))
;
428 a->rows = rows * cols * ch, a->cols = 1, a->type = (a->type - ch) | CCV_C1;
429 assert(a->rows * db->rows == layer->wnum)((void) sizeof ((a->rows * db->rows == layer->wnum) ?
1 : 0), __extension__ ({ if (a->rows * db->rows == layer
->wnum) ; else __assert_fail ("a->rows * db->rows == layer->wnum"
, "ccv_convnet.c", 429, __extension__ __PRETTY_FUNCTION__); }
))
;
430 a->step = a->cols * CCV_GET_DATA_TYPE_SIZE(a->type)_ccv_get_data_type_size[((a->type) & 0xFF000) >>
12]
;
431 int i;
432 float* bptr = db->data.f32;
433 for (i = 0; i < db->rows; i++)
434 bptr[i] = layer->bias[i];
435 ccv_dense_matrix_t dw = ccv_dense_matrix(db->rows, a->rows, CCV_32F | CCV_C1, layer->w, 0);
436 ccv_gemm(&dw, a, 1, db, 1, 0, (ccv_matrix_t**)&db, 0); // supply db as matrix C is allowed
437 if (layer->net.full_connect.relu)
438 for (i = 0; i < db->rows; i++)
439 bptr[i] = ccv_max(0, bptr[i])({ typeof (0) _a = (0); typeof (bptr[i]) _b = (bptr[i]); (_a >
_b) ? _a : _b; })
; // relu
440 a->rows = rows, a->cols = cols, a->type = (a->type - CCV_GET_CHANNEL(a->type)((a->type) & 0xFFF)) | ch;
441 a->step = a->cols * CCV_GET_DATA_TYPE_SIZE(a->type)_ccv_get_data_type_size[((a->type) & 0xFF000) >>
12]
* CCV_GET_CHANNEL(a->type)((a->type) & 0xFFF);
442}
443
444static void _ccv_convnet_rnorm_forward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, ccv_dense_matrix_t** denoms)
445{
446 int rows, cols, partition;
447 ccv_convnet_make_output(layer, a->rows, a->cols, &rows, &cols, &partition);
448 int size = layer->net.rnorm.size;
449 float kappa = layer->net.rnorm.kappa;
450 float alpha = layer->net.rnorm.alpha;
451 float beta = layer->net.rnorm.beta;
452 int way = size / 2;
453 assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F)((void) sizeof ((((a->type) & 0xFF000) == CCV_32F) ? 1
: 0), __extension__ ({ if (((a->type) & 0xFF000) == CCV_32F
) ; else __assert_fail ("CCV_GET_DATA_TYPE(a->type) == CCV_32F"
, "ccv_convnet.c", 453, __extension__ __PRETTY_FUNCTION__); }
))
;
454 int ch = CCV_GET_CHANNEL(a->type)((a->type) & 0xFFF);
455 int type = CCV_32F | ch;
456 ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, rows, cols, type, type, 0);
457 int i, j, k, x, p;
458 float* ap = a->data.f32;
459 float* bp = db->data.f32;
460 int ch_per_partition = ch / partition;
461 if (denoms)
462 {
463 ccv_dense_matrix_t* ddenoms = *denoms = ccv_dense_matrix_renew(*denoms, rows, cols, type, type, 0);
464 float* dp = ddenoms->data.f32;
465 for (i = 0; i < db->rows; i++)
466 {
467 for (j = 0; j < db->cols; j++)
468 for (p = 0; p < partition; p++)
469 for (k = 0; k < ch_per_partition; k++)
470 {
471 float v = ap[j * ch + p * ch_per_partition + k];
472 float denom = 0;
473 for (x = ccv_max(k - way, 0)({ typeof (k - way) _a = (k - way); typeof (0) _b = (0); (_a >
_b) ? _a : _b; })
; x <= ccv_min(k + way, ch_per_partition - 1)({ typeof (k + way) _a = (k + way); typeof (ch_per_partition -
1) _b = (ch_per_partition - 1); (_a < _b) ? _a : _b; })
; x++)
474 denom += ap[j * ch + p * ch_per_partition + x] * ap[j * ch + p * ch_per_partition + x];
475 denom = kappa + alpha * denom;
476 dp[j * ch + p * ch_per_partition + k] = denom;
477 bp[j * ch + p * ch_per_partition + k] = v * powf(denom, -beta);
478 }
479 ap += a->cols * ch;
480 dp += ddenoms->cols * ch;
481 bp += db->cols * ch;
482 }
483 } else {
484 for (i = 0; i < db->rows; i++)
485 {
486 for (j = 0; j < db->cols; j++)
487 for (p = 0; p < partition; p++)
488 for (k = 0; k < ch_per_partition; k++)
489 {
490 float v = ap[j * ch + p * ch_per_partition + k];
491 float denom = 0;
492 for (x = ccv_max(k - way, 0)({ typeof (k - way) _a = (k - way); typeof (0) _b = (0); (_a >
_b) ? _a : _b; })
; x <= ccv_min(k + way, ch_per_partition - 1)({ typeof (k + way) _a = (k + way); typeof (ch_per_partition -
1) _b = (ch_per_partition - 1); (_a < _b) ? _a : _b; })
; x++)
493 denom += ap[j * ch + p * ch_per_partition + x] * ap[j * ch + p * ch_per_partition + x];
494 denom = kappa + alpha * denom;
495 bp[j * ch + p * ch_per_partition + k] = v * powf(denom, -beta);
496 }
497 ap += a->cols * ch;
498 bp += db->cols * ch;
499 }
500 }
501}
502
/*
 * Max pooling, forward pass.
 *
 * layer: supplies net.pool.size, net.pool.strides and net.pool.border.
 * a:     input matrix; asserted to be CCV_32F.
 * b:     in/out; *b is renewed to rows x cols (as reported by
 *        ccv_convnet_make_output) with the same channel count as a, and
 *        receives, per output position and channel, the largest input value
 *        inside the pooling window.
 */
503static void _ccv_convnet_max_pool_forward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t** b)
504{
505 int rows, cols, partition;
// partition is filled in by the call below but is not read again in this function
506 ccv_convnet_make_output(layer, a->rows, a->cols, &rows, &cols, &partition);
507 int size = layer->net.pool.size;
508 int strides = layer->net.pool.strides;
509 int border = layer->net.pool.border;
510 assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F)((void) sizeof ((((a->type) & 0xFF000) == CCV_32F) ? 1
: 0), __extension__ ({ if (((a->type) & 0xFF000) == CCV_32F
) ; else __assert_fail ("CCV_GET_DATA_TYPE(a->type) == CCV_32F"
, "ccv_convnet.c", 510, __extension__ __PRETTY_FUNCTION__); }
))
;
511 int ch = CCV_GET_CHANNEL(a->type)((a->type) & 0xFFF);
512 int type = CCV_32F | ch;
513 ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, rows, cols, type, type, 0);
514 int i, j, k, x, y;
515 float* ap = a->data.f32;
516 float* bp = db->data.f32;
517 for (i = 0; i < db->rows; i++)
518 {
// start_y: how many window rows lie above the first input row (0 when the window starts inside the input)
519 const int start_y = ccv_max(i * strides - border, 0)({ typeof (i * strides - border) _a = (i * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (i * strides - border);
// end_y: size reduced by the number of window rows that would fall past a->rows
520 const int end_y = size + ccv_min(i * strides + size - border, a->rows)({ typeof (i * strides + size - border) _a = (i * strides + size
- border); typeof (a->rows) _b = (a->rows); (_a < _b
) ? _a : _b; })
- (i * strides + size - border);
521 for (j = 0; j < db->cols; j++)
522 {
// same clipping as start_y / end_y, applied to columns
523 const int start_x = ccv_max(j * strides - border, 0)({ typeof (j * strides - border) _a = (j * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (j * strides - border);
524 const int end_x = size + ccv_min(j * strides + size - border, a->cols)({ typeof (j * strides + size - border) _a = (j * strides + size
- border); typeof (a->cols) _b = (a->cols); (_a < _b
) ? _a : _b; })
- (j * strides + size - border);
525 for (k = 0; k < ch; k++)
526 {
// v is seeded with the first element of the clipped window and stays 0 if the window is empty
527 float v = 0;
528 for (y = start_y; y < end_y; y++)
529 for (x = start_x; x < end_x; x++)
// ap already points at input row i * strides, so (y - border) is the row offset relative to it
530 if (x == start_x && y == start_y)
531 v = ap[(j * strides - border + x + (y - border) * a->cols) * ch + k];
532 else if (ap[(j * strides - border + x + (y - border) * a->cols) * ch + k] > v)
533 v = ap[(j * strides - border + x + (y - border) * a->cols) * ch + k];
534 bp[j * ch + k] = v;
535 }
536 }
// step the input by `strides` rows and the output by one row
537 ap += a->cols * ch * strides;
538 bp += db->cols * ch;
539 }
540}
541
/*
 * Average pooling, forward pass.
 *
 * layer: supplies net.pool.size, net.pool.strides and net.pool.border.
 * a:     input matrix; asserted to be CCV_32F.
 * b:     in/out; *b is renewed to rows x cols (as reported by
 *        ccv_convnet_make_output) with the same channel count as a, and
 *        receives, per output position and channel, the mean of the input
 *        values inside the clipped pooling window.
 */
542static void _ccv_convnet_average_pool_forward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t** b)
543{
544 int rows, cols, partition;
// partition is filled in by the call below but is not read again in this function
545 ccv_convnet_make_output(layer, a->rows, a->cols, &rows, &cols, &partition);
546 int size = layer->net.pool.size;
547 int strides = layer->net.pool.strides;
548 int border = layer->net.pool.border;
549 assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F)((void) sizeof ((((a->type) & 0xFF000) == CCV_32F) ? 1
: 0), __extension__ ({ if (((a->type) & 0xFF000) == CCV_32F
) ; else __assert_fail ("CCV_GET_DATA_TYPE(a->type) == CCV_32F"
, "ccv_convnet.c", 549, __extension__ __PRETTY_FUNCTION__); }
))
;
550 int ch = CCV_GET_CHANNEL(a->type)((a->type) & 0xFFF);
551 int type = CCV_32F | ch;
552 ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, rows, cols, type, type, 0);
553 int i, j, k, x, y;
554 float* ap = a->data.f32;
555 float* bp = db->data.f32;
556 for (i = 0; i < db->rows; i++)
557 {
// start_y: how many window rows lie above the first input row (0 when the window starts inside the input)
558 const int start_y = ccv_max(i * strides - border, 0)({ typeof (i * strides - border) _a = (i * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (i * strides - border);
// end_y: size reduced by the number of window rows that would fall past a->rows
559 const int end_y = size + ccv_min(i * strides + size - border, a->rows)({ typeof (i * strides + size - border) _a = (i * strides + size
- border); typeof (a->rows) _b = (a->rows); (_a < _b
) ? _a : _b; })
- (i * strides + size - border);
560 for (j = 0; j < db->cols; j++)
561 {
// same clipping as start_y / end_y, applied to columns
562 const int start_x = ccv_max(j * strides - border, 0)({ typeof (j * strides - border) _a = (j * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (j * strides - border);
563 const int end_x = size + ccv_min(j * strides + size - border, a->cols)({ typeof (j * strides + size - border) _a = (j * strides + size
- border); typeof (a->cols) _b = (a->cols); (_a < _b
) ? _a : _b; })
- (j * strides + size - border);
564 for (k = 0; k < ch; k++)
565 {
566 float v = 0;
567 for (y = start_y; y < end_y; y++)
568 for (x = start_x; x < end_x; x++)
// ap already points at input row i * strides, so (y - border) is the row offset relative to it
569 v += ap[(j * strides - border + x + (y - border) * a->cols) * ch + k];
// the divisor is the clipped window area, so positions outside the input do not count towards the mean
// NOTE(review): the divisor is 0 if the clipped window is empty - confirm callers never configure that
570 bp[j * ch + k] = v / ((end_x - start_x) * (end_y - start_y));
571 }
572 }
// step the input by `strides` rows and the output by one row
573 ap += a->cols * ch * strides;
574 bp += db->cols * ch;
575 }
576}
577
578static void _ccv_convnet_layer_forward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, ccv_dense_matrix_t** denoms)
579{
580 switch(layer->type)
581 {
582 case CCV_CONVNET_CONVOLUTIONAL:
583 _ccv_convnet_convolutional_forward_propagate(layer, a, b);
584 break;
585 case CCV_CONVNET_FULL_CONNECT:
586 _ccv_convnet_full_connect_forward_propagate(layer, a, b);
587 break;
588 case CCV_CONVNET_LOCAL_RESPONSE_NORM:
589 _ccv_convnet_rnorm_forward_propagate(layer, a, b, denoms);
590 break;
591 case CCV_CONVNET_MAX_POOL:
592 _ccv_convnet_max_pool_forward_propagate(layer, a, b);
593 break;
594 case CCV_CONVNET_AVERAGE_POOL:
595 _ccv_convnet_average_pool_forward_propagate(layer, a, b);
596 break;
597 }
598}
599
600static void _ccv_convnet_full_connect_forward_propagate_parallel(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t** b)
601{
602 assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F)((void) sizeof ((((a->type) & 0xFF000) == CCV_32F) ? 1
: 0), __extension__ ({ if (((a->type) & 0xFF000) == CCV_32F
) ; else __assert_fail ("CCV_GET_DATA_TYPE(a->type) == CCV_32F"
, "ccv_convnet.c", 602, __extension__ __PRETTY_FUNCTION__); }
))
;
603 ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, a->rows, layer->net.full_connect.count, CCV_32F | CCV_C1, CCV_32F | CCV_C1, 0);
604 // reshape a for gemm
605 int i, j;
606 float* bptr = db->data.f32;
607 for (i = 0; i < db->rows; i++)
608 {
609 for (j = 0; j < db->cols; j++)
610 bptr[j] = layer->bias[j];
611 bptr += db->cols;
612 }
613 ccv_dense_matrix_t dw = ccv_dense_matrix(db->cols, a->cols, CCV_32F | CCV_C1, layer->w, 0);
614 ccv_gemm(a, &dw, 1, db, 1, CCV_B_TRANSPOSE, (ccv_matrix_t**)&db, 0); // supply db as matrix C is allowed
615 bptr = db->data.f32;
616 if (layer->net.full_connect.relu)
617 for (i = 0; i < db->rows; i++)
618 {
619 for (j = 0; j < db->cols; j++)
620 bptr[j] = ccv_max(0, bptr[j])({ typeof (0) _a = (0); typeof (bptr[j]) _b = (bptr[j]); (_a >
_b) ? _a : _b; })
; // relu
621 bptr += db->cols;
622 }
623}
624
625static void _ccv_convnet_compute_softmax_parallel(ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, int type)
626{
627 assert(CCV_GET_CHANNEL(a->type) == CCV_C1)((void) sizeof ((((a->type) & 0xFFF) == CCV_C1) ? 1 : 0
), __extension__ ({ if (((a->type) & 0xFFF) == CCV_C1)
; else __assert_fail ("CCV_GET_CHANNEL(a->type) == CCV_C1"
, "ccv_convnet.c", 627, __extension__ __PRETTY_FUNCTION__); }
))
;
628 assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F)((void) sizeof ((((a->type) & 0xFF000) == CCV_32F) ? 1
: 0), __extension__ ({ if (((a->type) & 0xFF000) == CCV_32F
) ; else __assert_fail ("CCV_GET_DATA_TYPE(a->type) == CCV_32F"
, "ccv_convnet.c", 628, __extension__ __PRETTY_FUNCTION__); }
))
;
629 ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, 1, a->cols, CCV_32F | CCV_C1, CCV_32F | CCV_C1, 0);
630 ccv_zero(db);
631 int i, j;
632 float* aptr = a->data.f32;
633 float* bptr = db->data.f32;
634 float* cptr = (float*)ccmallocmalloc(sizeof(float) * a->cols);
635 for (i = 0; i < a->rows; i++)
636 {
637 double max = aptr[0];
638 for (j = 1; j < a->cols; j++)
639 if (aptr[j] > max)
640 max = aptr[j];
641 double tt = 0;
642 for (j = 0; j < a->cols; j++)
643 tt += (cptr[j] = expf(aptr[j] - max));
644 tt = 1.0 / tt;
645 for (j = 0; j < a->cols; j++)
646 bptr[j] += cptr[j] * tt;
647 aptr += a->cols;
648 }
649 ccfreefree(cptr);
650}
651
652#ifndef CASE_TESTS
653
/*
 * Run the whole network forward on *a and leave the final activation in *b.
 *
 * When the CUDA branch is compiled in and convnet->use_cwc_accel is set, the
 * call is handed to cwc_convnet_encode unchanged. Otherwise batch must be 1
 * and *a must match the network's channels, rows and cols (all asserted).
 */
654void ccv_convnet_encode(ccv_convnet_t* convnet, ccv_dense_matrix_t** a, ccv_dense_matrix_t** b, int batch)
655{
656#ifdef HAVE_CUDA1
657 if (convnet->use_cwc_accel)
658 cwc_convnet_encode(convnet, a, b, batch);
659 else {
660#endif
661 assert(batch == 1)((void) sizeof ((batch == 1) ? 1 : 0), __extension__ ({ if (batch
== 1) ; else __assert_fail ("batch == 1", "ccv_convnet.c", 661
, __extension__ __PRETTY_FUNCTION__); }))
;
662 assert(CCV_GET_CHANNEL((*a)->type) == convnet->channels)((void) sizeof (((((*a)->type) & 0xFFF) == convnet->
channels) ? 1 : 0), __extension__ ({ if ((((*a)->type) &
0xFFF) == convnet->channels) ; else __assert_fail ("CCV_GET_CHANNEL((*a)->type) == convnet->channels"
, "ccv_convnet.c", 662, __extension__ __PRETTY_FUNCTION__); }
))
;
663 assert((*a)->rows == convnet->rows)((void) sizeof (((*a)->rows == convnet->rows) ? 1 : 0),
__extension__ ({ if ((*a)->rows == convnet->rows) ; else
__assert_fail ("(*a)->rows == convnet->rows", "ccv_convnet.c"
, 663, __extension__ __PRETTY_FUNCTION__); }))
;
664 assert((*a)->cols == convnet->cols)((void) sizeof (((*a)->cols == convnet->cols) ? 1 : 0),
__extension__ ({ if ((*a)->cols == convnet->cols) ; else
__assert_fail ("(*a)->cols == convnet->cols", "ccv_convnet.c"
, 664, __extension__ __PRETTY_FUNCTION__); }))
;
665 int i;
666 // save the last layer of neuron cache in case that we encode to a different matrix
667 ccv_dense_matrix_t* out_neuron = convnet->acts[convnet->count - 1];
// let the last layer renew the caller's matrix instead of the cached one
668 convnet->acts[convnet->count - 1] = *b;
// layer 0 reads the caller's input; every later layer reads the previous activation
669 _ccv_convnet_layer_forward_propagate(convnet->layers, *a, convnet->acts, convnet->denoms);
670 for (i = 1; i < convnet->count; i++)
671 _ccv_convnet_layer_forward_propagate(convnet->layers + i, convnet->acts[i - 1], convnet->acts + i, convnet->denoms + i);
// skipped only when b is itself the address of the last acts slot
672 if (convnet->acts + convnet->count - 1 != b)
673 {
674 *b = convnet->acts[convnet->count - 1];
675 // restore the last layer of neuron cache
676 convnet->acts[convnet->count - 1] = out_neuron;
677 }
678#ifdef HAVE_CUDA1
679 }
680#endif
681}
682
683// find the layer for scanning (it is the last convolutional layer)
684static int _ccv_convnet_find_scan(ccv_convnet_t* convnet)
685{
686 int i;
687 ccv_convnet_layer_t* layers = convnet->layers;
688 for (i = convnet->count - 1; i >= 0; i--)
689 if (layers[i].type == CCV_CONVNET_CONVOLUTIONAL)
690 return i;
691 return -1;
692}
693
694static int _ccv_convnet_derive_scale(ccv_convnet_t* convnet, int scan)
695{
696 int i, scale = 1;
697 for (i = scan; i >= 0; i--)
698 {
699 ccv_convnet_layer_t* layer = convnet->layers + i;
700 switch (layer->type)
701 {
702 case CCV_CONVNET_CONVOLUTIONAL:
703 scale *= layer->net.convolutional.strides;
704 break;
705 case CCV_CONVNET_MAX_POOL:
706 case CCV_CONVNET_AVERAGE_POOL:
707 scale *= layer->net.pool.strides;
708 break;
709 }
710 }
711 return scale;
712}
713
714static int _ccv_convnet_find_full_connect(ccv_convnet_t* convnet)
715{
716 int i;
717 for (i = 0; i < convnet->count; i++)
718 if (convnet->layers[i].type == CCV_CONVNET_FULL_CONNECT)
719 return i;
720 return -1;
721}
722
/*
 * Classify `batch` input images.
 *
 * convnet:   the network; when the CUDA branch is compiled in and
 *            convnet->use_cwc_accel is set the whole call is handed to
 *            cwc_convnet_classify.
 * a:         array of `batch` input matrices.
 * symmetric: when non-zero, each image is evaluated a second time after
 *            ccv_flip with CCV_FLIP_X.
 * ranks:     output; ranks[i] is set to a new ccv_array_t holding `tops`
 *            ccv_classification_t entries (id, confidence) for a[i], pushed in
 *            order of decreasing softmax value.
 * tops:      number of entries per image; asserted to be <= the softmax width.
 *
 * On the CPU path each image is mean-subtracted, run through the layers up to
 * and including the last convolutional layer once, then five crops of that
 * result are pushed through the remaining layers up to the first fully
 * connected layer. All crop features are stacked as rows of one matrix and
 * sent through the fully connected layers together.
 */
723void ccv_convnet_classify(ccv_convnet_t* convnet, ccv_dense_matrix_t** a, int symmetric, ccv_array_t** ranks, int tops, int batch)
724{
725#ifdef HAVE_CUDA1
726 if (convnet->use_cwc_accel)
727 cwc_convnet_classify(convnet, a, symmetric, ranks, tops, batch);
728 else {
729#endif
730 int i, j, k, t;
// b[0] holds the prepared input and b[j + 1] the output of layer j; the array lives on the stack
731 ccv_dense_matrix_t** b = (ccv_dense_matrix_t**)alloca(sizeof(ccv_dense_matrix_t*) * (convnet->count + 1))__builtin_alloca (sizeof(ccv_dense_matrix_t*) * (convnet->
count + 1))
;
732 int scan = _ccv_convnet_find_scan(convnet);
// with scan == -1 the helper visits no layer and returns 1; the assert below then rejects that case
733 int scale = _ccv_convnet_derive_scale(convnet, scan);
734 int full_connect = _ccv_convnet_find_full_connect(convnet);
735 assert(scan >= 0 && scan < convnet->count)((void) sizeof ((scan >= 0 && scan < convnet->
count) ? 1 : 0), __extension__ ({ if (scan >= 0 &&
scan < convnet->count) ; else __assert_fail ("scan >= 0 && scan < convnet->count"
, "ccv_convnet.c", 735, __extension__ __PRETTY_FUNCTION__); }
))
;
736 assert(full_connect >= 0 && full_connect < convnet->count)((void) sizeof ((full_connect >= 0 && full_connect
< convnet->count) ? 1 : 0), __extension__ ({ if (full_connect
>= 0 && full_connect < convnet->count) ; else
__assert_fail ("full_connect >= 0 && full_connect < convnet->count"
, "ccv_convnet.c", 736, __extension__ __PRETTY_FUNCTION__); }
))
;
737 memset(b, 0, sizeof(ccv_dense_matrix_t*) * (convnet->count + 1));
738 for (i = 0; i < batch; i++)
739 {
740 assert(CCV_GET_CHANNEL(a[i]->type) == convnet->channels)((void) sizeof ((((a[i]->type) & 0xFFF) == convnet->
channels) ? 1 : 0), __extension__ ({ if (((a[i]->type) &
0xFFF) == convnet->channels) ; else __assert_fail ("CCV_GET_CHANNEL(a[i]->type) == convnet->channels"
, "ccv_convnet.c", 740, __extension__ __PRETTY_FUNCTION__); }
))
;
741 assert(a[i]->rows == convnet->input.height || a[i]->cols == convnet->input.width)((void) sizeof ((a[i]->rows == convnet->input.height ||
a[i]->cols == convnet->input.width) ? 1 : 0), __extension__
({ if (a[i]->rows == convnet->input.height || a[i]->
cols == convnet->input.width) ; else __assert_fail ("a[i]->rows == convnet->input.height || a[i]->cols == convnet->input.width"
, "ccv_convnet.c", 741, __extension__ __PRETTY_FUNCTION__); }
))
;
742 assert(a[i]->rows >= convnet->input.height && a[i]->cols >= convnet->input.width)((void) sizeof ((a[i]->rows >= convnet->input.height
&& a[i]->cols >= convnet->input.width) ? 1 :
0), __extension__ ({ if (a[i]->rows >= convnet->input
.height && a[i]->cols >= convnet->input.width
) ; else __assert_fail ("a[i]->rows >= convnet->input.height && a[i]->cols >= convnet->input.width"
, "ccv_convnet.c", 742, __extension__ __PRETTY_FUNCTION__); }
))
;
743 // find optimal rows and cols to slice to
// i.e. the network size plus as many whole multiples of scale as fit inside a[i]
744 int rows = convnet->rows + ((a[i]->rows - convnet->rows) / scale) * scale;
745 int cols = convnet->cols + ((a[i]->cols - convnet->cols) / scale) * scale;
746 assert(rows == convnet->input.height || cols == convnet->input.width)((void) sizeof ((rows == convnet->input.height || cols == convnet
->input.width) ? 1 : 0), __extension__ ({ if (rows == convnet
->input.height || cols == convnet->input.width) ; else __assert_fail
("rows == convnet->input.height || cols == convnet->input.width"
, "ccv_convnet.c", 746, __extension__ __PRETTY_FUNCTION__); }
))
;
747 assert(rows <= a[i]->rows && cols <= a[i]->cols)((void) sizeof ((rows <= a[i]->rows && cols <=
a[i]->cols) ? 1 : 0), __extension__ ({ if (rows <= a[i
]->rows && cols <= a[i]->cols) ; else __assert_fail
("rows <= a[i]->rows && cols <= a[i]->cols"
, "ccv_convnet.c", 747, __extension__ __PRETTY_FUNCTION__); }
))
;
748 ccv_dense_matrix_t* slice = 0;
// take the centered rows x cols region of the input
749 ccv_slice(a[i], (ccv_matrix_t**)&slice, CCV_32F, (a[i]->rows - rows) / 2, (a[i]->cols - cols) / 2, rows, cols);
750 ccv_dense_matrix_t* mean_activity = 0;
751 // scale mean activity up to be subtractable (from this one, the CPU implementation is an approximation of GPU implementation)
752 ccv_resample(convnet->mean_activity, &mean_activity, 0, rows, cols, CCV_INTER_CUBIC);
// the mean-subtracted input lands in b[0]
753 ccv_subtract(slice, mean_activity, (ccv_matrix_t**)b, CCV_32F);
754 ccv_matrix_free(mean_activity);
755 ccv_matrix_free(slice);
756 // doing the first few layers until the first scan layer
757 int out_rows, out_cols, out_partition;
// c has one row per crop: 5 crops, doubled when symmetric is non-zero
758 ccv_dense_matrix_t* c = ccv_dense_matrix_new(5 * (!!symmetric + 1), convnet->layers[full_connect].input.node.count, CCV_32F | CCV_C1, 0, 0);
// t == 0 handles the image as prepared; t == 1 (only when symmetric) handles the flipped copy made at the end of pass 0
759 for (t = 0; t <= !!symmetric; t++)
760 {
761 rows = b[0]->rows, cols = b[0]->cols;
762 for (j = 0; j < scan + 1; j++)
763 {
764 ccv_convnet_layer_t* layer = convnet->layers + j;
765 ccv_convnet_make_output(layer, rows, cols, &out_rows, &out_cols, &out_partition);
766 _ccv_convnet_layer_forward_propagate(layer, b[j], b + j + 1, 0);
767 assert(b[j + 1]->rows == out_rows && b[j + 1]->cols == out_cols)((void) sizeof ((b[j + 1]->rows == out_rows && b[j
+ 1]->cols == out_cols) ? 1 : 0), __extension__ ({ if (b[
j + 1]->rows == out_rows && b[j + 1]->cols == out_cols
) ; else __assert_fail ("b[j + 1]->rows == out_rows && b[j + 1]->cols == out_cols"
, "ccv_convnet.c", 767, __extension__ __PRETTY_FUNCTION__); }
))
;
// b[0] is kept because the flipped pass still needs it; intermediate activations are released as soon as they are consumed
768 if (j > 0)
769 ccv_matrix_free(b[j]);
770 rows = out_rows, cols = out_cols;
771 }
// five crop origins as {column offset, row offset}: top-left, top-right, center, bottom-left, bottom-right
772 int offsets[5][2] = {
773 {0, 0},
774 {cols - convnet->layers[scan + 1].input.matrix.cols, 0},
775 {(cols - convnet->layers[scan + 1].input.matrix.cols) / 2, (rows - convnet->layers[scan + 1].input.matrix.rows) / 2},
776 {0, rows - convnet->layers[scan + 1].input.matrix.rows},
777 {cols - convnet->layers[scan + 1].input.matrix.cols, rows - convnet->layers[scan + 1].input.matrix.rows},
778 };
779 for (k = 0; k < 5; k++)
780 {
781 ccv_dense_matrix_t* input = 0;
782 ccv_convnet_layer_t* layer = convnet->layers + scan + 1;
783 ccv_slice(b[scan + 1], (ccv_matrix_t**)&input, CCV_32F, offsets[k][1], offsets[k][0], layer->input.matrix.rows, layer->input.matrix.cols);
784 // copy the last layer for full connect compute
// b[full_connect] is a header without its own data (CCV_NO_DATA_ALLOC) that points at row (t * 5 + k) of c
// NOTE(review): presumably the layer before full_connect then writes its output straight into c - confirm against ccv_dense_matrix_renew
785 b[full_connect] = ccv_dense_matrix_new(convnet->layers[full_connect].input.matrix.rows, convnet->layers[full_connect].input.matrix.cols, CCV_NO_DATA_ALLOC | CCV_32F | convnet->layers[full_connect].input.matrix.channels, c->data.f32 + (t * 5 + k) * convnet->layers[full_connect].input.node.count, 0);
786 for (j = scan + 1; j < full_connect; j++)
787 {
788 layer = convnet->layers + j;
// the first layer after the scan layer reads the crop; later ones read the previous activation
789 _ccv_convnet_layer_forward_propagate(layer, j > scan + 1 ? b[j] : input, b + j + 1, 0);
790 if (j > scan + 1)
791 ccv_matrix_free(b[j]);
792 else
793 ccv_matrix_free(input);
794 }
795 ccv_matrix_free(b[full_connect]);
796 // set it to 0
797 memset(b + scan + 2, 0, sizeof(ccv_dense_matrix_t*) * (full_connect - scan - 1));
798 }
799 ccv_matrix_free(b[scan + 1]);
800 memset(b + 1, 0, sizeof(ccv_dense_matrix_t*) * (scan + 1));
// prepare the mirrored input for the second pass
801 if (t < !!symmetric)
802 ccv_flip(b[0], &b[0], 0, CCV_FLIP_X);
803 }
804 ccv_matrix_free(b[0]);
805 // now have everything in c, do the last full connect propagate
806 b[full_connect] = c;
807 for (j = full_connect; j < convnet->count; j++)
808 {
809 ccv_convnet_layer_t* layer = convnet->layers + j;
810 assert(layer->type == CCV_CONVNET_FULL_CONNECT)((void) sizeof ((layer->type == CCV_CONVNET_FULL_CONNECT) ?
1 : 0), __extension__ ({ if (layer->type == CCV_CONVNET_FULL_CONNECT
) ; else __assert_fail ("layer->type == CCV_CONVNET_FULL_CONNECT"
, "ccv_convnet.c", 810, __extension__ __PRETTY_FUNCTION__); }
))
;
811 _ccv_convnet_full_connect_forward_propagate_parallel(layer, b[j], b + j + 1);
812 ccv_matrix_free(b[j]);
813 }
814 ccv_dense_matrix_t* softmax = 0;
// the helper adds up the per-row softmax of all crops into one row
815 _ccv_convnet_compute_softmax_parallel(b[convnet->count], &softmax, 0);
816 ccv_matrix_free(b[convnet->count]);
817 ranks[i] = ccv_array_new(sizeof(ccv_classification_t), tops, 0);
818 float* r = softmax->data.f32;
819 assert(tops <= softmax->cols)((void) sizeof ((tops <= softmax->cols) ? 1 : 0), __extension__
({ if (tops <= softmax->cols) ; else __assert_fail ("tops <= softmax->cols"
, "ccv_convnet.c", 819, __extension__ __PRETTY_FUNCTION__); }
))
;
// pick the largest remaining entry `tops` times
820 for (j = 0; j < tops; j++)
821 {
822 float max_val = -1;
823 int max_idx = -1;
824 for (k = 0; k < softmax->cols; k++)
825 if (r[k] >= 0 && r[k] > max_val)
826 max_val = r[k], max_idx = k;
827 assert(max_idx >= 0)((void) sizeof ((max_idx >= 0) ? 1 : 0), __extension__ ({ if
(max_idx >= 0) ; else __assert_fail ("max_idx >= 0", "ccv_convnet.c"
, 827, __extension__ __PRETTY_FUNCTION__); }))
;
// mark the entry as taken so the next round skips it
828 r[max_idx] = -1;
829 ccv_classification_t classification = {
830 .id = max_idx,
// dividing by the number of crops turns the summed softmax into a mean
831 .confidence = max_val / ((!!symmetric + 1) * 5),
832 };
833 ccv_array_push(ranks[i], &classification);
834 }
835 ccv_matrix_free(softmax);
// clear all slots so the next image starts from empty pointers
836 memset(b, 0, sizeof(ccv_dense_matrix_t*) * (convnet->count + 1));
837 }
838#ifdef HAVE_CUDA1
839 }
840#endif
841}
842
843#endif
844
845#ifdef HAVE_GSL1
846
847// compute back propagated gradient & weight update delta
/*
 * Convolutional layer, backward pass.
 *
 * layer:         the layer; its net.convolutional settings and weights (layer->w) are read.
 * a:             incoming gradient; asserted to have the layer's output rows/cols and `count` channels.
 * n:             gate values; a position contributes only where n is > 0 (the relu test below).
 *                NOTE(review): presumably the forward output of this layer - confirm against the caller.
 * m:             matrix asserted to be CCV_32F with `ch` channels; it is multiplied with the gradient
 *                to form the weight update. NOTE(review): presumably the forward input - confirm.
 * b:             optional; when non-NULL, *b is renewed to m's size, zeroed, and receives the
 *                gradient propagated through layer->w.
 * update_params: accumulators; update_params->w and update_params->bias are added to, not overwritten.
 */
848static void _ccv_convnet_convolutional_backward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t* n, ccv_dense_matrix_t* m, ccv_dense_matrix_t** b, ccv_convnet_layer_t* update_params)
849{
850 // a is the input gradient (for back prop).
851 // x is the input (for forward prop), b is the output gradient (gradient, or known as propagated error)
852 // note that y (the output from forward prop) is not included because the full connect net is simple enough that we don't need it
853 int rows, cols, partition;
854 ccv_convnet_make_output(layer, layer->input.matrix.rows, layer->input.matrix.cols, &rows, &cols, &partition);
855 int ch = layer->net.convolutional.channels;
856 int count = layer->net.convolutional.count;
857 int strides = layer->net.convolutional.strides;
858 int border = layer->net.convolutional.border;
859 int kernel_rows = layer->net.convolutional.rows;
860 int kernel_cols = layer->net.convolutional.cols;
861 assert(a->rows == rows)((void) sizeof ((a->rows == rows) ? 1 : 0), __extension__ (
{ if (a->rows == rows) ; else __assert_fail ("a->rows == rows"
, "ccv_convnet.c", 861, __extension__ __PRETTY_FUNCTION__); }
))
;
862 assert(a->cols == cols)((void) sizeof ((a->cols == cols) ? 1 : 0), __extension__ (
{ if (a->cols == cols) ; else __assert_fail ("a->cols == cols"
, "ccv_convnet.c", 862, __extension__ __PRETTY_FUNCTION__); }
))
;
863 assert(CCV_GET_CHANNEL(a->type) == count)((void) sizeof ((((a->type) & 0xFFF) == count) ? 1 : 0
), __extension__ ({ if (((a->type) & 0xFFF) == count) ;
else __assert_fail ("CCV_GET_CHANNEL(a->type) == count", "ccv_convnet.c"
, 863, __extension__ __PRETTY_FUNCTION__); }))
;
// remember a's header fields; they are rewritten here and put back on the last line of the function
// (when the three asserts above hold, the rewrite stores the values a already has)
864 int a_rows = a->rows, a_cols = a->cols, a_ch = CCV_GET_CHANNEL(a->type)((a->type) & 0xFFF);
865 a->rows = rows, a->cols = cols, a->type = (a->type - a_ch) | count;
866 assert(CCV_GET_CHANNEL(m->type) == ch)((void) sizeof ((((m->type) & 0xFFF) == ch) ? 1 : 0), __extension__
({ if (((m->type) & 0xFFF) == ch) ; else __assert_fail
("CCV_GET_CHANNEL(m->type) == ch", "ccv_convnet.c", 866, __extension__
__PRETTY_FUNCTION__); }))
;
867 assert(CCV_GET_DATA_TYPE(m->type) == CCV_32F)((void) sizeof ((((m->type) & 0xFF000) == CCV_32F) ? 1
: 0), __extension__ ({ if (((m->type) & 0xFF000) == CCV_32F
) ; else __assert_fail ("CCV_GET_DATA_TYPE(m->type) == CCV_32F"
, "ccv_convnet.c", 867, __extension__ __PRETTY_FUNCTION__); }
))
;
868 int count_per_partition = count / partition;
869 int ch_per_partition = ch / partition;
870 // update weight gradient
// one iteration per filter k; in this build parallel_for expands to the plain for loop shown after it
871 parallel_for(k, count){ int k; for ((k) = 0; (k) < (count); (k)++) { {
872 int i, j, x, y, c;
// p is the partition filter k belongs to; mp starts at that partition's first channel of m
873 int p = k / count_per_partition;
874 float* mp = m->data.f32 + p * ch_per_partition;
875 float* ap = a->data.f32 + k;
876 float* np = n->data.f32 + k;
// filter k's slice of the weight accumulator: kernel_rows * kernel_cols * ch_per_partition floats
877 float* update_w = update_params->w + k * kernel_rows * kernel_cols * ch_per_partition;
878 float bias = 0;
879 for (i = 0; i < rows; i++)
880 {
// comy: kernel rows that fall above m (inside the border); maxy: kernel rows that overlap m
881 int comy = ccv_max(i * strides - border, 0)({ typeof (i * strides - border) _a = (i * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (i * strides - border);
882 int maxy = kernel_rows - comy - (i * strides + kernel_rows - ccv_min(m->rows + border, i * strides + kernel_rows)({ typeof (m->rows + border) _a = (m->rows + border); typeof
(i * strides + kernel_rows) _b = (i * strides + kernel_rows)
; (_a < _b) ? _a : _b; })
);
// from here on comy is an offset, in floats, to the first overlapping kernel row
883 comy *= ch_per_partition * kernel_cols;
884 for (j = 0; j < cols; j++)
885 {
886 if (np[j * count] > 0)
887 { /* when np is bigger than 0, relu continues to update the weight, otherwise it stops */
888 float v = ap[j * count];
889 bias += v;
// comx / maxx: the column counterparts of comy / maxy
890 int comx = ccv_max(j * strides - border, 0)({ typeof (j * strides - border) _a = (j * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (j * strides - border);
891 int maxx = kernel_cols - comx - (j * strides + kernel_cols - ccv_min(m->cols + border, j * strides + kernel_cols)({ typeof (m->cols + border) _a = (m->cols + border); typeof
(j * strides + kernel_cols) _b = (j * strides + kernel_cols)
; (_a < _b) ? _a : _b; })
);
892 float* w = update_w + comx * ch_per_partition + comy;
// first column of m that the kernel overlaps at this output position
893 float* mpz = mp + ccv_max(j * strides - border, 0)({ typeof (j * strides - border) _a = (j * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
* ch;
894 /* when we have border, we simply do zero padding */
895 for (y = 0; y < maxy; y++)
896 {
897 for (x = 0; x < maxx; x++)
898 for (c = 0; c < ch_per_partition; c++)
899 w[x * ch_per_partition + c] += v * mpz[x * ch + c];
900 w += kernel_cols * ch_per_partition;
901 mpz += m->cols * ch;
902 }
903 }
904 }
905 ap += a->cols * count;
906 np += n->cols * count;
// advance mp by however many rows the clipped window start moves between output rows i and i + 1
907 mp += m->cols * ch * (ccv_max((i + 1) * strides - border, 0)({ typeof ((i + 1) * strides - border) _a = ((i + 1) * strides
- border); typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- ccv_max(i * strides - border, 0)({ typeof (i * strides - border) _a = (i * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
);
908 }
909 update_params->bias[k] += bias;
910 } parallel_endfor} }
// optional second half: gradient with respect to the layer input, written to *b
911 if (b)
912 {
913 ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, m->rows, m->cols, CCV_32F | CCV_GET_CHANNEL(m->type)((m->type) & 0xFFF), CCV_32F | CCV_GET_CHANNEL(m->type)((m->type) & 0xFFF), 0);
914 // clear it up before propagate result
915 ccv_zero(db);
916 int k;
// same traversal as the weight update above, but reading layer->w and accumulating into db
917 for (k = 0; k < count; k++)
918 {
919 int i, j, x, y, c;
920 int p = k / count_per_partition;
921 float* bp = db->data.f32 + p * ch_per_partition;
922 float* ap = a->data.f32 + k;
923 float* np = n->data.f32 + k;
924 float* layer_w = layer->w + k * kernel_rows * kernel_cols * ch_per_partition;
925 for (i = 0; i < rows; i++)
926 {
927 int comy = ccv_max(i * strides - border, 0)({ typeof (i * strides - border) _a = (i * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (i * strides - border);
928 int maxy = kernel_rows - comy - (i * strides + kernel_rows - ccv_min(db->rows + border, i * strides + kernel_rows)({ typeof (db->rows + border) _a = (db->rows + border);
typeof (i * strides + kernel_rows) _b = (i * strides + kernel_rows
); (_a < _b) ? _a : _b; })
);
929 comy *= ch_per_partition * kernel_cols;
930 for (j = 0; j < cols; j++)
931 {
932 if (np[j * count] > 0)
933 { /* when np is bigger than 0, relu continues to update the weight, otherwise it stops */
934 float v = ap[j * count];
935 int comx = ccv_max(j * strides - border, 0)({ typeof (j * strides - border) _a = (j * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (j * strides - border);
936 int maxx = kernel_cols - comx - (j * strides + kernel_cols - ccv_min(db->cols + border, j * strides + kernel_cols)({ typeof (db->cols + border) _a = (db->cols + border);
typeof (j * strides + kernel_cols) _b = (j * strides + kernel_cols
); (_a < _b) ? _a : _b; })
);
937 float* w = layer_w + comx * ch_per_partition + comy;
938 float* bpz = bp + ccv_max(j * strides - border, 0)({ typeof (j * strides - border) _a = (j * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
* ch;
939 /* when we have border, we simply do zero padding */
940 for (y = 0; y < maxy; y++)
941 {
942 for (x = 0; x < maxx; x++)
943 for (c = 0; c < ch_per_partition; c++)
944 bpz[x * ch + c] += v * w[x * ch_per_partition + c];
945 w += kernel_cols * ch_per_partition;
946 bpz += db->cols * ch;
947 }
948 }
949 }
950 ap += a->cols * count;
951 np += n->cols * count;
952 bp += db->cols * ch * (ccv_max((i + 1) * strides - border, 0)({ typeof ((i + 1) * strides - border) _a = ((i + 1) * strides
- border); typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- ccv_max(i * strides - border, 0)({ typeof (i * strides - border) _a = (i * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
);
953 }
954 }
955 }
// put back the header fields of a that were saved at the top
956 a->rows = a_rows, a->cols = a_cols, a->type = (a->type - CCV_GET_CHANNEL(a->type)((a->type) & 0xFFF)) | a_ch;
957}
958
/*
 * Fully connected layer, backward pass.
 *
 * layer:         the layer; layer->w and layer->net.full_connect.relu are read.
 * a:             incoming gradient; entries are zeroed in place where y is <= 0 when relu is set.
 * y:             forward output of this layer (per the comment below); only read for the relu gate.
 * x:             forward input; its header (rows, cols, type, step) is temporarily rewritten to a
 *                single column and restored before returning.
 * b:             optional; when non-NULL, *b is renewed to x's shape and receives the propagated gradient.
 * update_params: accumulators; update_params->bias and update_params->w are added to.
 *
 * The bare numbers and "Taking false branch" / "Assuming ..." lines inside the body are path notes
 * from the analyzer report this listing was taken from, not program text.
 */
959static void _ccv_convnet_full_connect_backward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t* y, ccv_dense_matrix_t* x, ccv_dense_matrix_t** b, ccv_convnet_layer_t* update_params)
960{
961 // a is the input gradient (for back prop), y is the output (for forward prop)
962 // x is the input (for forward prop), b is the output gradient (gradient, or known as propagated error)
963 ccv_dense_matrix_t* db = 0;
964 if (b)
27
Taking false branch
965 db = *b = ccv_dense_matrix_renew(*b, x->rows, x->cols, CCV_32F | CCV_GET_CHANNEL(x->type)((x->type) & 0xFFF), CCV_32F | CCV_GET_CHANNEL(x->type)((x->type) & 0xFFF), 0);
// save x's shape, then view x as a (rows * cols * channels) x 1 single-channel column
966 int x_rows = x->rows, x_cols = x->cols, x_ch = CCV_GET_CHANNEL(x->type)((x->type) & 0xFFF);
967 x->rows = x_rows * x_cols * x_ch, x->cols = 1, x->type = (x->type - x_ch) | CCV_C1;
968 x->step = x->cols * CCV_GET_DATA_TYPE_SIZE(x->type)_ccv_get_data_type_size[((x->type) & 0xFF000) >>
12]
;
969 int i;
970 if (layer->net.full_connect.relu)
28
Assuming the condition is false
29
Taking false branch
// relu gate: drop the gradient wherever the forward output was not positive
971 for (i = 0; i < y->rows; i++)
972 if (y->data.f32[i] <= 0)
973 a->data.f32[i] = 0;
// w wraps update_params->w as an a->rows x x->rows matrix header
// the analyzer report flags update_params->w as uninitialized on the path marked above; whether that path is real depends on the caller, which is not shown here
974 ccv_dense_matrix_t w = ccv_dense_matrix(a->rows, x->rows, CCV_32F | CCV_C1, update_params->w, 0);
30
4th function call argument is an uninitialized value
975 ccv_dense_matrix_t* dw = &w;
976 // compute bias gradient
977 ccv_dense_matrix_t bias = ccv_dense_matrix(a->rows, 1, CCV_32F | CCV_C1, update_params->bias, 0);
978 ccv_dense_matrix_t* dbias = &bias;
// the sum of a and the bias accumulator is written back to the bias accumulator
979 ccv_add(a, dbias, (ccv_matrix_t**)&dbias, 0);
980 // compute weight gradient
// dw is passed both as matrix C and as the output, so the result is added onto the existing accumulator
981 ccv_gemm(a, x, 1, dw, 1, CCV_B_TRANSPOSE, (ccv_matrix_t**)&dw, 0);
// reuse w, now wrapping the layer's actual weights
982 w = ccv_dense_matrix(a->rows, x->rows, CCV_32F | CCV_C1, layer->w, 0);
983 // propagate error
984 if (db)
985 {
// give db the same temporary column shape as x for the product
986 db->rows = x->rows, db->cols = x->cols, db->type = (db->type - x_ch) | CCV_C1;
987 db->step = db->cols * CCV_GET_DATA_TYPE_SIZE(db->type)_ccv_get_data_type_size[((db->type) & 0xFF000) >>
12]
;
988 ccv_gemm(&w, a, 1, 0, 0, CCV_A_TRANSPOSE, (ccv_matrix_t**)&db, 0);
// restore db to x's original shape
989 db->rows = x_rows, db->cols = x_cols, db->type = (db->type - CCV_GET_CHANNEL(db->type)((db->type) & 0xFFF)) | x_ch;
990 db->step = db->cols * CCV_GET_DATA_TYPE_SIZE(db->type)_ccv_get_data_type_size[((db->type) & 0xFF000) >>
12]
* CCV_GET_CHANNEL(db->type)((db->type) & 0xFFF);
991 }
// restore x's original shape and step
992 x->rows = x_rows, x->cols = x_cols, x->type = (x->type - CCV_GET_CHANNEL(x->type)((x->type) & 0xFFF)) | x_ch;
993 x->step = x->cols * CCV_GET_DATA_TYPE_SIZE(x->type)_ccv_get_data_type_size[((x->type) & 0xFF000) >>
12]
* CCV_GET_CHANNEL(x->type)((x->type) & 0xFFF);
994}
995
996static void _ccv_convnet_rnorm_backward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t* n, ccv_dense_matrix_t* m, ccv_dense_matrix_t* denoms, ccv_dense_matrix_t** b)
997{
998 int rows, cols, partition;
999 ccv_convnet_make_output(layer, layer->input.matrix.rows, layer->input.matrix.cols, &rows, &cols, &partition);
1000 int size = layer->net.rnorm.size;
1001 float alpha = layer->net.rnorm.alpha;
1002 float beta = layer->net.rnorm.beta;
1003 int way = size / 2;
1004 assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F)((void) sizeof ((((a->type) & 0xFF000) == CCV_32F) ? 1
: 0), __extension__ ({ if (((a->type) & 0xFF000) == CCV_32F
) ; else __assert_fail ("CCV_GET_DATA_TYPE(a->type) == CCV_32F"
, "ccv_convnet.c", 1004, __extension__ __PRETTY_FUNCTION__); }
))
;
1005 int ch = CCV_GET_CHANNEL(a->type)((a->type) & 0xFFF);
1006 int type = CCV_32F | ch;
1007 ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, rows, cols, type, type, 0);
1008 int i, j, k, x, p;
1009 float* ap = a->data.f32;
1010 float* np = n->data.f32;
1011 float* mp = m->data.f32;
1012 float* dp = denoms->data.f32;
1013 float* bp = db->data.f32;
1014 int ch_per_partition = ch / partition;
1015 for (i = 0; i < db->rows; i++)
1016 {
1017 for (j = 0; j < db->cols; j++)
1018 for (p = 0; p < partition; p++)
1019 for (k = 0; k < ch_per_partition; k++)
1020 {
1021 float nom = 0;
1022 for (x = ccv_max(k - way, 0)({ typeof (k - way) _a = (k - way); typeof (0) _b = (0); (_a >
_b) ? _a : _b; })
; x <= ccv_min(k + way, ch_per_partition - 1)({ typeof (k + way) _a = (k + way); typeof (ch_per_partition -
1) _b = (ch_per_partition - 1); (_a < _b) ? _a : _b; })
; x++)
1023 nom += -2 * alpha * beta * ap[j * ch + x + p * ch_per_partition] * np[j * ch + x + p * ch_per_partition] / dp[j * ch + x + p * ch_per_partition];
1024 bp[j * ch + k + p * ch_per_partition] = mp[j * ch + k + p * ch_per_partition] * nom + ap[j * ch + k + p * ch_per_partition] * powf(dp[j * ch + k + p * ch_per_partition], -beta);
1025 }
1026 ap += a->cols * ch;
1027 np += n->cols * ch;
1028 mp += m->cols * ch;
1029 dp += denoms->cols * ch;
1030 bp += db->cols * ch;
1031 }
1032}
1033
1034static void _ccv_convnet_max_pool_backward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t* n, ccv_dense_matrix_t* m, ccv_dense_matrix_t** b)
1035{
1036 // a is the input gradient (for back prop), y is the output (from forward prop),
1037 // x is the input (for forward prop), b is the output gradient (gradient, or known as propagated error)
1038 // pooling layer doesn't need the dropout
1039 if (b)
1040 {
1041 assert(CCV_GET_CHANNEL(a->type) == CCV_GET_CHANNEL(n->type))((void) sizeof ((((a->type) & 0xFFF) == ((n->type) &
0xFFF)) ? 1 : 0), __extension__ ({ if (((a->type) & 0xFFF
) == ((n->type) & 0xFFF)) ; else __assert_fail ("CCV_GET_CHANNEL(a->type) == CCV_GET_CHANNEL(n->type)"
, "ccv_convnet.c", 1041, __extension__ __PRETTY_FUNCTION__); }
))
;
1042 assert(CCV_GET_CHANNEL(a->type) == CCV_GET_CHANNEL(m->type))((void) sizeof ((((a->type) & 0xFFF) == ((m->type) &
0xFFF)) ? 1 : 0), __extension__ ({ if (((a->type) & 0xFFF
) == ((m->type) & 0xFFF)) ; else __assert_fail ("CCV_GET_CHANNEL(a->type) == CCV_GET_CHANNEL(m->type)"
, "ccv_convnet.c", 1042, __extension__ __PRETTY_FUNCTION__); }
))
;
1043 int ch = CCV_GET_CHANNEL(a->type)((a->type) & 0xFFF);
1044 ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, m->rows, m->cols, CCV_32F | ch, CCV_32F | ch, 0);
1045 ccv_zero(db);
1046 int size = layer->net.pool.size;
1047 int strides = layer->net.pool.strides;
1048 int border = layer->net.pool.border;
1049 int i, j, k, x, y;
1050 float* ap = a->data.f32;
1051 float* bp = db->data.f32;
1052 float* np = n->data.f32;
1053 float* mp = m->data.f32;
1054 for (i = 0; i < a->rows; i++)
1055 {
1056 const int start_y = ccv_max(i * strides - border, 0)({ typeof (i * strides - border) _a = (i * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (i * strides - border);
1057 const int end_y = size + ccv_min(i * strides + size - border, db->rows)({ typeof (i * strides + size - border) _a = (i * strides + size
- border); typeof (db->rows) _b = (db->rows); (_a <
_b) ? _a : _b; })
- (i * strides + size - border);
1058 for (j = 0; j < a->cols; j++)
1059 {
1060 const int start_x = ccv_max(j * strides - border, 0)({ typeof (j * strides - border) _a = (j * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (j * strides - border);
1061 const int end_x = size + ccv_min(j * strides + size - border, db->cols)({ typeof (j * strides + size - border) _a = (j * strides + size
- border); typeof (db->cols) _b = (db->cols); (_a <
_b) ? _a : _b; })
- (j * strides + size - border);
1062 for (k = 0; k < ch; k++)
1063 {
1064 float v = np[j * ch + k];
1065 float u = ap[j * ch + k];
1066 for (y = start_y; y < end_y; y++)
1067 for (x = start_x; x < end_x; x++)
1068 // we have to do direct comparison otherwise it will contribute to too many cells
1069 // and the propagation won't work. But CPU will have different result comparing with GPU
1070 if (mp[(j * strides - border + x + (y - border) * m->cols) * ch + k] == v)
1071 bp[(j * strides - border + x + (y - border) * db->cols) * ch + k] += u;
1072 }
1073 }
1074 ap += a->cols * ch;
1075 np += n->cols * ch;
1076 bp += db->cols * ch * strides;
1077 mp += m->cols * ch * strides;
1078 }
1079 }
1080}
1081
1082static void _ccv_convnet_average_pool_backward_propagate(ccv_convnet_layer_t* layer, ccv_dense_matrix_t* a, ccv_dense_matrix_t* m, ccv_dense_matrix_t** b)
1083{
1084 // a is the input gradient (for back prop), y is the output (from forward prop),
1085 // x is the input (for forward prop), b is the output gradient (gradient, or known as propagated error)
1086 // pooling layer doesn't need the dropout
1087 if (b)
1088 {
1089 assert(CCV_GET_CHANNEL(a->type) == CCV_GET_CHANNEL(m->type))((void) sizeof ((((a->type) & 0xFFF) == ((m->type) &
0xFFF)) ? 1 : 0), __extension__ ({ if (((a->type) & 0xFFF
) == ((m->type) & 0xFFF)) ; else __assert_fail ("CCV_GET_CHANNEL(a->type) == CCV_GET_CHANNEL(m->type)"
, "ccv_convnet.c", 1089, __extension__ __PRETTY_FUNCTION__); }
))
;
1090 int ch = CCV_GET_CHANNEL(a->type)((a->type) & 0xFFF);
1091 ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, m->rows, m->cols, CCV_32F | ch, CCV_32F | ch, 0);
1092 ccv_zero(db);
1093 int size = layer->net.pool.size;
1094 int strides = layer->net.pool.strides;
1095 int border = layer->net.pool.border;
1096 int i, j, k, x, y;
1097 float* ap = a->data.f32;
1098 float* bp = db->data.f32;
1099 for (i = 0; i < a->rows; i++)
1100 {
1101 const int start_y = ccv_max(i * strides - border, 0)({ typeof (i * strides - border) _a = (i * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (i * strides - border);
1102 const int end_y = size + ccv_min(i * strides + size - border, db->rows)({ typeof (i * strides + size - border) _a = (i * strides + size
- border); typeof (db->rows) _b = (db->rows); (_a <
_b) ? _a : _b; })
- (i * strides + size - border);
1103 for (j = 0; j < a->cols; j++)
1104 {
1105 const int start_x = ccv_max(j * strides - border, 0)({ typeof (j * strides - border) _a = (j * strides - border);
typeof (0) _b = (0); (_a > _b) ? _a : _b; })
- (j * strides - border);
1106 const int end_x = size + ccv_min(j * strides + size - border, db->cols)({ typeof (j * strides + size - border) _a = (j * strides + size
- border); typeof (db->cols) _b = (db->cols); (_a <
_b) ? _a : _b; })
- (j * strides + size - border);
1107 for (k = 0; k < ch; k++)
1108 {
1109 float u = ap[j * ch + k] / ((end_x - start_x) * (end_y - start_y));
1110 for (y = start_y; y < end_y; y++)
1111 for (x = start_x; x < end_x; x++)
1112 bp[(j * strides - border + x + (y - border) * db->cols) * ch + k] += u;
1113 }
1114 }
1115 ap += a->cols * ch;
1116 bp += db->cols * ch * strides;
1117 }
1118 }
1119}
1120
1121static void _ccv_convnet_propagate_loss(ccv_convnet_t* convnet, ccv_dense_matrix_t* a, ccv_dense_matrix_t* dloss, ccv_convnet_t* update_params)
1122{
1123 int i;
1124 ccv_convnet_layer_t* layer = convnet->layers + convnet->count - 1;
1125 assert(layer->type == CCV_CONVNET_FULL_CONNECT)((void) sizeof ((layer->type == CCV_CONVNET_FULL_CONNECT) ?
1 : 0), __extension__ ({ if (layer->type == CCV_CONVNET_FULL_CONNECT
) ; else __assert_fail ("layer->type == CCV_CONVNET_FULL_CONNECT"
, "ccv_convnet.c", 1125, __extension__ __PRETTY_FUNCTION__); }
))
; // the last layer has too be a full connect one to generate softmax result
22
Assuming the condition is true
23
Taking true branch
1126 _ccv_convnet_full_connect_backward_propagate(layer, dloss, convnet->acts[convnet->count - 1], convnet->acts[convnet->count - 2], convnet->count - 1 > 0 ? update_params->acts + convnet->count - 2 : 0, update_params->layers + convnet->count - 1);
24
Assuming the condition is false
25
'?' condition is false
26
Calling '_ccv_convnet_full_connect_backward_propagate'
1127 for (i = convnet->count - 2; i >= 0; i--)
1128 {
1129 layer = convnet->layers + i;
1130 switch (layer->type)
1131 {
1132 case CCV_CONVNET_CONVOLUTIONAL:
1133 _ccv_convnet_convolutional_backward_propagate(layer, update_params->acts[i], convnet->acts[i], i > 0 ? convnet->acts[i - 1] : a, i > 0 ? update_params->acts + i - 1 : 0, update_params->layers + i);
1134 break;
1135 case CCV_CONVNET_FULL_CONNECT:
1136 _ccv_convnet_full_connect_backward_propagate(layer, update_params->acts[i], convnet->acts[i], i > 0 ? convnet->acts[i - 1] : a, i > 0 ? update_params->acts + i - 1 : 0, update_params->layers + i);
1137 break;
1138 case CCV_CONVNET_LOCAL_RESPONSE_NORM:
1139 _ccv_convnet_rnorm_backward_propagate(layer, update_params->acts[i], convnet->acts[i], i > 0 ? convnet->acts[i - 1] : a, convnet->denoms[i], i > 0 ? update_params->acts + i - 1 : 0);
1140 break;
1141 case CCV_CONVNET_MAX_POOL:
1142 _ccv_convnet_max_pool_backward_propagate(layer, update_params->acts[i], convnet->acts[i], i > 0 ? convnet->acts[i - 1] : a, i > 0 ? update_params->acts + i - 1 : 0);
1143 break;
1144 case CCV_CONVNET_AVERAGE_POOL:
1145 _ccv_convnet_average_pool_backward_propagate(layer, update_params->acts[i], i > 0 ? convnet->acts[i - 1] : a, i > 0 ? update_params->acts + i - 1 : 0);
1146 break;
1147 }
1148 }
1149}
1150
1151static void _ccv_convnet_update(ccv_convnet_t* convnet, int batch, ccv_convnet_t* momentum, ccv_convnet_t* update_params, ccv_convnet_layer_train_param_t* layer_params)
1152{
1153 int i, j;
1154 float learn_rate;
1155 for (i = 0; i < convnet->count; i++)
1156 switch (update_params->layers[i].type)
1157 {
1158 case CCV_CONVNET_CONVOLUTIONAL:
1159 {
1160 float* w = convnet->layers[i].w;
1161 float* vw = momentum->layers[i].w;
1162 float* dw = update_params->layers[i].w;
1163 learn_rate = layer_params[i].w.learn_rate / batch;
1164 for (j = 0; j < convnet->layers[i].wnum; j++)
1165 {
1166 vw[j] = layer_params[i].w.momentum * vw[j] - layer_params[i].w.decay * layer_params[i].w.learn_rate * w[j] + learn_rate * dw[j];
1167 w[j] += vw[j];
1168 }
1169 float* bias = convnet->layers[i].bias;
1170 float* vbias = momentum->layers[i].bias;
1171 float* dbias = update_params->layers[i].bias;
1172 learn_rate = layer_params[i].bias.learn_rate / batch;
1173 for (j = 0; j < convnet->layers[i].net.convolutional.count; j++)
1174 {
1175 vbias[j] = layer_params[i].bias.momentum * vbias[j] - layer_params[i].bias.decay * layer_params[i].bias.learn_rate * bias[j] + learn_rate * dbias[j];
1176 bias[j] += vbias[j];
1177 }
1178 break;
1179 }
1180 case CCV_CONVNET_FULL_CONNECT:
1181 {
1182 float* w = convnet->layers[i].w;
1183 float* vw = momentum->layers[i].w;
1184 float* dw = update_params->layers[i].w;
1185 learn_rate = layer_params[i].w.learn_rate / batch;
1186 for (j = 0; j < convnet->layers[i].wnum; j++)
1187 {
1188 vw[j] = layer_params[i].w.momentum * vw[j] - layer_params[i].w.decay * layer_params[i].w.learn_rate * w[j] + learn_rate * dw[j];
1189 w[j] += vw[j];
1190 }
1191 float* bias = convnet->layers[i].bias;
1192 float* vbias = momentum->layers[i].bias;
1193 float* dbias = update_params->layers[i].bias;
1194 learn_rate = layer_params[i].bias.learn_rate / batch;
1195 for (j = 0; j < convnet->layers[i].net.full_connect.count; j++)
1196 {
1197 vbias[j] = layer_params[i].bias.momentum * vbias[j] - layer_params[i].bias.decay * layer_params[i].bias.learn_rate * bias[j] + learn_rate * dbias[j];
1198 bias[j] += vbias[j];
1199 }
1200 break;
1201 }
1202 }
1203}
1204
1205static void _ccv_convnet_update_zero(ccv_convnet_t* update_params)
1206{
1207 int i;
1208 for (i = 0; i < update_params->count; i++)
1209 switch (update_params->layers[i].type)
1210 {
1211 case CCV_CONVNET_CONVOLUTIONAL:
1212 memset(update_params->layers[i].w, 0, sizeof(float) * update_params->layers[i].wnum);
1213 memset(update_params->layers[i].bias, 0, sizeof(float) * update_params->layers[i].net.convolutional.count);
1214 break;
1215 case CCV_CONVNET_FULL_CONNECT:
1216 assert(update_params->layers[i].wnum % update_params->layers[i].net.full_connect.count == 0)((void) sizeof ((update_params->layers[i].wnum % update_params
->layers[i].net.full_connect.count == 0) ? 1 : 0), __extension__
({ if (update_params->layers[i].wnum % update_params->
layers[i].net.full_connect.count == 0) ; else __assert_fail (
"update_params->layers[i].wnum % update_params->layers[i].net.full_connect.count == 0"
, "ccv_convnet.c", 1216, __extension__ __PRETTY_FUNCTION__); }
))
;
1217 memset(update_params->layers[i].w, 0, sizeof(float) * update_params->layers[i].wnum);
1218 memset(update_params->layers[i].bias, 0, sizeof(float) * update_params->layers[i].net.full_connect.count);
1219 break;
1220 }
1221}
1222
1223static ccv_convnet_t* _ccv_convnet_update_new(ccv_convnet_t* convnet)
1224{
1225 ccv_convnet_t* update_params = (ccv_convnet_t*)ccmallocmalloc(sizeof(ccv_convnet_t) + sizeof(ccv_convnet_layer_t) * convnet->count + sizeof(ccv_dense_matrix_t*) * convnet->count);
8
Uninitialized value stored to field 'w'
1226 update_params->reserved = 0;
1227 update_params->layers = (ccv_convnet_layer_t*)(update_params + 1);
1228 update_params->acts = (ccv_dense_matrix_t**)(update_params->layers + convnet->count);
1229 memset(update_params->acts, 0, sizeof(ccv_dense_matrix_t*) * convnet->count);
1230 update_params->denoms = 0;
1231 update_params->input = convnet->input;
1232 update_params->rows = convnet->rows;
1233 update_params->cols = convnet->cols;
1234 update_params->count = convnet->count;
1235 update_params->channels = convnet->channels;
1236 update_params->mean_activity = 0;
1237 int i;
1238 for (i = 0; i < convnet->count; i++)
9
Assuming the condition is false
10
Loop condition is false. Execution continues on line 1264
1239 {
1240 update_params->layers[i].type = convnet->layers[i].type;
1241 update_params->layers[i].input = convnet->layers[i].input;
1242 update_params->layers[i].net = convnet->layers[i].net;
1243 update_params->layers[i].wnum = convnet->layers[i].wnum;
1244 update_params->layers[i].reserved = 0;
1245 switch (update_params->layers[i].type)
1246 {
1247 case CCV_CONVNET_CONVOLUTIONAL:
1248 update_params->layers[i].w = (float*)cccalloccalloc(update_params->layers[i].wnum + update_params->layers[i].net.convolutional.count, sizeof(float));
1249 update_params->layers[i].bias = update_params->layers[i].w + update_params->layers[i].wnum;
1250 break;
1251 case CCV_CONVNET_FULL_CONNECT:
1252 assert(update_params->layers[i].wnum % update_params->layers[i].net.full_connect.count == 0)((void) sizeof ((update_params->layers[i].wnum % update_params
->layers[i].net.full_connect.count == 0) ? 1 : 0), __extension__
({ if (update_params->layers[i].wnum % update_params->
layers[i].net.full_connect.count == 0) ; else __assert_fail (
"update_params->layers[i].wnum % update_params->layers[i].net.full_connect.count == 0"
, "ccv_convnet.c", 1252, __extension__ __PRETTY_FUNCTION__); }
))
;
1253 update_params->layers[i].w = (float*)cccalloccalloc(update_params->layers[i].wnum + update_params->layers[i].net.full_connect.count, sizeof(float));
1254 update_params->layers[i].bias = update_params->layers[i].w + update_params->layers[i].wnum;
1255 break;
1256 case CCV_CONVNET_LOCAL_RESPONSE_NORM:
1257 case CCV_CONVNET_MAX_POOL:
1258 case CCV_CONVNET_AVERAGE_POOL:
1259 update_params->layers[i].w = 0;
1260 update_params->layers[i].bias = 0;
1261 break;
1262 }
1263 }
1264 return update_params;
1265}
1266
1267static void _ccv_convnet_compute_softmax(ccv_dense_matrix_t* a, ccv_dense_matrix_t** b, int type)
1268{
1269 int ch = CCV_GET_CHANNEL(a->type)((a->type) & 0xFFF);
1270 assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F)((void) sizeof ((((a->type) & 0xFF000) == CCV_32F) ? 1
: 0), __extension__ ({ if (((a->type) & 0xFF000) == CCV_32F
) ; else __assert_fail ("CCV_GET_DATA_TYPE(a->type) == CCV_32F"
, "ccv_convnet.c", 1270, __extension__ __PRETTY_FUNCTION__); }
))
;
1271 ccv_dense_matrix_t* db = *b = ccv_dense_matrix_renew(*b, a->rows, a->cols, CCV_32F | ch, CCV_32F | ch, 0);
1272 int i;
1273 float* aptr = a->data.f32;
1274 float* bptr = db->data.f32;
1275 double max = aptr[0];
1276 for (i = 1; i < a->rows * a->cols * ch; i++)
1277 if (aptr[i] > max)
1278 max = aptr[i];
1279 double tt = 0;
1280 for (i = 0; i < a->rows * a->cols * ch; i++)
1281 tt += (bptr[i] = expf(aptr[i] - max));
1282 tt = 1.0 / tt;
1283 for (i = 0; i < a->rows * a->cols * ch; i++)
1284 bptr[i] *= tt;
1285}
1286
1287static void _ccv_convnet_classify(ccv_convnet_t* convnet, ccv_dense_matrix_t** a, int* labels, int batch)
1288{
1289 assert(batch == 1)((void) sizeof ((batch == 1) ? 1 : 0), __extension__ ({ if (batch
== 1) ; else __assert_fail ("batch == 1", "ccv_convnet.c", 1289
, __extension__ __PRETTY_FUNCTION__); }))
;
1290 ccv_convnet_encode(convnet, a, convnet->acts + convnet->count - 1, 1);
1291 int i, c = 0;
1292 ccv_dense_matrix_t* b = convnet->acts[convnet->count - 1];
1293 float maxc = b->data.f32[0];
1294 for (i = 1; i < b->rows; i++)
1295 if (b->data.f32[i] > maxc)
1296 maxc = b->data.f32[i], c = i;
1297 labels[0] = c;
1298}
1299
1300#endif
1301
1302#ifndef CASE_TESTS
1303
1304void ccv_convnet_supervised_train(ccv_convnet_t* convnet, ccv_array_t* categorizeds, ccv_array_t* tests, const char* filename, ccv_convnet_train_param_t params)
1305{
1306#ifdef HAVE_GSL1
1307#ifdef HAVE_CUDA1
1308 if (convnet->use_cwc_accel)
1
Assuming the condition is false
2
Taking false branch
1309 cwc_convnet_supervised_train(convnet, categorizeds, tests, filename, params);
1310 else {
1311#endif
1312 int i, j, t;
1313 gsl_rng_env_setup();
1314 gsl_rng* rng = gsl_rng_alloc(gsl_rng_default);
1315 int aligned_padding = categorizeds->rnum % params.mini_batch;
1316 int aligned_rnum = categorizeds->rnum - aligned_padding;
1317 int* idx = (int*)ccmallocmalloc(sizeof(int) * (categorizeds->rnum + aligned_padding));
1318 for (i = 0; i < categorizeds->rnum; i++)
3
Assuming the condition is false
4
Loop condition is false. Execution continues on line 1320
1319 idx[i] = i;
1320 gsl_ran_shuffle(rng, idx, categorizeds->rnum, sizeof(int));
1321 // the last layer has to be full connect, thus we can use it as softmax layer
1322 assert(convnet->layers[convnet->count - 1].type == CCV_CONVNET_FULL_CONNECT)((void) sizeof ((convnet->layers[convnet->count - 1].type
== CCV_CONVNET_FULL_CONNECT) ? 1 : 0), __extension__ ({ if (
convnet->layers[convnet->count - 1].type == CCV_CONVNET_FULL_CONNECT
) ; else __assert_fail ("convnet->layers[convnet->count - 1].type == CCV_CONVNET_FULL_CONNECT"
, "ccv_convnet.c", 1322, __extension__ __PRETTY_FUNCTION__); }
))
;
5
Assuming the condition is true
6
Taking true branch
1323 int category_count = convnet->layers[convnet->count - 1].net.full_connect.count;
1324 ccv_convnet_t* update_params = _ccv_convnet_update_new(convnet);
7
Calling '_ccv_convnet_update_new'
11
Returning from '_ccv_convnet_update_new'
1325 ccv_convnet_t* momentum = _ccv_convnet_update_new(convnet);
1326 for (t = 0; t < params.max_epoch; t++)
12
Assuming the condition is true
13
Loop condition is true. Entering loop body
1327 {
1328 for (i = 0; i < aligned_rnum; i++)
14
Assuming 'i' is < 'aligned_rnum'
15
Loop condition is true. Entering loop body
1329 {
1330 // dropout the first hidden layer
1331 ccv_categorized_t* categorized = (ccv_categorized_t*)ccv_array_get(categorizeds, idx[i])((void*)(((char*)((categorizeds)->data)) + (size_t)(categorizeds
)->rsize * (size_t)(idx[i])))
;
1332 ccv_convnet_encode(convnet, &categorized->matrix, convnet->acts + convnet->count - 1, 1);
1333 ccv_dense_matrix_t* softmax = convnet->acts[convnet->count - 1];
1334 float* dloss = softmax->data.f32;
1335 _ccv_convnet_compute_softmax(softmax, &softmax, 0);
1336 assert(softmax->rows == category_count && softmax->cols == 1)((void) sizeof ((softmax->rows == category_count &&
softmax->cols == 1) ? 1 : 0), __extension__ ({ if (softmax
->rows == category_count && softmax->cols == 1)
; else __assert_fail ("softmax->rows == category_count && softmax->cols == 1"
, "ccv_convnet.c", 1336, __extension__ __PRETTY_FUNCTION__); }
))
;
16
Assuming the condition is true
17
Assuming the condition is true
18
Taking true branch
1337 // this mashes softmax and logistic regression together
1338 // also, it gives you -D[loss w.r.t. to x_i] (note the negative sign)
1339 for (j = 0; j < category_count; j++)
19
Assuming 'j' is >= 'category_count'
20
Loop condition is false. Execution continues on line 1341
1340 dloss[j] = (j == categorized->c) - dloss[j];
1341 _ccv_convnet_propagate_loss(convnet, categorized->matrix, softmax, update_params);
21
Calling '_ccv_convnet_propagate_loss'
1342 if ((i + 1) % params.mini_batch == 0)
1343 {
1344 FLUSH(CCV_CLI_INFO, " - at epoch %03d / %d => stochastic gradient descent at %d / %d", t + 1, params.max_epoch, (i + 1) / params.mini_batch, aligned_rnum / params.mini_batch)do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { for
(_CCV_PRINT_LOOP = 0; _CCV_PRINT_LOOP < _CCV_PRINT_COUNT;
_CCV_PRINT_LOOP++) printf("\b"); for (_CCV_PRINT_LOOP = 0; _CCV_PRINT_LOOP
< _CCV_PRINT_COUNT; _CCV_PRINT_LOOP++) printf(" "); for (
_CCV_PRINT_LOOP = 0; _CCV_PRINT_LOOP < _CCV_PRINT_COUNT; _CCV_PRINT_LOOP
++) printf("\b"); _CCV_PRINT_COUNT = printf(" - at epoch %03d / %d => stochastic gradient descent at %d / %d"
, t + 1, params.max_epoch, (i + 1) / params.mini_batch, aligned_rnum
/ params.mini_batch); fflush(stdout); } } while (0)
;
1345 // update weights
1346 _ccv_convnet_update(convnet, params.mini_batch, momentum, update_params, params.layer_params);
1347 _ccv_convnet_update_zero(update_params);
1348 // compact the convnet to avoid any staled temporary resource
1349 ccv_convnet_compact(convnet);
1350 }
1351 }
1352 int miss = 0;
1353 for (i = 0; i < tests->rnum; i++)
1354 {
1355 FLUSH(CCV_CLI_INFO, " - at epoch %03d / %d => going through %d / %d for tests", t + 1, params.max_epoch, i + 1, tests->rnum)do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { for
(_CCV_PRINT_LOOP = 0; _CCV_PRINT_LOOP < _CCV_PRINT_COUNT;
_CCV_PRINT_LOOP++) printf("\b"); for (_CCV_PRINT_LOOP = 0; _CCV_PRINT_LOOP
< _CCV_PRINT_COUNT; _CCV_PRINT_LOOP++) printf(" "); for (
_CCV_PRINT_LOOP = 0; _CCV_PRINT_LOOP < _CCV_PRINT_COUNT; _CCV_PRINT_LOOP
++) printf("\b"); _CCV_PRINT_COUNT = printf(" - at epoch %03d / %d => going through %d / %d for tests"
, t + 1, params.max_epoch, i + 1, tests->rnum); fflush(stdout
); } } while (0)
;
1356 ccv_categorized_t* test = (ccv_categorized_t*)ccv_array_get(tests, i)((void*)(((char*)((tests)->data)) + (size_t)(tests)->rsize
* (size_t)(i)))
;
1357 int c = 0;
1358 _ccv_convnet_classify(convnet, &test->matrix, &c, 1);
1359 if (c != test->c)
1360 ++miss;
1361 }
1362 FLUSH(CCV_CLI_INFO, " - at epoch %03d / %d => with miss rate %.2f%%\n", t + 1, params.max_epoch, miss * 100.0f / tests->rnum)do { if ((CCV_CLI_INFO & ccv_cli_get_output_levels())) { for
(_CCV_PRINT_LOOP = 0; _CCV_PRINT_LOOP < _CCV_PRINT_COUNT;
_CCV_PRINT_LOOP++) printf("\b"); for (_CCV_PRINT_LOOP = 0; _CCV_PRINT_LOOP
< _CCV_PRINT_COUNT; _CCV_PRINT_LOOP++) printf(" "); for (
_CCV_PRINT_LOOP = 0; _CCV_PRINT_LOOP < _CCV_PRINT_COUNT; _CCV_PRINT_LOOP
++) printf("\b"); _CCV_PRINT_COUNT = printf(" - at epoch %03d / %d => with miss rate %.2f%%\n"
, t + 1, params.max_epoch, miss * 100.0f / tests->rnum); fflush
(stdout); } } while (0)
;
1363 if (t + 1 < params.max_epoch)
1364 {
1365 // reshuffle the parts we visited and move the rest to the beginning
1366 memcpy(idx + categorizeds->rnum, idx + aligned_rnum, sizeof(int) * aligned_padding);
1367 memmove(idx + aligned_padding, idx, sizeof(int) * aligned_rnum);
1368 memcpy(idx, idx + categorizeds->rnum, sizeof(int) * aligned_padding);
1369 gsl_ran_shuffle(rng, idx + aligned_padding, aligned_rnum, sizeof(int));
1370 }
1371 }
1372 ccfreefree(idx);
1373 ccv_convnet_free(momentum);
1374 ccv_convnet_free(update_params);
1375 gsl_rng_free(rng);
1376#ifdef HAVE_CUDA1
1377 }
1378#endif
1379#else
1380 assert(0 && "ccv_convnet_supervised_train requires GSL library support")((void) sizeof ((0 && "ccv_convnet_supervised_train requires GSL library support"
) ? 1 : 0), __extension__ ({ if (0 && "ccv_convnet_supervised_train requires GSL library support"
) ; else __assert_fail ("0 && \"ccv_convnet_supervised_train requires GSL library support\""
, "ccv_convnet.c", 1380, __extension__ __PRETTY_FUNCTION__); }
))
;
1381#endif
1382}
1383
1384void ccv_convnet_compact(ccv_convnet_t* convnet)
1385{
1386#ifdef HAVE_CUDA1
1387 cwc_convnet_compact(convnet);
1388#endif
1389 int i;
1390 for (i = 0; i < convnet->count; i++)
1391 {
1392 if (convnet->acts[i])
1393 ccv_matrix_free(convnet->acts[i]);
1394 convnet->acts[i] = 0;
1395 if (convnet->denoms)
1396 {
1397 if (convnet->denoms[i])
1398 ccv_matrix_free(convnet->denoms[i]);
1399 convnet->denoms[i] = 0;
1400 }
1401 if (SIMD(convnet->layers + i)((float*)((convnet->layers + i)->reserved)))
1402 {
1403 ccfreefree(convnet->layers[i].reserved);
1404 convnet->layers[i].reserved = 0;
1405 }
1406 }
1407}
1408
1409void ccv_convnet_write(ccv_convnet_t* convnet, const char* filename, ccv_convnet_write_param_t params)
1410{
1411 sqlite3* db = 0;
1412 if (SQLITE_OK0 == sqlite3_open(filename, &db))
1413 {
1414 const char layer_create_table_qs[] =
1415 "CREATE TABLE IF NOT EXISTS layer_params "
1416 "(layer INTEGER PRIMARY KEY ASC, type INTEGER, "
1417 "input_matrix_rows INTEGER, input_matrix_cols INTEGER, input_matrix_channels INTEGER, input_matrix_partition INTEGER, input_node_count INTEGER, "
1418 "output_rows INTEGER, output_cols INTEGER, output_channels INTEGER, output_partition INTEGER, output_count INTEGER, output_strides INTEGER, output_border INTEGER, "
1419 "output_size INTEGER, output_kappa REAL, output_alpha REAL, output_beta REAL, output_relu INTEGER);"
1420 "CREATE TABLE IF NOT EXISTS convnet_params "
1421 "(convnet INTEGER PRIMARY KEY ASC, input_height INTEGER, input_width INTEGER, mean_activity BLOB);"
1422 "CREATE TABLE IF NOT EXISTS layer_data "
1423 "(layer INTEGER PRIMARY KEY ASC, weight BLOB, bias BLOB, half_precision INTEGER);";
1424 assert(SQLITE_OK == sqlite3_exec(db, layer_create_table_qs, 0, 0, 0))((void) sizeof ((0 == sqlite3_exec(db, layer_create_table_qs,
0, 0, 0)) ? 1 : 0), __extension__ ({ if (0 == sqlite3_exec(db
, layer_create_table_qs, 0, 0, 0)) ; else __assert_fail ("SQLITE_OK == sqlite3_exec(db, layer_create_table_qs, 0, 0, 0)"
, "ccv_convnet.c", 1424, __extension__ __PRETTY_FUNCTION__); }
))
;
1425 const char layer_params_insert_qs[] =
1426 "REPLACE INTO layer_params "
1427 "(layer, type, "
1428 "input_matrix_rows, input_matrix_cols, input_matrix_channels, input_matrix_partition, input_node_count, "
1429 "output_rows, output_cols, output_channels, output_partition, output_count, output_strides, output_border, "
1430 "output_size, output_kappa, output_alpha, output_beta, output_relu) VALUES "
1431 "($layer, $type, " // 1
1432 "$input_matrix_rows, $input_matrix_cols, $input_matrix_channels, $input_matrix_partition, $input_node_count, " // 6
1433 "$output_rows, $output_cols, $output_channels, $output_partition, $output_count, $output_strides, $output_border, " // 13
1434 "$output_size, $output_kappa, $output_alpha, $output_beta, $output_relu);"; // 18
1435 sqlite3_stmt* layer_params_insert_stmt = 0;
1436 assert(SQLITE_OK == sqlite3_prepare_v2(db, layer_params_insert_qs, sizeof(layer_params_insert_qs), &layer_params_insert_stmt, 0))((void) sizeof ((0 == sqlite3_prepare_v2(db, layer_params_insert_qs
, sizeof(layer_params_insert_qs), &layer_params_insert_stmt
, 0)) ? 1 : 0), __extension__ ({ if (0 == sqlite3_prepare_v2(
db, layer_params_insert_qs, sizeof(layer_params_insert_qs), &
layer_params_insert_stmt, 0)) ; else __assert_fail ("SQLITE_OK == sqlite3_prepare_v2(db, layer_params_insert_qs, sizeof(layer_params_insert_qs), &layer_params_insert_stmt, 0)"
, "ccv_convnet.c", 1436, __extension__ __PRETTY_FUNCTION__); }
))
;
1437 const char layer_data_insert_qs[] =
1438 "REPLACE INTO layer_data "
1439 "(layer, weight, bias, half_precision) VALUES ($layer, $weight, $bias, $half_precision);";
1440 sqlite3_stmt* layer_data_insert_stmt = 0;
1441 assert(SQLITE_OK == sqlite3_prepare_v2(db, layer_data_insert_qs, sizeof(layer_data_insert_qs), &layer_data_insert_stmt, 0))((void) sizeof ((0 == sqlite3_prepare_v2(db, layer_data_insert_qs
, sizeof(layer_data_insert_qs), &layer_data_insert_stmt, 0
)) ? 1 : 0), __extension__ ({ if (0 == sqlite3_prepare_v2(db,
layer_data_insert_qs, sizeof(layer_data_insert_qs), &layer_data_insert_stmt
, 0)) ; else __assert_fail ("SQLITE_OK == sqlite3_prepare_v2(db, layer_data_insert_qs, sizeof(layer_data_insert_qs), &layer_data_insert_stmt, 0)"
, "ccv_convnet.c", 1441, __extension__ __PRETTY_FUNCTION__); }
))
;
1442 int i;
1443 for (i = 0; i < convnet->count; i++)
1444 {
1445 ccv_convnet_layer_t* layer = convnet->layers + i;
1446 // insert layer params
1447 sqlite3_bind_int(layer_params_insert_stmt, 1, i);
1448 sqlite3_bind_int(layer_params_insert_stmt, 2, layer->type);
1449 sqlite3_bind_int(layer_params_insert_stmt, 3, layer->input.matrix.rows);
1450 sqlite3_bind_int(layer_params_insert_stmt, 4, layer->input.matrix.cols);
1451 sqlite3_bind_int(layer_params_insert_stmt, 5, layer->input.matrix.channels);
1452 sqlite3_bind_int(layer_params_insert_stmt, 6, layer->input.matrix.partition);
1453 sqlite3_bind_int(layer_params_insert_stmt, 7, layer->input.node.count);
1454 switch (layer->type)
1455 {
1456 case CCV_CONVNET_CONVOLUTIONAL:
1457 sqlite3_bind_int(layer_params_insert_stmt, 8, layer->net.convolutional.rows);
1458 sqlite3_bind_int(layer_params_insert_stmt, 9, layer->net.convolutional.cols);
1459 sqlite3_bind_int(layer_params_insert_stmt, 10, layer->net.convolutional.channels);
1460 sqlite3_bind_int(layer_params_insert_stmt, 11, layer->net.convolutional.partition);
1461 sqlite3_bind_int(layer_params_insert_stmt, 12, layer->net.convolutional.count);
1462 sqlite3_bind_int(layer_params_insert_stmt, 13, layer->net.convolutional.strides);
1463 sqlite3_bind_int(layer_params_insert_stmt, 14, layer->net.convolutional.border);
1464 break;
1465 case CCV_CONVNET_FULL_CONNECT:
1466 sqlite3_bind_int(layer_params_insert_stmt, 12, layer->net.full_connect.count);
1467 sqlite3_bind_int(layer_params_insert_stmt, 19, layer->net.full_connect.relu);
1468 break;
1469 case CCV_CONVNET_MAX_POOL:
1470 case CCV_CONVNET_AVERAGE_POOL:
1471 sqlite3_bind_int(layer_params_insert_stmt, 13, layer->net.pool.strides);
1472 sqlite3_bind_int(layer_params_insert_stmt, 14, layer->net.pool.border);
1473 sqlite3_bind_int(layer_params_insert_stmt, 15, layer->net.pool.size);
1474 break;
1475 case CCV_CONVNET_LOCAL_RESPONSE_NORM:
1476 sqlite3_bind_int(layer_params_insert_stmt, 15, layer->net.rnorm.size);
1477 sqlite3_bind_double(layer_params_insert_stmt, 16, layer->net.rnorm.kappa);
1478 sqlite3_bind_double(layer_params_insert_stmt, 17, layer->net.rnorm.alpha);
1479 sqlite3_bind_double(layer_params_insert_stmt, 18, layer->net.rnorm.beta);
1480 break;
1481 }
1482 assert(SQLITE_DONE == sqlite3_step(layer_params_insert_stmt))((void) sizeof ((101 == sqlite3_step(layer_params_insert_stmt
)) ? 1 : 0), __extension__ ({ if (101 == sqlite3_step(layer_params_insert_stmt
)) ; else __assert_fail ("SQLITE_DONE == sqlite3_step(layer_params_insert_stmt)"
, "ccv_convnet.c", 1482, __extension__ __PRETTY_FUNCTION__); }
))
;
1483 sqlite3_reset(layer_params_insert_stmt);
1484 sqlite3_clear_bindings(layer_params_insert_stmt);
1485 // insert layer data
1486 if (layer->type == CCV_CONVNET_CONVOLUTIONAL || layer->type == CCV_CONVNET_FULL_CONNECT)
1487 {
1488 sqlite3_bind_int(layer_data_insert_stmt, 1, i);
1489 if (params.half_precision)
1490 {
1491 uint16_t* w = (uint16_t*)ccmallocmalloc(sizeof(uint16_t) * layer->wnum);
1492 ccv_float_to_half_precision(layer->w, w, layer->wnum);
1493 uint16_t* bias = (uint16_t*)ccmallocmalloc(sizeof(uint16_t) * (layer->type == CCV_CONVNET_CONVOLUTIONAL ? layer->net.convolutional.count : layer->net.full_connect.count));
1494 ccv_float_to_half_precision(layer->bias, bias, layer->type == CCV_CONVNET_CONVOLUTIONAL ? layer->net.convolutional.count : layer->net.full_connect.count);
1495 sqlite3_bind_blob(layer_data_insert_stmt, 2, w, sizeof(uint16_t) * layer->wnum, ccfreefree);
1496 sqlite3_bind_blob(layer_data_insert_stmt, 3, bias, sizeof(uint16_t) * (layer->type == CCV_CONVNET_CONVOLUTIONAL ? layer->net.convolutional.count : layer->net.full_connect.count), ccfreefree);
1497 } else {
1498 sqlite3_bind_blob(layer_data_insert_stmt, 2, layer->w, sizeof(float) * layer->wnum, SQLITE_STATIC((sqlite3_destructor_type)0));
1499 sqlite3_bind_blob(layer_data_insert_stmt, 3, layer->bias, sizeof(float) * (layer->type == CCV_CONVNET_CONVOLUTIONAL ? layer->net.convolutional.count : layer->net.full_connect.count), SQLITE_STATIC((sqlite3_destructor_type)0));
1500 }
1501 sqlite3_bind_int(layer_data_insert_stmt, 4, params.half_precision);
1502 assert(SQLITE_DONE == sqlite3_step(layer_data_insert_stmt))((void) sizeof ((101 == sqlite3_step(layer_data_insert_stmt))
? 1 : 0), __extension__ ({ if (101 == sqlite3_step(layer_data_insert_stmt
)) ; else __assert_fail ("SQLITE_DONE == sqlite3_step(layer_data_insert_stmt)"
, "ccv_convnet.c", 1502, __extension__ __PRETTY_FUNCTION__); }
))
;
1503 sqlite3_reset(layer_data_insert_stmt);
1504 sqlite3_clear_bindings(layer_data_insert_stmt);
1505 }
1506 }
1507 // insert convnet related params
1508 const char convnet_params_insert_qs[] =
1509 "REPLACE INTO convnet_params "
1510 "(convnet, mean_activity, input_height, input_width) VALUES (0, $mean_activity, $input_height, $input_width);";
1511 sqlite3_stmt* convnet_params_insert_stmt = 0;
1512 assert(SQLITE_OK == sqlite3_prepare_v2(db, convnet_params_insert_qs, sizeof(convnet_params_insert_qs), &convnet_params_insert_stmt, 0))((void) sizeof ((0 == sqlite3_prepare_v2(db, convnet_params_insert_qs
, sizeof(convnet_params_insert_qs), &convnet_params_insert_stmt
, 0)) ? 1 : 0), __extension__ ({ if (0 == sqlite3_prepare_v2(
db, convnet_params_insert_qs, sizeof(convnet_params_insert_qs
), &convnet_params_insert_stmt, 0)) ; else __assert_fail (
"SQLITE_OK == sqlite3_prepare_v2(db, convnet_params_insert_qs, sizeof(convnet_params_insert_qs), &convnet_params_insert_stmt, 0)"
, "ccv_convnet.c", 1512, __extension__ __PRETTY_FUNCTION__); }
))
;
1513 assert(convnet->mean_activity->rows == convnet->input.height)((void) sizeof ((convnet->mean_activity->rows == convnet
->input.height) ? 1 : 0), __extension__ ({ if (convnet->
mean_activity->rows == convnet->input.height) ; else __assert_fail
("convnet->mean_activity->rows == convnet->input.height"
, "ccv_convnet.c", 1513, __extension__ __PRETTY_FUNCTION__); }
))
;
1514 assert(convnet->mean_activity->cols == convnet->input.width)((void) sizeof ((convnet->mean_activity->cols == convnet
->input.width) ? 1 : 0), __extension__ ({ if (convnet->
mean_activity->cols == convnet->input.width) ; else __assert_fail
("convnet->mean_activity->cols == convnet->input.width"
, "ccv_convnet.c", 1514, __extension__ __PRETTY_FUNCTION__); }
))
;
1515 assert(CCV_GET_CHANNEL(convnet->mean_activity->type) == convnet->channels)((void) sizeof ((((convnet->mean_activity->type) & 0xFFF
) == convnet->channels) ? 1 : 0), __extension__ ({ if (((convnet
->mean_activity->type) & 0xFFF) == convnet->channels
) ; else __assert_fail ("CCV_GET_CHANNEL(convnet->mean_activity->type) == convnet->channels"
, "ccv_convnet.c", 1515, __extension__ __PRETTY_FUNCTION__); }
))
;
1516 assert(CCV_GET_DATA_TYPE(convnet->mean_activity->type) == CCV_32F)((void) sizeof ((((convnet->mean_activity->type) & 0xFF000
) == CCV_32F) ? 1 : 0), __extension__ ({ if (((convnet->mean_activity
->type) & 0xFF000) == CCV_32F) ; else __assert_fail ("CCV_GET_DATA_TYPE(convnet->mean_activity->type) == CCV_32F"
, "ccv_convnet.c", 1516, __extension__ __PRETTY_FUNCTION__); }
))
;
1517 sqlite3_bind_blob(convnet_params_insert_stmt, 1, convnet->mean_activity->data.f32, sizeof(float) * convnet->input.height * convnet->input.width * convnet->channels, SQLITE_STATIC((sqlite3_destructor_type)0));
1518 sqlite3_bind_int(convnet_params_insert_stmt, 2, convnet->input.height);
1519 sqlite3_bind_int(convnet_params_insert_stmt, 3, convnet->input.width);
1520 assert(SQLITE_DONE == sqlite3_step(convnet_params_insert_stmt))((void) sizeof ((101 == sqlite3_step(convnet_params_insert_stmt
)) ? 1 : 0), __extension__ ({ if (101 == sqlite3_step(convnet_params_insert_stmt
)) ; else __assert_fail ("SQLITE_DONE == sqlite3_step(convnet_params_insert_stmt)"
, "ccv_convnet.c", 1520, __extension__ __PRETTY_FUNCTION__); }
))
;
1521 sqlite3_reset(convnet_params_insert_stmt);
1522 sqlite3_clear_bindings(convnet_params_insert_stmt);
1523
1524 sqlite3_finalize(layer_params_insert_stmt);
1525 sqlite3_finalize(layer_data_insert_stmt);
1526 sqlite3_finalize(convnet_params_insert_stmt);
1527 sqlite3_close(db);
1528 }
1529}
1530
1531ccv_convnet_t* ccv_convnet_read(int use_cwc_accel, const char* filename)
1532{
1533 sqlite3* db = 0;
1534 if (SQLITE_OK0 == sqlite3_open(filename, &db))
1535 {
1536 ccv_convnet_t* convnet = 0;
1537 sqlite3_stmt* layer_params_stmt = 0;
1538 // load layer params
1539 const char layer_params_qs[] =
1540 "SELECT type, " // 1
1541 "input_matrix_rows, input_matrix_cols, input_matrix_channels, input_matrix_partition, input_node_count, " // 6
1542 "output_rows, output_cols, output_channels, output_partition, output_count, output_strides, output_border, " // 13
1543 "output_size, output_kappa, output_alpha, output_beta, output_relu FROM layer_params ORDER BY layer ASC;"; // 18
1544 if (SQLITE_OK0 == sqlite3_prepare_v2(db, layer_params_qs, sizeof(layer_params_qs), &layer_params_stmt, 0))
1545 {
1546 ccv_array_t* layer_params = ccv_array_new(sizeof(ccv_convnet_layer_param_t), 3, 0);
1547 while (sqlite3_step(layer_params_stmt) == SQLITE_ROW100)
1548 {
1549 ccv_convnet_layer_param_t layer_param;
1550 layer_param.type = sqlite3_column_int(layer_params_stmt, 0);
1551 layer_param.input.matrix.rows = sqlite3_column_int(layer_params_stmt, 1);
1552 layer_param.input.matrix.cols = sqlite3_column_int(layer_params_stmt, 2);
1553 layer_param.input.matrix.channels = sqlite3_column_int(layer_params_stmt, 3);
1554 layer_param.input.matrix.partition = sqlite3_column_int(layer_params_stmt, 4);
1555 layer_param.input.node.count = sqlite3_column_int(layer_params_stmt, 5);
1556 layer_param.bias = layer_param.glorot = 0; // this is irrelevant to read convnet
1557 switch (layer_param.type)
1558 {
1559 case CCV_CONVNET_CONVOLUTIONAL:
1560 layer_param.output.convolutional.rows = sqlite3_column_int(layer_params_stmt, 6);
1561 layer_param.output.convolutional.cols = sqlite3_column_int(layer_params_stmt, 7);
1562 layer_param.output.convolutional.channels = sqlite3_column_int(layer_params_stmt, 8);
1563 layer_param.output.convolutional.partition = sqlite3_column_int(layer_params_stmt, 9);
1564 layer_param.output.convolutional.count = sqlite3_column_int(layer_params_stmt, 10);
1565 layer_param.output.convolutional.strides = sqlite3_column_int(layer_params_stmt, 11);
1566 layer_param.output.convolutional.border = sqlite3_column_int(layer_params_stmt, 12);
1567 break;
1568 case CCV_CONVNET_FULL_CONNECT:
1569 layer_param.output.full_connect.count = sqlite3_column_int(layer_params_stmt, 10);
1570 layer_param.output.full_connect.relu = sqlite3_column_int(layer_params_stmt, 17);
1571 break;
1572 case CCV_CONVNET_MAX_POOL:
1573 case CCV_CONVNET_AVERAGE_POOL:
1574 layer_param.output.pool.strides = sqlite3_column_int(layer_params_stmt, 11);
1575 layer_param.output.pool.border = sqlite3_column_int(layer_params_stmt, 12);
1576 layer_param.output.pool.size = sqlite3_column_int(layer_params_stmt, 13);
1577 break;
1578 case CCV_CONVNET_LOCAL_RESPONSE_NORM:
1579 layer_param.output.rnorm.size = sqlite3_column_int(layer_params_stmt, 13);
1580 layer_param.output.rnorm.kappa = sqlite3_column_double(layer_params_stmt, 14);
1581 layer_param.output.rnorm.alpha = sqlite3_column_double(layer_params_stmt, 15);
1582 layer_param.output.rnorm.beta = sqlite3_column_double(layer_params_stmt, 16);
1583 break;
1584 }
1585 ccv_array_push(layer_params, &layer_param);
1586 }
1587 sqlite3_finalize(layer_params_stmt);
1588 sqlite3_stmt* convnet_params_input_stmt = 0;
1589 // load convnet params for input
1590 const char convnet_params_input_qs[] =
1591 "SELECT input_height, input_width FROM convnet_params WHERE convnet = 0;";
1592 ccv_size_t input = ccv_size(0, 0);
1593 if (SQLITE_OK0 == sqlite3_prepare_v2(db, convnet_params_input_qs, sizeof(convnet_params_input_qs), &convnet_params_input_stmt, 0))
1594 {
1595 if (sqlite3_step(convnet_params_input_stmt) == SQLITE_ROW100)
1596 {
1597 input.height = sqlite3_column_int(convnet_params_input_stmt, 0);
1598 input.width = sqlite3_column_int(convnet_params_input_stmt, 1);
1599 }
1600 sqlite3_finalize(convnet_params_input_stmt);
1601 }
1602 assert(input.height != 0 && input.width != 0)((void) sizeof ((input.height != 0 && input.width != 0
) ? 1 : 0), __extension__ ({ if (input.height != 0 &&
input.width != 0) ; else __assert_fail ("input.height != 0 && input.width != 0"
, "ccv_convnet.c", 1602, __extension__ __PRETTY_FUNCTION__); }
))
;
1603 convnet = ccv_convnet_new(use_cwc_accel, input, (ccv_convnet_layer_param_t*)ccv_array_get(layer_params, 0)((void*)(((char*)((layer_params)->data)) + (size_t)(layer_params
)->rsize * (size_t)(0)))
, layer_params->rnum);
1604 ccv_array_free(layer_params);
1605 // load layer data
1606 sqlite3_stmt* layer_data_stmt = 0;
1607 const char layer_data_qs[] =
1608 "SELECT layer, weight, bias, half_precision FROM layer_data;";
1609 if (SQLITE_OK0 == sqlite3_prepare_v2(db, layer_data_qs, sizeof(layer_data_qs), &layer_data_stmt, 0))
1610 {
1611 while (sqlite3_step(layer_data_stmt) == SQLITE_ROW100)
1612 {
1613 ccv_convnet_layer_t* layer = convnet->layers + sqlite3_column_int(layer_data_stmt, 0);
1614 int half_precision = sqlite3_column_int(layer_data_stmt, 3);
1615 int wnum = sqlite3_column_bytes(layer_data_stmt, 1) / (half_precision ? sizeof(uint16_t) : sizeof(float));
1616 // if weights available, load weights
1617 if (wnum == layer->wnum)
1618 {
1619 const void* w = sqlite3_column_blob(layer_data_stmt, 1);
1620 if (half_precision)
1621 {
1622 float* f = (float*)ccmallocmalloc(sizeof(float) * layer->wnum);
1623 ccv_half_precision_to_float((uint16_t*)w, f, layer->wnum);
1624 w = f;
1625 }
1626 switch (layer->type)
1627 {
1628 case CCV_CONVNET_CONVOLUTIONAL:
1629 memcpy(layer->w, w, sizeof(float) * layer->wnum);
1630 break;
1631 case CCV_CONVNET_FULL_CONNECT:
1632 memcpy(layer->w, w, sizeof(float) * layer->wnum);
1633 break;
1634 }
1635 if (half_precision)
1636 ccfreefree((void*)w);
1637 }
1638 int bnum = sqlite3_column_bytes(layer_data_stmt, 2) / (half_precision ? sizeof(uint16_t) : sizeof(float));
1639 // if bias available, load bias
1640 if (bnum == (layer->type == CCV_CONVNET_CONVOLUTIONAL ? layer->net.convolutional.count : layer->net.full_connect.count))
1641 {
1642 const void* bias = sqlite3_column_blob(layer_data_stmt, 2);
1643 if (half_precision)
1644 {
1645 float* f = (float*)ccmallocmalloc(sizeof(float) * (layer->type == CCV_CONVNET_CONVOLUTIONAL ? layer->net.convolutional.count : layer->net.full_connect.count));
1646 ccv_half_precision_to_float((uint16_t*)bias, f, layer->type == CCV_CONVNET_CONVOLUTIONAL ? layer->net.convolutional.count : layer->net.full_connect.count);
1647 bias = f;
1648 }
1649 switch (layer->type)
1650 {
1651 case CCV_CONVNET_CONVOLUTIONAL:
1652 memcpy(layer->bias, bias, sizeof(float) * layer->net.convolutional.count);
1653 break;
1654 case CCV_CONVNET_FULL_CONNECT:
1655 memcpy(layer->bias, bias, sizeof(float) * layer->net.full_connect.count);
1656 break;
1657 }
1658 if (half_precision)
1659 ccfreefree((void*)bias);
1660 }
1661 }
1662 sqlite3_finalize(layer_data_stmt);
1663 }
1664 sqlite3_stmt* convnet_params_mean_activity_stmt = 0;
1665 // load convnet params for mean activity
1666 const char convnet_params_mean_activity_qs[] =
1667 "SELECT mean_activity FROM convnet_params WHERE convnet = 0;";
1668 if (SQLITE_OK0 == sqlite3_prepare_v2(db, convnet_params_mean_activity_qs, sizeof(convnet_params_mean_activity_qs), &convnet_params_mean_activity_stmt, 0))
1669 {
1670 if (sqlite3_step(convnet_params_mean_activity_stmt) == SQLITE_ROW100)
1671 {
1672 int elems = sqlite3_column_bytes(convnet_params_mean_activity_stmt, 0) / sizeof(float);
1673 if (elems == convnet->input.height * convnet->input.width * convnet->channels)
1674 memcpy(convnet->mean_activity->data.f32, sqlite3_column_blob(convnet_params_mean_activity_stmt, 0), sizeof(float) * elems);
1675 }
1676 sqlite3_finalize(convnet_params_mean_activity_stmt);
1677 }
1678 }
1679 sqlite3_close(db);
1680 return convnet;
1681 }
1682 return 0;
1683}
1684
1685void ccv_convnet_input_formation(ccv_size_t input, ccv_dense_matrix_t* a, ccv_dense_matrix_t** b)
1686{
1687 if (a->rows > input.height && a->cols > input.width)
1688 ccv_resample(a, b, CCV_32F, ccv_max(input.height, (int)(a->rows * (float)input.height / a->cols + 0.5))({ typeof (input.height) _a = (input.height); typeof ((int)(a
->rows * (float)input.height / a->cols + 0.5)) _b = ((int
)(a->rows * (float)input.height / a->cols + 0.5)); (_a >
_b) ? _a : _b; })
, ccv_max(input.width, (int)(a->cols * (float)input.width / a->rows + 0.5))({ typeof (input.width) _a = (input.width); typeof ((int)(a->
cols * (float)input.width / a->rows + 0.5)) _b = ((int)(a->
cols * (float)input.width / a->rows + 0.5)); (_a > _b) ?
_a : _b; })
, CCV_INTER_AREA);
1689 else if (a->rows < input.height || a->cols < input.width)
1690 ccv_resample(a, b, CCV_32F, ccv_max(input.height, (int)(a->rows * (float)input.height / a->cols + 0.5))({ typeof (input.height) _a = (input.height); typeof ((int)(a
->rows * (float)input.height / a->cols + 0.5)) _b = ((int
)(a->rows * (float)input.height / a->cols + 0.5)); (_a >
_b) ? _a : _b; })
, ccv_max(input.width, (int)(a->cols * (float)input.width / a->rows + 0.5))({ typeof (input.width) _a = (input.width); typeof ((int)(a->
cols * (float)input.width / a->rows + 0.5)) _b = ((int)(a->
cols * (float)input.width / a->rows + 0.5)); (_a > _b) ?
_a : _b; })
, CCV_INTER_CUBIC);
1691 else
1692 ccv_shift(a, (ccv_matrix_t**)b, CCV_32F, 0, 0); // converting to 32f
1693}
1694
1695void ccv_convnet_free(ccv_convnet_t* convnet)
1696{
1697 ccv_convnet_compact(convnet);
1698 int i;
1699 for (i = 0; i < convnet->count; i++)
1700 if (convnet->layers[i].w)
1701 ccfreefree(convnet->layers[i].w);
1702 if (convnet->mean_activity)
1703 ccv_matrix_free(convnet->mean_activity);
1704 ccfreefree(convnet);
1705}
1706
1707#endif