File: nnc/cmd/lamb/ccv_nnc_lamb_cpu_ref.c
Warning: line 68, column 9 (the `float* bp = ...` initializer): Value stored to 'bp' during its initialization is never read
#include "ccv.h"
#include "ccv_internal.h"
#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_easy.h"
#include "nnc/ccv_nnc_internal.h"
#ifdef USE_OPENMP
#include <omp.h>
#endif
#ifdef USE_DISPATCH
#include <dispatch/dispatch.h>
#endif

// Shared methods.
#include "../_ccv_nnc_cpu_ref.h"

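// CPU reference implementation of the LAMB optimizer forward pass. LAMB
// (layer-wise adaptive moments; You et al., "Large Batch Optimization for
// Deep Learning: Training BERT in 76 Minutes", 2019) computes a
// bias-corrected Adam-style update plus weight decay, then rescales the
// learning rate per tensor by the trust ratio ||w|| / ||update||.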
static int _ccv_nnc_lamb_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	assert(input_size == 4);
	assert(output_size == 3);
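	// Inputs: gradient g, current parameters a, and the running first / second
	// moment estimates m and v. Outputs: updated parameters b and the new
	// moment estimates n and u.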
	ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
	ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
	ccv_nnc_tensor_view_t* const m = (ccv_nnc_tensor_view_t*)inputs[2];
	ccv_nnc_tensor_view_t* const v = (ccv_nnc_tensor_view_t*)inputs[3];
	ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0];
	ccv_nnc_tensor_view_t* const n = (ccv_nnc_tensor_view_t*)outputs[1];
	ccv_nnc_tensor_view_t* const u = (ccv_nnc_tensor_view_t*)outputs[2];
	assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
	assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
	// Assuming this is float 32.
	int adim[CCV_NNC_MAX_DIM_ALLOC];
	ccv_nnc_tensor_view_get_dim(a, adim);
	assert(ccv_nnc_tensor_view_check_dim(g, adim));
	assert(ccv_nnc_tensor_view_check_dim(m, adim));
	assert(ccv_nnc_tensor_view_check_dim(v, adim));
	assert(ccv_nnc_tensor_view_check_dim(b, adim));
	assert(ccv_nnc_tensor_view_check_dim(n, adim));
	assert(ccv_nnc_tensor_view_check_dim(u, adim));
	assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
	int ginc[CCV_NNC_MAX_DIM_ALLOC];
	int ainc[CCV_NNC_MAX_DIM_ALLOC];
	int minc[CCV_NNC_MAX_DIM_ALLOC];
	int vinc[CCV_NNC_MAX_DIM_ALLOC];
	int binc[CCV_NNC_MAX_DIM_ALLOC];
	int ninc[CCV_NNC_MAX_DIM_ALLOC];
	int uinc[CCV_NNC_MAX_DIM_ALLOC];
	ccv_nnc_tensor_view_get_inc(g, ginc);
	ccv_nnc_tensor_view_get_inc(a, ainc);
	ccv_nnc_tensor_view_get_inc(m, minc);
	ccv_nnc_tensor_view_get_inc(v, vinc);
	ccv_nnc_tensor_view_get_inc(b, binc);
	ccv_nnc_tensor_view_get_inc(n, ninc);
	ccv_nnc_tensor_view_get_inc(u, uinc);
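	// The inc arrays hold the allocated strides of each (possibly
	// non-contiguous) tensor view; the pointer bumps at the end of each loop
	// level below skip the padding between rows and planes.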
	const int step = cmd.info.lamb.step;
	const float rate = cmd.info.lamb.rate;
	const float beta1 = cmd.info.lamb.beta1;
	const float beta2 = cmd.info.lamb.beta2;
	const float decay = cmd.info.lamb.decay;
	const float epsilon = cmd.info.lamb.epsilon;
	assert(step >= 1);
	const float inv_bias_correction1 = 1. / (1 - powf(beta1, step));
	const float inv_bias_correction2 = 1. / (1 - powf(beta2, step));
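	// As in Adam, at step t the running moments are biased toward zero by a
	// factor of (1 - beta^t): E[m_t] = (1 - beta1^t) * E[g]. Multiplying by
	// these reciprocals recovers unbiased estimates.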
	int i[CCV_NNC_MAX_DIM + 1];
	int x;
	float* gp = g->data.f32;
	float* ap = a->data.f32;
	float* mp = m->data.f32;
	float* vp = v->data.f32;
	float* bp; // Deliberately left unset; b is first written in the second pass below.
	float* np = n->data.f32;
	float* up = u->data.f32;
	float* const update = (float*)ccv_nnc_stream_context_get_workspace(stream_context, sizeof(float) * adim[0] * adim[1] * adim[2] * adim[3], CCV_TENSOR_CPU_MEMORY);
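	// The raw update is materialized into this scratch buffer because the
	// trust ratio depends on the L2 norm of the whole update tensor; the
	// parameter write-back therefore happens in a second pass, once both
	// norms are known.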
	float* updatep = update;
	double update_norm = 0;
	double w_norm = 0;
	for (i[0] = 0; i[0] < adim[0]; i[0]++)
	{
		for (i[1] = 0; i[1] < adim[1]; i[1]++)
		{
			for (i[2] = 0; i[2] < adim[2]; i[2]++)
			{
				for (x = 0; x < adim[3]; x++)
				{
					const float grad = gp[x];
					const float w = ap[x];
					const float mom = np[x] = beta1 * mp[x] + (1 - beta1) * grad;
					const float vel = up[x] = beta2 * vp[x] + (1 - beta2) * grad * grad;
					const float update = updatep[x] = (mom * inv_bias_correction1) / (sqrtf(vel * inv_bias_correction2) + epsilon) + w * decay;
					w_norm += w * w;
					update_norm += update * update;
				}
				gp += ginc[3];
				ap += ainc[3];
				mp += minc[3];
				vp += vinc[3];
				np += ninc[3];
				up += uinc[3];
				updatep += adim[3];
			}
			gp += (ginc[2] - adim[2]) * ginc[3];
			ap += (ainc[2] - adim[2]) * ainc[3];
			mp += (minc[2] - adim[2]) * minc[3];
			vp += (vinc[2] - adim[2]) * vinc[3];
			np += (ninc[2] - adim[2]) * ninc[3];
			up += (uinc[2] - adim[2]) * uinc[3];
		}
		gp += (ginc[1] - adim[1]) * ginc[2] * ginc[3];
		ap += (ainc[1] - adim[1]) * ainc[2] * ainc[3];
		mp += (minc[1] - adim[1]) * minc[2] * minc[3];
		vp += (vinc[1] - adim[1]) * vinc[2] * vinc[3];
		np += (ninc[1] - adim[1]) * ninc[2] * ninc[3];
		up += (uinc[1] - adim[1]) * uinc[2] * uinc[3];
	}
	w_norm = sqrt(w_norm);
	update_norm = sqrt(update_norm);
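	// LAMB's layer-wise scaling: the step is stretched by ||w|| / ||update||
	// so the update magnitude tracks the parameter magnitude; when either
	// norm is zero (e.g. freshly zero-initialized weights), fall back to a
	// ratio of 1, i.e. a plain bias-corrected Adam step.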
	const float trust_ratio = w_norm > 0 && update_norm > 0 ? w_norm / update_norm : 1.;
	const float rate_trust_ratio = rate * trust_ratio;
	ap = a->data.f32;
	bp = b->data.f32;
	updatep = update;
	for (i[0] = 0; i[0] < adim[0]; i[0]++)
	{
		for (i[1] = 0; i[1] < adim[1]; i[1]++)
		{
			for (i[2] = 0; i[2] < adim[2]; i[2]++)
			{
				for (x = 0; x < adim[3]; x++)
					bp[x] = ap[x] - rate_trust_ratio * updatep[x];
				ap += ainc[3];
				bp += binc[3];
				updatep += adim[3];
			}
			ap += (ainc[2] - adim[2]) * ainc[3];
			bp += (binc[2] - adim[2]) * binc[3];
		}
		ap += (ainc[1] - adim[1]) * ainc[2] * ainc[3];
		bp += (binc[1] - adim[1]) * binc[2] * binc[3];
	}
	return CCV_NNC_EXEC_SUCCESS;
}
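
/* A minimal usage sketch, not verified against ccv_nnc_cmd_easy.h: it assumes
 * the generated CMD_LAMB_FORWARD convenience macro takes (step, rate, beta1,
 * beta2, decay, epsilon) in the same order as cmd.info.lamb above. With g, a,
 * m, v, b, n, u as CCV_32F tensors of identical dimensions:
 *
 *   ccv_nnc_cmd_exec(CMD_LAMB_FORWARD(1, 0.001, 0.9, 0.999, 0.01, 1e-6),
 *       ccv_nnc_no_hint, 0,
 *       TENSOR_LIST(g, a, m, v), TENSOR_LIST(b, n, u), 0);
 */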

static int _ccv_nnc_lamb_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	return CCV_NNC_EXEC_INVALID;
}

REGISTER_COMMAND_BACKEND(CCV_NNC_LAMB_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->algorithms = 1;
	registry->exec = _ccv_nnc_lamb_forw;
}

REGISTER_COMMAND_BACKEND(CCV_NNC_LAMB_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->algorithms = 1;
	registry->exec = _ccv_nnc_lamb_back;
}