File: nnc/cmd/loss/ccv_nnc_mse_cpu_ref.c
Warning: line 99, column 2: Assigned value is garbage or undefined
1 | #include "ccv.h"
2 | #include "ccv_internal.h"
3 | #include "nnc/ccv_nnc.h"
4 | #include "nnc/ccv_nnc_easy.h"
5 | #include "nnc/ccv_nnc_internal.h"
6 | #ifdef USE_OPENMP
7 | #include <omp.h>
8 | #endif
9 | #ifdef USE_DISPATCH
10 | #include <dispatch/dispatch.h>
11 | #endif
12 |
13 | static int _ccv_nnc_mse_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
14 | {
15 | 	assert(input_size == 2);
16 | 	const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
17 | 	assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
18 | 	const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1];
19 | 	assert(output_size == 1);
20 | 	ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0];
21 | 	int dim[CCV_NNC_MAX_DIM_ALLOC];
22 | 	int astride[CCV_NNC_MAX_DIM_ALLOC];
23 | 	int bstride[CCV_NNC_MAX_DIM_ALLOC];
24 | 	int cstride[CCV_NNC_MAX_DIM_ALLOC];
25 | 	ccv_nnc_tensor_view_get_dim(a, dim);
26 | 	assert(ccv_nnc_tensor_view_check_dim(b, dim));
27 | 	ccv_nnc_tensor_view_get_stride(a, astride);
28 | 	ccv_nnc_tensor_view_get_stride(b, bstride);
29 | 	ccv_nnc_tensor_view_get_stride(c, cstride);
30 | 	assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
31 | 	const int batch_size = dim[CCV_NNC_MAX_DIM];
32 | 	assert(ccv_nnc_tensor_count(c->info) == batch_size);
33 | 	const int count = dim[CCV_NNC_MAX_DIM + 1];
34 | 	const int astep = astride[CCV_NNC_MAX_DIM];
35 | 	const int bstep = bstride[CCV_NNC_MAX_DIM];
36 | 	const int cstep = ccv_nnc_tensor_nd(c->info.dim) == 1 ? 1 : cstride[CCV_NNC_MAX_DIM];
37 | 	if (cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_MEAN)
38 | 	{
39 | 		const float inv_mean = 1.0 / (float)count;
40 | 		parallel_for(i, batch_size) {
41 | 			int j;
42 | 			const float* const ap = a->data.f32 + i * astep;
43 | 			const float* const bp = b->data.f32 + i * bstep;
44 | 			float cp = 0;
45 | 			for (j = 0; j < count; j++)
46 | 				cp += (bp[j] - ap[j]) * (bp[j] - ap[j]);
47 | 			c->data.f32[i * cstep] = cp * inv_mean;
48 | 		} parallel_endfor
49 | 	} else {
50 | 		assert(cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_SUM);
51 | 		parallel_for(i, batch_size) {
52 | 			int j;
53 | 			const float* const ap = a->data.f32 + i * astep;
54 | 			const float* const bp = b->data.f32 + i * bstep;
55 | 			float cp = 0;
56 | 			for (j = 0; j < count; j++)
57 | 				cp += (bp[j] - ap[j]) * (bp[j] - ap[j]);
58 | 			c->data.f32[i * cstep] = cp;
59 | 		} parallel_endfor
60 | 	}
61 | 	return CCV_NNC_EXEC_SUCCESS;
62 | }
63 |
64 | static int _ccv_nnc_mse_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
65 | {
66 | 	assert(input_size >= 3);
67 | 	assert(output_size >= 1);
68 | 	const ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
69 | 	assert(!g || !CCV_IS_TENSOR_VIEW(g));
70 | 	const ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
71 | 	const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2];
72 | 	ccv_nnc_tensor_view_t* const ha = (ccv_nnc_tensor_view_t*)outputs[0];
73 | 	ccv_nnc_tensor_view_t* const hb = output_size >= 2 ? (ccv_nnc_tensor_view_t*)outputs[1] : 0;
74 | 	int dim[CCV_NNC_MAX_DIM_ALLOC];
75 | 	int astride[CCV_NNC_MAX_DIM_ALLOC];
76 | 	int bstride[CCV_NNC_MAX_DIM_ALLOC];
77 | 	int hastride[CCV_NNC_MAX_DIM_ALLOC];
78 | 	int hbstride[CCV_NNC_MAX_DIM_ALLOC];
79 | 	ccv_nnc_tensor_view_get_dim(a, dim);
80 | 	assert(ccv_nnc_tensor_view_check_dim(b, dim));
81 | 	if (ha)
82 | 		{ assert(ccv_nnc_tensor_view_check_dim(ha, dim)); }
83 | 	if (hb)
84 | 		{ assert(ccv_nnc_tensor_view_check_dim(hb, dim)); }
85 | 	ccv_nnc_tensor_view_get_stride(a, astride);
86 | 	ccv_nnc_tensor_view_get_stride(b, bstride);
87 | 	if (ha)
88 | 		ccv_nnc_tensor_view_get_stride(ha, hastride);
89 | 	if (hb)
90 | 		ccv_nnc_tensor_view_get_stride(hb, hbstride);
91 | 	assert(ccv_nnc_tensor_nd(a->info.dim) <= 2);
92 | 	const int batch_size = dim[CCV_NNC_MAX_DIM];
93 | 	const int count = dim[CCV_NNC_MAX_DIM + 1];
94 | 	const float inv_mean_2 = cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_MEAN ? 2.0 / (float)count : 2.0;
95 | 	assert(cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_MEAN || cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_SUM);
96 | 	const int astep = astride[CCV_NNC_MAX_DIM];
97 | 	const int bstep = bstride[CCV_NNC_MAX_DIM];
98 | 	const int hastep = hastride[CCV_NNC_MAX_DIM];
99 | 	const int hbstep = hbstride[CCV_NNC_MAX_DIM];
100 | 	if (g)
101 | 	{
102 | 		int gstride[CCV_NNC_MAX_DIM_ALLOC];
103 | 		ccv_nnc_tensor_view_get_stride(g, gstride);
104 | 		assert(ccv_nnc_tensor_count(g->info) == batch_size);
105 | 		const int gstep = ccv_nnc_tensor_nd(g->info.dim) == 1 ? 1 : gstride[CCV_NNC_MAX_DIM];
106 | 		if (ha)
107 | 		{
108 | 			parallel_for(i, batch_size) {
109 | 				int j;
110 | 				const float* const ap = a->data.f32 + i * astep;
111 | 				const float* const bp = b->data.f32 + i * bstep;
112 | 				float* const hp = ha->data.f32 + i * hastep;
113 | 				const float gp = inv_mean_2 * g->data.f32[i * gstep];
114 | 				for (j = 0; j < count; j++)
115 | 					hp[j] = gp * (ap[j] - bp[j]);
116 | 			} parallel_endfor
117 | 		}
118 | 		if (hb)
119 | 		{
120 | 			parallel_for(i, batch_size) {
121 | 				int j;
122 | 				const float* const ap = a->data.f32 + i * astep;
123 | 				const float* const bp = b->data.f32 + i * bstep;
124 | 				float* const hp = hb->data.f32 + i * hbstep;
125 | 				const float gp = inv_mean_2 * g->data.f32[i * gstep];
126 | 				for (j = 0; j < count; j++)
127 | 					hp[j] = gp * (bp[j] - ap[j]);
128 | 			} parallel_endfor
129 | 		}
130 | 	} else {
131 | 		if (ha)
132 | 		{
133 | 			parallel_for(i, batch_size) {
134 | 				int j;
135 | 				const float* const ap = a->data.f32 + i * astep;
136 | 				const float* const bp = b->data.f32 + i * bstep;
137 | 				float* const hp = ha->data.f32 + i * hastep;
138 | 				for (j = 0; j < count; j++)
139 | 					hp[j] = inv_mean_2 * (ap[j] - bp[j]);
140 | 			} parallel_endfor
141 | 		}
142 | 		if (hb)
143 | 		{
144 | 			parallel_for(i, batch_size) {
145 | 				int j;
146 | 				const float* const ap = a->data.f32 + i * astep;
147 | 				const float* const bp = b->data.f32 + i * bstep;
148 | 				float* const hp = hb->data.f32 + i * hbstep;
149 | 				for (j = 0; j < count; j++)
150 | 					hp[j] = inv_mean_2 * (bp[j] - ap[j]);
151 | 			} parallel_endfor
152 | 		}
153 | 	}
154 | 	return CCV_NNC_EXEC_SUCCESS;
155 | }
156 |
157 | REGISTER_COMMAND_BACKEND(CCV_NNC_MSE_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
158 | {
159 | 	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
160 | 	registry->tensor_datatypes = CCV_32F;
161 | 	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
162 | 	registry->algorithms = 1;
163 | 	registry->exec = _ccv_nnc_mse_forw;
164 | }
165 |
166 | REGISTER_COMMAND_BACKEND(CCV_NNC_MSE_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
167 | {
168 | 	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
169 | 	registry->tensor_datatypes = CCV_32F;
170 | 	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
171 | 	registry->algorithms = 1;
172 | 	registry->exec = _ccv_nnc_mse_back;
173 | }
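
Note on the warning: hastride and hbstride (source lines 77-78) are stack arrays that ccv_nnc_tensor_view_get_stride() fills only when ha / hb are non-NULL (lines 87-90), yet lines 98-99 read them unconditionally. On a path where the command runs with a single gradient output, hb is 0 and line 99 assigns hbstep from an uninitialized array, which is what "Assigned value is garbage or undefined" refers to. Below is a minimal sketch of one way to guard the reads the same way the writes are guarded; it is only an illustration against the listing above, not a fix taken from upstream.

	/* Sketch: possible replacement for source lines 98-99. Reading the stride
	 * slot only when the corresponding output tensor exists avoids the
	 * uninitialized read; 0 is a safe fallback because hastep and hbstep are
	 * consumed only inside the if (ha) / if (hb) blocks further down. */
	const int hastep = ha ? hastride[CCV_NNC_MAX_DIM] : 0;
	const int hbstep = hb ? hbstride[CCV_NNC_MAX_DIM] : 0;

Since hastep is used only at lines 112 and 137 (both guarded by if (ha)) and hbstep only at lines 124 and 148 (both guarded by if (hb)), the fallback value is never read when the corresponding output is absent.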