| File: | nnc/cmd/loss/ccv_nnc_mse_cpu_ref.c |
| Warning: | line 98, column 2: Assigned value is garbage or undefined |
| 1 | #include "ccv.h" |
| 2 | #include "ccv_internal.h" |
| 3 | #include "nnc/ccv_nnc.h" |
| 4 | #include "nnc/ccv_nnc_easy.h" |
| 5 | #include "nnc/ccv_nnc_internal.h" |
| 6 | #ifdef USE_OPENMP |
| 7 | #include <omp.h> |
| 8 | #endif |
| 9 | #ifdef USE_DISPATCH |
| 10 | #include <dispatch/dispatch.h> |
| 11 | #endif |
| 12 | |
| 13 | static int _ccv_nnc_mse_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
| 14 | { |
| 15 | 	assert(input_size == 2); |
| 16 | 	const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0]; |
| 17 | 	assert(ccv_nnc_tensor_nd(a->info.dim) <= 2); |
| 18 | 	const ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)inputs[1]; |
| 19 | 	assert(output_size == 1); |
| 20 | 	ccv_nnc_tensor_view_t* c = (ccv_nnc_tensor_view_t*)outputs[0]; |
| 21 | 	int dim[CCV_NNC_MAX_DIM_ALLOC]; |
| 22 | 	int astride[CCV_NNC_MAX_DIM_ALLOC]; |
| 23 | 	int bstride[CCV_NNC_MAX_DIM_ALLOC]; |
| 24 | 	int cstride[CCV_NNC_MAX_DIM_ALLOC]; |
| 25 | 	ccv_nnc_tensor_view_get_dim(a, dim); |
| 26 | 	assert(ccv_nnc_tensor_view_check_dim(b, dim)); |
| 27 | 	ccv_nnc_tensor_view_get_stride(a, astride); |
| 28 | 	ccv_nnc_tensor_view_get_stride(b, bstride); |
| 29 | 	ccv_nnc_tensor_view_get_stride(c, cstride); |
| 30 | 	assert(ccv_nnc_tensor_nd(a->info.dim) <= 2); |
| 31 | 	const int batch_size = dim[CCV_NNC_MAX_DIM]; |
| 32 | 	assert(ccv_nnc_tensor_count(c->info) == batch_size); |
| 33 | 	const int count = dim[CCV_NNC_MAX_DIM + 1]; |
| 34 | 	const int astep = astride[CCV_NNC_MAX_DIM]; |
| 35 | 	const int bstep = bstride[CCV_NNC_MAX_DIM]; |
| 36 | 	const int cstep = ccv_nnc_tensor_nd(c->info.dim) == 1 ? 1 : cstride[CCV_NNC_MAX_DIM]; |
| 37 | 	if (cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_MEAN) |
| 38 | 	{ |
| 39 | 		const float inv_mean = 1.0 / (float)count; |
| 40 | 		parallel_for(i, batch_size) { |
| 41 | 			int j; |
| 42 | 			const float* const ap = a->data.f32 + i * astep; |
| 43 | 			const float* const bp = b->data.f32 + i * bstep; |
| 44 | 			float cp = 0; |
| 45 | 			for (j = 0; j < count; j++) |
| 46 | 				cp += (bp[j] - ap[j]) * (bp[j] - ap[j]); |
| 47 | 			c->data.f32[i * cstep] = cp * inv_mean; |
| 48 | 		} parallel_endfor |
| 49 | 	} else { |
| 50 | 		assert(cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_SUM); |
| 51 | 		parallel_for(i, batch_size) { |
| 52 | 			int j; |
| 53 | 			const float* const ap = a->data.f32 + i * astep; |
| 54 | 			const float* const bp = b->data.f32 + i * bstep; |
| 55 | 			float cp = 0; |
| 56 | 			for (j = 0; j < count; j++) |
| 57 | 				cp += (bp[j] - ap[j]) * (bp[j] - ap[j]); |
| 58 | 			c->data.f32[i * cstep] = cp; |
| 59 | 		} parallel_endfor |
| 60 | 	} |
| 61 | 	return CCV_NNC_EXEC_SUCCESS; |
| 62 | } |
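As a reading aid (this note is not part of the analyzer output): for each batch sample i the forward kernel above reduces the squared difference between a and b over the trailing count axis, so the two reduction modes amount to

$$
c_i = \frac{1}{\mathrm{count}}\sum_{j=0}^{\mathrm{count}-1}\bigl(b_{ij}-a_{ij}\bigr)^2 \quad\text{(MEAN)},\qquad
c_i = \sum_{j=0}^{\mathrm{count}-1}\bigl(b_{ij}-a_{ij}\bigr)^2 \quad\text{(SUM)}.
$$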
| 63 | |
| 64 | static int _ccv_nnc_mse_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
| 65 | { |
| 66 | 	assert(input_size >= 3); |
| 67 | 	assert(output_size >= 1); |
| 68 | 	const ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0]; |
| 69 | 	assert(!g || !CCV_IS_TENSOR_VIEW(g)); |
| 70 | 	const ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1]; |
| 71 | 	const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)inputs[2]; |
| 72 | 	ccv_nnc_tensor_view_t* const ha = (ccv_nnc_tensor_view_t*)outputs[0]; |
| 73 | 	ccv_nnc_tensor_view_t* const hb = output_size >= 2 ? (ccv_nnc_tensor_view_t*)outputs[1] : 0; |
| 74 | 	int dim[CCV_NNC_MAX_DIM_ALLOC]; |
| 75 | 	int astride[CCV_NNC_MAX_DIM_ALLOC]; |
| 76 | 	int bstride[CCV_NNC_MAX_DIM_ALLOC]; |
| 77 | 	int hastride[CCV_NNC_MAX_DIM_ALLOC]; |
| 78 | 	int hbstride[CCV_NNC_MAX_DIM_ALLOC]; |
| 79 | 	ccv_nnc_tensor_view_get_dim(a, dim); |
| 80 | 	assert(ccv_nnc_tensor_view_check_dim(b, dim)); |
| 81 | 	if (ha) |
| 82 | 	{ assert(ccv_nnc_tensor_view_check_dim(ha, dim)); } |
| 83 | 	if (hb) |
| 84 | 	{ assert(ccv_nnc_tensor_view_check_dim(hb, dim)); } |
| 85 | 	ccv_nnc_tensor_view_get_stride(a, astride); |
| 86 | 	ccv_nnc_tensor_view_get_stride(b, bstride); |
| 87 | 	if (ha) |
| 88 | 		ccv_nnc_tensor_view_get_stride(ha, hastride); |
| 89 | 	if (hb) |
| 90 | 		ccv_nnc_tensor_view_get_stride(hb, hbstride); |
| 91 | 	assert(ccv_nnc_tensor_nd(a->info.dim) <= 2); |
| 92 | 	const int batch_size = dim[CCV_NNC_MAX_DIM]; |
| 93 | 	const int count = dim[CCV_NNC_MAX_DIM + 1]; |
| 94 | 	const float inv_mean_2 = cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_MEAN ? 2.0 / (float)count : 2.0; |
| 95 | 	assert(cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_MEAN || cmd.info.mse.reduce_op == CCV_NNC_MSE_REDUCE_SUM); |
| 96 | 	const int astep = astride[CCV_NNC_MAX_DIM]; |
| 97 | 	const int bstep = bstride[CCV_NNC_MAX_DIM]; |
| 98 | 	const int hastep = hastride[CCV_NNC_MAX_DIM]; |
| 99 | 	const int hbstep = hbstride[CCV_NNC_MAX_DIM]; |
| 100 | 	if (g) |
| 101 | 	{ |
| 102 | 		int gstride[CCV_NNC_MAX_DIM_ALLOC]; |
| 103 | 		ccv_nnc_tensor_view_get_stride(g, gstride); |
| 104 | 		assert(ccv_nnc_tensor_count(g->info) == batch_size); |
| 105 | 		const int gstep = ccv_nnc_tensor_nd(g->info.dim) == 1 ? 1 : gstride[CCV_NNC_MAX_DIM]; |
| 106 | 		if (ha) |
| 107 | 		{ |
| 108 | 			parallel_for(i, batch_size) { |
| 109 | 				int j; |
| 110 | 				const float* const ap = a->data.f32 + i * astep; |
| 111 | 				const float* const bp = b->data.f32 + i * bstep; |
| 112 | 				float* const hp = ha->data.f32 + i * hastep; |
| 113 | 				const float gp = inv_mean_2 * g->data.f32[i * gstep]; |
| 114 | 				for (j = 0; j < count; j++) |
| 115 | 					hp[j] = gp * (ap[j] - bp[j]); |
| 116 | 			} parallel_endfor |
| 117 | 		} |
| 118 | 		if (hb) |
| 119 | 		{ |
| 120 | 			parallel_for(i, batch_size) { |
| 121 | 				int j; |
| 122 | 				const float* const ap = a->data.f32 + i * astep; |
| 123 | 				const float* const bp = b->data.f32 + i * bstep; |
| 124 | 				float* const hp = hb->data.f32 + i * hbstep; |
| 125 | 				const float gp = inv_mean_2 * g->data.f32[i * gstep]; |
| 126 | 				for (j = 0; j < count; j++) |
| 127 | 					hp[j] = gp * (bp[j] - ap[j]); |
| 128 | 			} parallel_endfor |
| 129 | 		} |
| 130 | 	} else { |
| 131 | 		if (ha) |
| 132 | 		{ |
| 133 | 			parallel_for(i, batch_size) { |
| 134 | 				int j; |
| 135 | 				const float* const ap = a->data.f32 + i * astep; |
| 136 | 				const float* const bp = b->data.f32 + i * bstep; |
| 137 | 				float* const hp = ha->data.f32 + i * hastep; |
| 138 | 				for (j = 0; j < count; j++) |
| 139 | 					hp[j] = inv_mean_2 * (ap[j] - bp[j]); |
| 140 | 			} parallel_endfor |
| 141 | 		} |
| 142 | 		if (hb) |
| 143 | 		{ |
| 144 | 			parallel_for(i, batch_size) { |
| 145 | 				int j; |
| 146 | 				const float* const ap = a->data.f32 + i * astep; |
| 147 | 				const float* const bp = b->data.f32 + i * bstep; |
| 148 | 				float* const hp = hb->data.f32 + i * hbstep; |
| 149 | 				for (j = 0; j < count; j++) |
| 150 | 					hp[j] = inv_mean_2 * (bp[j] - ap[j]); |
| 151 | 			} parallel_endfor |
| 152 | 		} |
| 153 | 	} |
| 154 | 	return CCV_NNC_EXEC_SUCCESS; |
| 155 | } |
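The reported defect: `hastride` is filled by `ccv_nnc_tensor_view_get_stride` only when `ha` is non-NULL (lines 87-88), yet line 98 reads `hastride[CCV_NNC_MAX_DIM]` unconditionally; `hbstride` at line 99 has the same shape of problem. The uninitialized value is never actually consumed when the corresponding output is absent, because every later use of `hastep`/`hbstep` sits inside an `if (ha)` or `if (hb)` branch, but the analyzer cannot prove that. A minimal sketch of one way to silence the warning, replacing lines 98-99 (an illustrative fix, not the upstream patch):

```c
/* Illustrative fix for lines 98-99: only read the stride arrays that were
 * actually initialized above. Zero is a safe placeholder because hastep and
 * hbstep are only used inside the `if (ha)` / `if (hb)` branches that follow. */
const int hastep = ha ? hastride[CCV_NNC_MAX_DIM] : 0;
const int hbstep = hb ? hbstride[CCV_NNC_MAX_DIM] : 0;
```

Zero-initializing `hastride` and `hbstride` at their declarations would quiet the checker just as well.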
| 156 | |
| 157 | REGISTER_COMMAND_BACKEND(CCV_NNC_MSE_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
| 158 | { |
| 159 | 	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW; |
| 160 | 	registry->tensor_datatypes = CCV_32F; |
| 161 | 	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
| 162 | 	registry->algorithms = 1; |
| 163 | 	registry->exec = _ccv_nnc_mse_forw; |
| 164 | } |
| 165 | |
| 166 | REGISTER_COMMAND_BACKEND(CCV_NNC_MSE_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
| 167 | { |
| 168 | 	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW; |
| 169 | 	registry->tensor_datatypes = CCV_32F; |
| 170 | 	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
| 171 | 	registry->algorithms = 1; |
| 172 | 	registry->exec = _ccv_nnc_mse_back; |
| 173 | } |