/* File: nnc/cmd/pad/ccv_nnc_pad_cpu_ref.c
 * Static-analyzer finding: line 57, column 38 — the right operand of '>=' is a
 * garbage value (begin[] read on a path where it was not fully initialized). */
| 1 | #include "ccv.h" | |||
| 2 | #include "ccv_internal.h" | |||
| 3 | #include "nnc/ccv_nnc.h" | |||
| 4 | #include "nnc/ccv_nnc_easy.h" | |||
| 5 | #include "nnc/ccv_nnc_internal.h" | |||
| 6 | #ifdef USE_OPENMP | |||
| 7 | #include <omp.h> | |||
| 8 | #endif | |||
| 9 | #ifdef USE_DISPATCH | |||
| 10 | #include <dispatch/dispatch.h> | |||
| 11 | #endif | |||
| 12 | ||||
| 13 | static int _ccv_nnc_pad_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) | |||
| 14 | { | |||
| 15 | assert(input_size == 1)((void) sizeof ((input_size == 1) ? 1 : 0), __extension__ ({ if (input_size == 1) ; else __assert_fail ("input_size == 1", "pad/ccv_nnc_pad_cpu_ref.c" , 15, __extension__ __PRETTY_FUNCTION__); })); | |||
| ||||
| 16 | ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0]; | |||
| 17 | ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0]; | |||
| 18 | assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) + 2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info .dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2" , "pad/ccv_nnc_pad_cpu_ref.c", 18, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 19 | assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) + 2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info .dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2" , "pad/ccv_nnc_pad_cpu_ref.c", 19, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 20 | // Assuming this is float 32. | |||
| 21 | int adim[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 22 | int bdim[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 23 | ccv_nnc_tensor_view_get_dim(a, adim); | |||
| 24 | ccv_nnc_tensor_view_get_dim(b, bdim); | |||
| 25 | int astride[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 26 | int bstride[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 27 | assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2) == 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "pad/ccv_nnc_pad_cpu_ref.c" , 27, __extension__ __PRETTY_FUNCTION__); })); // Need to change this logic for CCV_NNC_MAX_DIM == other number. | |||
| 28 | ccv_nnc_tensor_view_get_stride(a, astride); | |||
| 29 | ccv_nnc_tensor_view_get_stride(b, bstride); | |||
| 30 | int i[CCV_NNC_MAX_DIM(2) + 2]; | |||
| 31 | int x; | |||
| 32 | float* const ap = a->data.f32; | |||
| 33 | float* const bp = b->data.f32; | |||
| 34 | const int nd = ccv_nnc_tensor_nd(a->info.dim); | |||
| 35 | const int offset = CCV_NNC_MAX_DIM(2) + 2 - nd; | |||
| 36 | assert(offset >= 0)((void) sizeof ((offset >= 0) ? 1 : 0), __extension__ ({ if (offset >= 0) ; else __assert_fail ("offset >= 0", "pad/ccv_nnc_pad_cpu_ref.c" , 36, __extension__ __PRETTY_FUNCTION__); })); | |||
| 37 | for (x = 0; x < nd; x++) // We don't support negative pad. | |||
| 38 | { assert(cmd.info.size.dim[x] >= 0 && cmd.info.pad.end[x] >= 0)((void) sizeof ((cmd.info.size.dim[x] >= 0 && cmd. info.pad.end[x] >= 0) ? 1 : 0), __extension__ ({ if (cmd.info .size.dim[x] >= 0 && cmd.info.pad.end[x] >= 0) ; else __assert_fail ("cmd.info.size.dim[x] >= 0 && cmd.info.pad.end[x] >= 0" , "pad/ccv_nnc_pad_cpu_ref.c", 38, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 39 | int begin[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 40 | for (x = 0; x
| |||
| 41 | begin[x + offset] = cmd.info.size.dim[x]; | |||
| 42 | for (x = 0; x < offset; x++) | |||
| 43 | begin[x] = 0; | |||
| 44 | // Non-optimal case, need to do skip if needed. | |||
| 45 | if (cmd.info.pad.type == CCV_NNC_PAD_ZERO) | |||
| 46 | { | |||
| 47 | for (i[0] = 0; i[0] < bdim[0]; i[0]++) | |||
| 48 | { | |||
| 49 | float* const ap0 = (i[0] >= begin[0] && i[0] < adim[0] + begin[0]) ? ap + (i[0] - begin[0]) * astride[0] : 0; | |||
| 50 | float* const bp0 = bp + i[0] * bstride[0]; | |||
| 51 | for (i[1] = 0; i[1] < bdim[1]; i[1]++) | |||
| 52 | { | |||
| 53 | float* const ap1 = (ap0 && i[1] >= begin[1] && i[1] < adim[1] + begin[1]) ? ap0 + (i[1] - begin[1]) * astride[1] : 0; | |||
| 54 | float* bp1 = bp0 + i[1] * bstride[1]; | |||
| 55 | for (i[2] = 0; i[2] < bdim[2]; i[2]++) | |||
| 56 | { | |||
| 57 | float* const ap2 = (ap1
| |||
| ||||
| 58 | for (x = 0; x < bdim[3]; x++) | |||
| 59 | bp1[x] = (ap2 && x >= begin[3] && x < adim[3] + begin[3]) ? ap2[x - begin[3]] : 0; | |||
| 60 | bp1 += bstride[2]; | |||
| 61 | } | |||
| 62 | } | |||
| 63 | } | |||
| 64 | } else { | |||
| 65 | assert(cmd.info.pad.type == CCV_NNC_PAD_REPLICATE)((void) sizeof ((cmd.info.pad.type == CCV_NNC_PAD_REPLICATE) ? 1 : 0), __extension__ ({ if (cmd.info.pad.type == CCV_NNC_PAD_REPLICATE ) ; else __assert_fail ("cmd.info.pad.type == CCV_NNC_PAD_REPLICATE" , "pad/ccv_nnc_pad_cpu_ref.c", 65, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 66 | for (i[0] = 0; i[0] < bdim[0]; i[0]++) | |||
| 67 | { | |||
| 68 | float* const ap0 = ap + ccv_min(adim[0] - 1, ccv_max(0, i[0] - begin[0]))({ typeof (adim[0] - 1) _a = (adim[0] - 1); typeof (({ typeof (0) _a = (0); typeof (i[0] - begin[0]) _b = (i[0] - begin[0] ); (_a > _b) ? _a : _b; })) _b = (({ typeof (0) _a = (0); typeof (i[0] - begin[0]) _b = (i[0] - begin[0]); (_a > _b) ? _a : _b; })); (_a < _b) ? _a : _b; }) * astride[0]; | |||
| 69 | float* const bp0 = bp + i[0] * bstride[0]; | |||
| 70 | for (i[1] = 0; i[1] < bdim[1]; i[1]++) | |||
| 71 | { | |||
| 72 | float* const ap1 = ap0 + ccv_min(adim[1] - 1, ccv_max(0, i[1] - begin[1]))({ typeof (adim[1] - 1) _a = (adim[1] - 1); typeof (({ typeof (0) _a = (0); typeof (i[1] - begin[1]) _b = (i[1] - begin[1] ); (_a > _b) ? _a : _b; })) _b = (({ typeof (0) _a = (0); typeof (i[1] - begin[1]) _b = (i[1] - begin[1]); (_a > _b) ? _a : _b; })); (_a < _b) ? _a : _b; }) * astride[1]; | |||
| 73 | float* bp1 = bp0 + i[1] * bstride[1]; | |||
| 74 | for (i[2] = 0; i[2] < bdim[2]; i[2]++) | |||
| 75 | { | |||
| 76 | float* const ap2 = ap1 + ccv_min(adim[2] - 1, ccv_max(0, i[2] - begin[2]))({ typeof (adim[2] - 1) _a = (adim[2] - 1); typeof (({ typeof (0) _a = (0); typeof (i[2] - begin[2]) _b = (i[2] - begin[2] ); (_a > _b) ? _a : _b; })) _b = (({ typeof (0) _a = (0); typeof (i[2] - begin[2]) _b = (i[2] - begin[2]); (_a > _b) ? _a : _b; })); (_a < _b) ? _a : _b; }) * astride[2]; | |||
| 77 | for (x = 0; x < bdim[3]; x++) | |||
| 78 | bp1[x] = ap2[ccv_min(adim[3] - 1, ccv_max(0, x - begin[3]))({ typeof (adim[3] - 1) _a = (adim[3] - 1); typeof (({ typeof (0) _a = (0); typeof (x - begin[3]) _b = (x - begin[3]); (_a > _b) ? _a : _b; })) _b = (({ typeof (0) _a = (0); typeof (x - begin[3]) _b = (x - begin[3]); (_a > _b) ? _a : _b; } )); (_a < _b) ? _a : _b; })]; | |||
| 79 | bp1 += bstride[2]; | |||
| 80 | } | |||
| 81 | } | |||
| 82 | } | |||
| 83 | } | |||
| 84 | return CCV_NNC_EXEC_SUCCESS; | |||
| 85 | } | |||
| 86 | ||||
| 87 | static int _ccv_nnc_pad_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) | |||
| 88 | { | |||
| 89 | assert(input_size == 1)((void) sizeof ((input_size == 1) ? 1 : 0), __extension__ ({ if (input_size == 1) ; else __assert_fail ("input_size == 1", "pad/ccv_nnc_pad_cpu_ref.c" , 89, __extension__ __PRETTY_FUNCTION__); })); | |||
| 90 | ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0]; | |||
| 91 | ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0]; | |||
| 92 | assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(a->info.dim) <= (2) + 2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(a->info .dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2" , "pad/ccv_nnc_pad_cpu_ref.c", 92, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 93 | assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2)((void) sizeof ((ccv_nnc_tensor_nd(b->info.dim) <= (2) + 2) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_nd(b->info .dim) <= (2) + 2) ; else __assert_fail ("ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2" , "pad/ccv_nnc_pad_cpu_ref.c", 93, __extension__ __PRETTY_FUNCTION__ ); })); | |||
| 94 | // Assuming this is float 32. | |||
| 95 | int adim[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 96 | int bdim[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 97 | ccv_nnc_tensor_view_get_dim(a, adim); | |||
| 98 | ccv_nnc_tensor_view_get_dim(b, bdim); | |||
| 99 | int astride[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 100 | int bstride[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 101 | assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2) == 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "pad/ccv_nnc_pad_cpu_ref.c" , 101, __extension__ __PRETTY_FUNCTION__); })); // Need to change this logic for CCV_NNC_MAX_DIM == other number. | |||
| 102 | ccv_nnc_tensor_view_get_stride(a, astride); | |||
| 103 | ccv_nnc_tensor_view_get_stride(b, bstride); | |||
| 104 | int i[CCV_NNC_MAX_DIM(2) + 2]; | |||
| 105 | int x; | |||
| 106 | float* const ap = a->data.f32; | |||
| 107 | float* const bp = b->data.f32; | |||
| 108 | const int nd = ccv_nnc_tensor_nd(a->info.dim); | |||
| 109 | const int offset = CCV_NNC_MAX_DIM(2) + 2 - nd; | |||
| 110 | assert(offset >= 0)((void) sizeof ((offset >= 0) ? 1 : 0), __extension__ ({ if (offset >= 0) ; else __assert_fail ("offset >= 0", "pad/ccv_nnc_pad_cpu_ref.c" , 110, __extension__ __PRETTY_FUNCTION__); })); | |||
| 111 | for (x = 0; x < nd; x++) // We don't support negative pad. | |||
| 112 | { assert(cmd.info.size.dim[x] >= 0 && cmd.info.pad.end[x] >= 0)((void) sizeof ((cmd.info.size.dim[x] >= 0 && cmd. info.pad.end[x] >= 0) ? 1 : 0), __extension__ ({ if (cmd.info .size.dim[x] >= 0 && cmd.info.pad.end[x] >= 0) ; else __assert_fail ("cmd.info.size.dim[x] >= 0 && cmd.info.pad.end[x] >= 0" , "pad/ccv_nnc_pad_cpu_ref.c", 112, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
| 113 | int begin[CCV_NNC_MAX_DIM_ALLOC(12)]; | |||
| 114 | for (x = 0; x < nd; x++) | |||
| 115 | begin[x + offset] = cmd.info.size.dim[x]; | |||
| 116 | for (x = 0; x < offset; x++) | |||
| 117 | begin[x] = 0; | |||
| 118 | // Non-optimal case, need to do skip if needed. | |||
| 119 | for (i[0] = 0; i[0] < bdim[0]; i[0]++) | |||
| 120 | { | |||
| 121 | float* const ap0 = ap + (i[0] + begin[0]) * astride[0]; | |||
| 122 | float* const bp0 = bp + i[0] * bstride[0]; | |||
| 123 | for (i[1] = 0; i[1] < bdim[1]; i[1]++) | |||
| 124 | { | |||
| 125 | float* const ap1 = ap0 + (i[1] + begin[1]) * astride[1]; | |||
| 126 | float* bp1 = bp0 + i[1] * bstride[1]; | |||
| 127 | for (i[2] = 0; i[2] < bdim[2]; i[2]++) | |||
| 128 | { | |||
| 129 | float* const ap2 = ap1 + (i[2] + begin[2]) * astride[2]; | |||
| 130 | for (x = 0; x < bdim[3]; x++) | |||
| 131 | bp1[x] = ap2[x + begin[3]]; | |||
| 132 | bp1 += bstride[2]; | |||
| 133 | } | |||
| 134 | } | |||
| 135 | } | |||
| 136 | return CCV_NNC_EXEC_SUCCESS; | |||
| 137 | } | |||
| 138 | ||||
| 139 | REGISTER_COMMAND_BACKEND(CCV_NNC_PAD_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_PAD_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry) | |||
| 140 | { | |||
| 141 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW; | |||
| 142 | registry->tensor_datatypes = CCV_32F; | |||
| 143 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; | |||
| 144 | registry->algorithms = 1; | |||
| 145 | registry->exec = _ccv_nnc_pad_forw; | |||
| 146 | } | |||
| 147 | ||||
| 148 | REGISTER_COMMAND_BACKEND(CCV_NNC_PAD_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_PAD_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry) | |||
| 149 | { | |||
| 150 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW; | |||
| 151 | registry->tensor_datatypes = CCV_32F; | |||
| 152 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; | |||
| 153 | registry->algorithms = 1; | |||
| 154 | registry->exec = _ccv_nnc_pad_back; | |||
| 155 | } |