/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/swish/ccv_nnc_swish_cpu_ref.c
Line | Count | Source |
1 | | #include "ccv.h" |
2 | | #include "ccv_internal.h" |
3 | | #include "nnc/ccv_nnc.h" |
4 | | #include "nnc/ccv_nnc_easy.h" |
5 | | #include "nnc/ccv_nnc_internal.h" |
6 | | #ifdef USE_OPENMP |
7 | | #include <omp.h> |
8 | | #endif |
9 | | #ifdef USE_DISPATCH |
10 | | #include <dispatch/dispatch.h> |
11 | | #endif |
12 | | |
13 | | static int _ccv_nnc_swish_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
14 | 5 | { |
15 | 5 | assert(input_size == 1); |
16 | 5 | const ccv_nnc_tensor_t* a = inputs[0]; |
17 | 5 | assert(CCV_IS_TENSOR_CONTIGUOUS(a)); |
18 | 5 | assert(output_size == 1); |
19 | 5 | ccv_nnc_tensor_t* b = outputs[0]; |
20 | 5 | assert(CCV_IS_TENSOR_CONTIGUOUS(b)); |
21 | 5 | const int count = ccv_nnc_tensor_count(a->info); |
22 | 5 | int i; |
23 | 14 | for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && a->info.dim[i] > 0; i++9 ) |
24 | 9 | { |
25 | 9 | assert(a->info.dim[i] == b->info.dim[i]); |
26 | 9 | } |
27 | 5 | float* ap = a->data.f32; |
28 | 5 | float* bp = b->data.f32; |
29 | 2.41k | for (i = 0; i < count; i++2.41k ) |
30 | 2.41k | bp[i] = ap[i] / (1. + exp(-ap[i])); |
31 | 5 | return CCV_NNC_EXEC_SUCCESS; |
32 | 5 | } |
33 | | |
34 | | static int _ccv_nnc_swish_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context) |
35 | 3 | { |
36 | 3 | assert(input_size == 3); |
37 | 3 | const ccv_nnc_tensor_t* g = inputs[0]; // gradient |
38 | 3 | assert(CCV_IS_TENSOR_CONTIGUOUS(g)); |
39 | 3 | const ccv_nnc_tensor_t* a = inputs[1]; |
40 | 3 | assert(CCV_IS_TENSOR_CONTIGUOUS(a)); |
41 | 3 | assert(output_size == 1); |
42 | 3 | ccv_nnc_tensor_t* h = outputs[0]; |
43 | 3 | assert(CCV_IS_TENSOR_CONTIGUOUS(h)); |
44 | 3 | const int count = ccv_nnc_tensor_count(g->info); |
45 | 3 | int i; |
46 | 8 | for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && g->info.dim[i] > 0; i++5 ) |
47 | 5 | { |
48 | 5 | assert(a->info.dim[i] == g->info.dim[i]); |
49 | 5 | assert(g->info.dim[i] == h->info.dim[i]); |
50 | 5 | } |
51 | 3 | float* ap = a->data.f32; |
52 | 3 | float* gp = g->data.f32; |
53 | 3 | float* hp = h->data.f32; |
54 | | /** |
55 | | * e^x*(x+e^x+1)/(e^x+1)^2 |
56 | | * = x*e^x/(e^x+1)^2+e^x/(e^x+1) = x*e^x/(e^x+1)^2+y |
57 | | * |
58 | | * e^x/(e^x+1)^2 = ((e^x+1)^2-e^2x-1)/(2*(e^x+1)^2) |
59 | | * = 1/2*(1-e^2x/(e^x+1)^2-1/(e^x+1)^2) |
60 | | * = 1/2*(1-y^2-1/(e^x+1)^2) |
61 | | * |
62 | | * y = e^x/(e^x+1) = 1 - 1/(e^x+1) |
63 | | * 1/(e^x+1) = 1-y |
64 | | * |
65 | | * 1/2*(1-y^2-1/(e^x+1)^2) = 1/2*(1-y^2-(1-y)^2) |
66 | | * = 1/2*(1-y^2-1+2y-y^2) |
67 | | * = y-y^2 |
68 | | * |
69 | | * x*e^x/(e^x+1)^2+y = x*(y-y^2)+y |
70 | | */ |
71 | 2.01k | for (i = 0; i < count; i++2.01k ) |
72 | 2.01k | { |
73 | 2.01k | const float x = ap[i]; |
74 | 2.01k | const float y = 1. / (1. + exp(-x)); |
75 | 2.01k | const float y2 = y * y; |
76 | 2.01k | hp[i] = gp[i] * (x * (y - y2) + y); |
77 | 2.01k | } |
78 | 3 | return CCV_NNC_EXEC_SUCCESS; |
79 | 3 | } |
80 | | |
81 | | REGISTER_COMMAND_BACKEND(CCV_NNC_SWISH_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
82 | 1 | { |
83 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN; |
84 | 1 | registry->tensor_datatypes = CCV_32F; |
85 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
86 | 1 | registry->algorithms = 1; |
87 | 1 | registry->exec = _ccv_nnc_swish_forw; |
88 | 1 | } |
89 | | |
90 | | REGISTER_COMMAND_BACKEND(CCV_NNC_SWISH_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry) |
91 | 1 | { |
92 | 1 | registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN; |
93 | 1 | registry->tensor_datatypes = CCV_32F; |
94 | 1 | registry->tensor_memory = CCV_TENSOR_CPU_MEMORY; |
95 | 1 | registry->algorithms = 1; |
96 | 1 | registry->exec = _ccv_nnc_swish_back; |
97 | 1 | } |