Coverage Report

Created: 2024-06-21 10:32

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/gelu/ccv_nnc_gelu_cpu_ref.c
Line
Count
Source
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
static int _ccv_nnc_gelu_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	// Reference CPU forward pass for GELU. Two formulations are available,
	// selected by cmd.info.gelu.tanh: the tanh approximation, or the exact
	// erf-based definition x * Phi(x). Reads inputs[0], writes outputs[0]
	// elementwise; both tensors must be contiguous f32 of identical shape.
	assert(input_size == 1);
	const ccv_nnc_tensor_t* const input = inputs[0];
	assert(CCV_IS_TENSOR_CONTIGUOUS(input));
	assert(output_size == 1);
	ccv_nnc_tensor_t* const output = outputs[0];
	assert(CCV_IS_TENSOR_CONTIGUOUS(output));
	const int count = ccv_nnc_tensor_count(input->info);
	int i;
	// The input and output tensors must agree on every populated dimension.
	for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && input->info.dim[i] > 0; i++)
	{
		assert(input->info.dim[i] == output->info.dim[i]);
	}
	float* const srcp = input->data.f32;
	float* const dstp = output->data.f32;
	if (cmd.info.gelu.tanh)
	{
		// Tanh approximation: 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))),
		// where 0.797884560802865355 = sqrt(2/pi).
		for (i = 0; i < count; i++)
		{
			const float x = srcp[i];
			dstp[i] = 0.5 * x * (1 + tanh(0.797884560802865355 * (x + 0.044715 * x * x * x)));
		}
	} else {
		// Exact form: x * Phi(x), with Phi the standard normal CDF,
		// Phi(x) = 0.5 * (1 + erf(x / sqrt(2))); 0.70710678... = 1/sqrt(2).
		for (i = 0; i < count; i++)
		{
			const float x = srcp[i];
			dstp[i] = x * 0.5 * (1. + erf(x * 0.70710678118654752440));
		}
	}
	return CCV_NNC_EXEC_SUCCESS;
}
43
44
static int _ccv_nnc_gelu_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	// Reference CPU backward pass for GELU.
	// inputs[0] is the incoming gradient g, inputs[1] the forward input a;
	// outputs[0] receives h = g * dGELU/dx, elementwise. All tensors must be
	// contiguous f32 of identical shape. The formulation (tanh approximation
	// vs. exact erf) must match the one used in the forward pass.
	assert(input_size >= 2);
	const ccv_nnc_tensor_t* g = inputs[0]; // gradient
	assert(CCV_IS_TENSOR_CONTIGUOUS(g));
	const ccv_nnc_tensor_t* a = inputs[1];
	assert(CCV_IS_TENSOR_CONTIGUOUS(a));
	assert(output_size == 1);
	ccv_nnc_tensor_t* h = outputs[0];
	assert(CCV_IS_TENSOR_CONTIGUOUS(h));
	const int count = ccv_nnc_tensor_count(g->info);
	int i;
	// All three tensors must agree on every populated dimension.
	for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && g->info.dim[i] > 0; i++)
	{
		assert(a->info.dim[i] == g->info.dim[i]);
		assert(g->info.dim[i] == h->info.dim[i]);
	}
	float* ap = a->data.f32;
	float* gp = g->data.f32;
	float* hp = h->data.f32;
	if (cmd.info.gelu.tanh)
	{
		// Derivative of the tanh approximation 0.5 * x * (1 + tanh(inner)),
		// inner = sqrt(2/pi) * (x + 0.044715 * x^3), via the product rule.
		for (i = 0; i < count; i++)
		{
			const float x = ap[i];
			const float x_sq = x * x;
			const float x_cube = x_sq * x;
			const float inner = 0.797884560802865355 * (x + 0.044715 * x_cube);
			const float tanh_inner = tanh(inner);
			const float left = 0.5 * x;
			const float right = 1 + tanh_inner;
			const float left_derivative = 0.5 * right;
			// sech^2(inner) = 1 - tanh^2(inner).
			const float tanh_derivative = 1 - tanh_inner * tanh_inner;
			const float inner_derivative = 0.797884560802865355 * (1 + 3 * 0.044715 * x_sq);
			const float right_derivative = left * tanh_derivative * inner_derivative;
			hp[i] = gp[i] * (left_derivative + right_derivative);
		}
	} else {
		// Exact form: d/dx [x * Phi(x)] = Phi(x) + x * phi(x), where Phi is the
		// standard normal CDF and phi the standard normal PDF.
		for (i = 0; i < count; i++)
		{
			const float x = ap[i];
			const float cdf = 0.5 * (1. + erf(x * 0.70710678118654752440));
			// phi(x) = exp(-x^2/2) / sqrt(2*pi); 0.3989422804014327 = 1/sqrt(2*pi).
			// BUG FIX: the previous constant 0.797884560802865355 (= sqrt(2/pi))
			// is 2/sqrt(2*pi), which doubled the x * phi(x) term and made this
			// gradient inconsistent with the forward pass above.
			const float pdf = exp(-0.5 * x * x) * 0.3989422804014327;
			hp[i] = gp[i] * (cdf + x * pdf);
		}
	}
	return CCV_NNC_EXEC_SUCCESS;
}
92
93
REGISTER_COMMAND_BACKEND(CCV_NNC_GELU_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	// Register the reference CPU kernel for the GELU forward pass:
	// single algorithm, f32 only, CPU memory, any of the three tensor layouts.
	registry->exec = _ccv_nnc_gelu_forw;
	registry->algorithms = 1;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
}
101
102
REGISTER_COMMAND_BACKEND(CCV_NNC_GELU_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	// Register the reference CPU kernel for the GELU backward pass:
	// single algorithm, f32 only, CPU memory, any of the three tensor layouts.
	registry->exec = _ccv_nnc_gelu_back;
	registry->algorithms = 1;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
}