Coverage Report

Created: 2024-08-18 16:21

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/sgd/ccv_nnc_sgd_cpu_ref.c
Line
Count
Source (jump to first uncovered line)
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
// Shared methods.
14
#include "../_ccv_nnc_cpu_ref.h"
15
16
/**
 * SGD forward step, CPU reference implementation.
 *
 * Tensor roles, by position:
 *   inputs[0]  = g, inputs[1]  = a, inputs[2] = m
 *   outputs[0] = b, outputs[1] = n
 * g, m, b and n are asserted to have the same dimensions as a. All data is
 * accessed through the f32 pointer of each tensor view.
 *
 * Per element, when cmd.info.sgd.nesterov is zero:
 *   n = momentum * m + (1 - dampening) * (scale * g + decay * a)
 *   b = a - rate * n
 * Per element, when cmd.info.sgd.nesterov is non-zero (dampening asserted 0):
 *   n = momentum * m + scale * g + decay * a
 *   b = a - rate * (scale * g + momentum * n)
 *
 * hint, flags and stream_context are not used.
 * Always returns CCV_NNC_EXEC_SUCCESS.
 */
static int _ccv_nnc_sgd_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  assert(input_size == 3);
  assert(output_size == 2);
  ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
  ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
  ccv_nnc_tensor_view_t* const m = (ccv_nnc_tensor_view_t*)inputs[2];
  ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0];
  ccv_nnc_tensor_view_t* const n = (ccv_nnc_tensor_view_t*)outputs[1];
  assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
  assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
  // Assuming this is float 32.
  // The dimensions of a are the reference every other view is checked against.
  int adim[CCV_NNC_MAX_DIM_ALLOC];
  ccv_nnc_tensor_view_get_dim(a, adim);
  assert(ccv_nnc_tensor_view_check_dim(g, adim));
  assert(ccv_nnc_tensor_view_check_dim(m, adim));
  assert(ccv_nnc_tensor_view_check_dim(b, adim));
  assert(ccv_nnc_tensor_view_check_dim(n, adim));
  // The loop nest below is written for exactly four axes.
  assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
  // Each view carries its own strides, so every tensor is walked independently.
  int gstride[CCV_NNC_MAX_DIM_ALLOC];
  ccv_nnc_tensor_view_get_stride(g, gstride);
  int astride[CCV_NNC_MAX_DIM_ALLOC];
  ccv_nnc_tensor_view_get_stride(a, astride);
  int mstride[CCV_NNC_MAX_DIM_ALLOC];
  ccv_nnc_tensor_view_get_stride(m, mstride);
  int bstride[CCV_NNC_MAX_DIM_ALLOC];
  ccv_nnc_tensor_view_get_stride(b, bstride);
  int nstride[CCV_NNC_MAX_DIM_ALLOC];
  ccv_nnc_tensor_view_get_stride(n, nstride);
  // Hyper-parameters, read once from the command.
  const float rate = cmd.info.sgd.rate;
  const float scale = cmd.info.sgd.scale;
  const float decay = cmd.info.sgd.decay;
  const float momentum = cmd.info.sgd.momentum;
  const float dampening = cmd.info.sgd.dampening;
  const float inv_dampening = 1 - dampening;
  const int nesterov = cmd.info.sgd.nesterov;
  // The nesterov path does not apply dampening, so it must be zero there.
  assert(!nesterov || dampening == 0);
  const float* const g_base = g->data.f32;
  const float* const a_base = a->data.f32;
  const float* const m_base = m->data.f32;
  float* const b_base = b->data.f32;
  float* const n_base = n->data.f32;
  if (nesterov)
  {
    for (int d0 = 0; d0 < adim[0]; d0++)
    {
      for (int d1 = 0; d1 < adim[1]; d1++)
      {
        // Row cursors for the current (d0, d1) slice; advanced by stride[2] per row.
        const float* g_row = g_base + d0 * gstride[0] + d1 * gstride[1];
        const float* a_row = a_base + d0 * astride[0] + d1 * astride[1];
        const float* m_row = m_base + d0 * mstride[0] + d1 * mstride[1];
        float* b_row = b_base + d0 * bstride[0] + d1 * bstride[1];
        float* n_row = n_base + d0 * nstride[0] + d1 * nstride[1];
        for (int d2 = 0; d2 < adim[2]; d2++, g_row += gstride[2], a_row += astride[2], m_row += mstride[2], b_row += bstride[2], n_row += nstride[2])
        {
          for (int x = 0; x < adim[3]; x++)
          {
            const float scaled_grad = scale * g_row[x];
            const float mom = momentum * m_row[x] + scaled_grad + decay * a_row[x];
            // Store n first, then derive b from a and the look-ahead gradient.
            n_row[x] = mom;
            const float lookahead = scaled_grad + momentum * mom;
            b_row[x] = a_row[x] - rate * lookahead;
          }
        }
      }
    }
  } else {
    for (int d0 = 0; d0 < adim[0]; d0++)
    {
      for (int d1 = 0; d1 < adim[1]; d1++)
      {
        // Row cursors for the current (d0, d1) slice; advanced by stride[2] per row.
        const float* g_row = g_base + d0 * gstride[0] + d1 * gstride[1];
        const float* a_row = a_base + d0 * astride[0] + d1 * astride[1];
        const float* m_row = m_base + d0 * mstride[0] + d1 * mstride[1];
        float* b_row = b_base + d0 * bstride[0] + d1 * bstride[1];
        float* n_row = n_base + d0 * nstride[0] + d1 * nstride[1];
        for (int d2 = 0; d2 < adim[2]; d2++, g_row += gstride[2], a_row += astride[2], m_row += mstride[2], b_row += bstride[2], n_row += nstride[2])
        {
          for (int x = 0; x < adim[3]; x++)
          {
            const float mom = momentum * m_row[x] + inv_dampening * (scale * g_row[x] + decay * a_row[x]);
            // Store n first, then derive b from a and the new momentum.
            n_row[x] = mom;
            b_row[x] = a_row[x] - rate * mom;
          }
        }
      }
    }
  }
  return CCV_NNC_EXEC_SUCCESS;
}
127
128
/**
 * Backward entry point for the SGD command on this backend.
 *
 * No input or output tensor is touched; the function unconditionally
 * returns CCV_NNC_EXEC_INVALID.
 */
static int _ccv_nnc_sgd_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  const int status = CCV_NNC_EXEC_INVALID;
  return status;
}
132
133
// Fills the registry entry for CCV_NNC_SGD_FORWARD on CCV_NNC_BACKEND_CPU_REF.
REGISTER_COMMAND_BACKEND(CCV_NNC_SGD_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  // Function invoked for this command on this backend.
  registry->exec = _ccv_nnc_sgd_forw;
  registry->algorithms = 1;
  // Accepted tensors: 32-bit float, CPU memory, any of the three layouts below.
  registry->tensor_datatypes = CCV_32F;
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
}
141
142
// Fills the registry entry for CCV_NNC_SGD_BACKWARD on CCV_NNC_BACKEND_CPU_REF.
REGISTER_COMMAND_BACKEND(CCV_NNC_SGD_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  // Function invoked for this command on this backend.
  registry->exec = _ccv_nnc_sgd_back;
  registry->algorithms = 1;
  // Accepted tensors: 32-bit float, CPU memory, any of the three layouts below.
  registry->tensor_datatypes = CCV_32F;
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
}