Coverage Report

Created: 2025-04-03 22:59

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/rmsprop/ccv_nnc_rmsprop_cpu_ref.c
Line
Count
Source
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
// Shared methods.
14
#include "../_ccv_nnc_cpu_ref.h"
15
16
// CPU reference executor for the RMSProp forward command.
// Tensor roles by position: inputs[0..3] = g, a, m, v; outputs[0..2] = b, n, u.
// (Judging by the local names grad/mom/vel, g is presumably the gradient, a the
// value being updated, m the momentum state and v the squared-gradient state --
// TODO confirm against the command definition.)
// For every element, using the hyper-parameters read from cmd.info.rmsprop:
//   grad = scale * g + decay * a
//   u    = alpha * v + (1 - alpha) * grad * grad
//   n    = momentum * m + grad / (sqrtf(u) + epsilon)
//   b    = a - rate * n
// All data is accessed through data.f32. g, m, v, b, n and u are asserted to have
// the same dimensions as a; each tensor is walked with its own strides.
// hint, flags and stream_context are not used. Always returns CCV_NNC_EXEC_SUCCESS.
static int _ccv_nnc_rmsprop_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
17
1.00k
{
18
1.00k
  assert(input_size == 4);
19
1.00k
  assert(output_size == 3);
20
1.00k
  ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
21
1.00k
  ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[1];
22
1.00k
  ccv_nnc_tensor_view_t* const m = (ccv_nnc_tensor_view_t*)inputs[2];
23
1.00k
  ccv_nnc_tensor_view_t* const v = (ccv_nnc_tensor_view_t*)inputs[3];
24
1.00k
  ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0];
25
1.00k
  ccv_nnc_tensor_view_t* const n = (ccv_nnc_tensor_view_t*)outputs[1];
26
1.00k
  ccv_nnc_tensor_view_t* const u = (ccv_nnc_tensor_view_t*)outputs[2];
27
1.00k
  // Only a and b have their dimension count checked against CCV_NNC_MAX_DIM + 2.
  assert(ccv_nnc_tensor_nd(a->info.dim) <= CCV_NNC_MAX_DIM + 2);
28
1.00k
  assert(ccv_nnc_tensor_nd(b->info.dim) <= CCV_NNC_MAX_DIM + 2);
29
  // Assuming this is float 32.
30
1.00k
  int adim[CCV_NNC_MAX_DIM_ALLOC];
31
1.00k
  ccv_nnc_tensor_view_get_dim(a, adim);
32
1.00k
  // a's dimensions are the reference; every other tensor must match them.
  assert(ccv_nnc_tensor_view_check_dim(g, adim));
33
1.00k
  assert(ccv_nnc_tensor_view_check_dim(m, adim));
34
1.00k
  assert(ccv_nnc_tensor_view_check_dim(v, adim));
35
1.00k
  assert(ccv_nnc_tensor_view_check_dim(b, adim));
36
1.00k
  assert(ccv_nnc_tensor_view_check_dim(n, adim));
37
1.00k
  assert(ccv_nnc_tensor_view_check_dim(u, adim));
38
1.00k
  assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
39
1.00k
  int gstride[CCV_NNC_MAX_DIM_ALLOC];
40
1.00k
  int astride[CCV_NNC_MAX_DIM_ALLOC];
41
1.00k
  int mstride[CCV_NNC_MAX_DIM_ALLOC];
42
1.00k
  int vstride[CCV_NNC_MAX_DIM_ALLOC];
43
1.00k
  int bstride[CCV_NNC_MAX_DIM_ALLOC];
44
1.00k
  int nstride[CCV_NNC_MAX_DIM_ALLOC];
45
1.00k
  int ustride[CCV_NNC_MAX_DIM_ALLOC];
46
1.00k
  // Strides are fetched per tensor and applied per tensor in the loops below.
  ccv_nnc_tensor_view_get_stride(g, gstride);
47
1.00k
  ccv_nnc_tensor_view_get_stride(a, astride);
48
1.00k
  ccv_nnc_tensor_view_get_stride(m, mstride);
49
1.00k
  ccv_nnc_tensor_view_get_stride(v, vstride);
50
1.00k
  ccv_nnc_tensor_view_get_stride(b, bstride);
51
1.00k
  ccv_nnc_tensor_view_get_stride(n, nstride);
52
1.00k
  ccv_nnc_tensor_view_get_stride(u, ustride);
53
1.00k
  const float rate = cmd.info.rmsprop.rate;
54
1.00k
  const float scale = cmd.info.rmsprop.scale;
55
1.00k
  const float decay = cmd.info.rmsprop.decay;
56
1.00k
  const float alpha = cmd.info.rmsprop.alpha;
57
1.00k
  const float momentum = cmd.info.rmsprop.momentum;
58
1.00k
  const float epsilon = cmd.info.rmsprop.epsilon;
59
1.00k
  int i[CCV_NNC_MAX_DIM + 1];
60
1.00k
  int x;
61
1.00k
  float* const gp = g->data.f32;
62
1.00k
  float* const ap = a->data.f32;
63
1.00k
  float* const mp = m->data.f32;
64
1.00k
  float* const vp = v->data.f32;
65
1.00k
  float* const bp = b->data.f32;
66
1.00k
  float* const np = n->data.f32;
67
1.00k
  float* const up = u->data.f32;
68
2.00k
  // Axes 0..2 are stepped through the stride arrays; the last axis (adim[3]) is
  // indexed directly with x, so stride[3] is never consulted.
  for (i[0] = 0; i[0] < adim[0]; 
i[0]++1.00k
)
69
1.00k
  {
70
1.00k
    float* const gp0 = gp + i[0] * gstride[0];
71
1.00k
    float* const ap0 = ap + i[0] * astride[0];
72
1.00k
    float* const mp0 = mp + i[0] * mstride[0];
73
1.00k
    float* const vp0 = vp + i[0] * vstride[0];
74
1.00k
    float* const bp0 = bp + i[0] * bstride[0];
75
1.00k
    float* const np0 = np + i[0] * nstride[0];
76
1.00k
    float* const up0 = up + i[0] * ustride[0];
77
2.00k
    for (i[1] = 0; i[1] < adim[1]; 
i[1]++1.00k
)
78
1.00k
    {
79
1.00k
      float* gp1 = gp0 + i[1] * gstride[1];
80
1.00k
      float* ap1 = ap0 + i[1] * astride[1];
81
1.00k
      float* mp1 = mp0 + i[1] * mstride[1];
82
1.00k
      float* vp1 = vp0 + i[1] * vstride[1];
83
1.00k
      float* bp1 = bp0 + i[1] * bstride[1];
84
1.00k
      float* np1 = np0 + i[1] * nstride[1];
85
1.00k
      float* up1 = up0 + i[1] * ustride[1];
86
3.00k
      for (i[2] = 0; i[2] < adim[2]; 
i[2]++2.00k
)
87
2.00k
      {
88
6.03k
        for (x = 0; x < adim[3]; 
x++4.03k
)
89
4.03k
        {
90
4.03k
          // Scaled g plus decay times the current value of a.
          float grad = scale * gp1[x];
91
4.03k
          grad += decay * ap1[x];
92
4.03k
          // u receives the alpha-weighted blend of the previous v and grad squared.
          const float vel = up1[x] = alpha * vp1[x] + (1 - alpha) * grad * grad;
93
4.03k
          // n receives momentum * m plus grad divided by (sqrtf(vel) + epsilon).
          const float mom = np1[x] = momentum * mp1[x] + grad / (sqrtf(vel) + epsilon);
94
4.03k
          // b receives a moved by -rate * mom.
          bp1[x] = ap1[x] - rate * mom;
95
4.03k
        }
96
2.00k
        // Move every row pointer forward by its own axis-2 stride for the next i[2].
        gp1 += gstride[2];
97
2.00k
        ap1 += astride[2];
98
2.00k
        mp1 += mstride[2];
99
2.00k
        vp1 += vstride[2];
100
2.00k
        bp1 += bstride[2];
101
2.00k
        np1 += nstride[2];
102
2.00k
        up1 += ustride[2];
103
2.00k
      }
104
1.00k
    }
105
1.00k
  }
106
1.00k
  return CCV_NNC_EXEC_SUCCESS;
107
1.00k
}
108
109
// Executor used for the RMSProp backward command on this backend.
// It ignores every argument and unconditionally returns CCV_NNC_EXEC_INVALID;
// no tensor is read or written.
static int _ccv_nnc_rmsprop_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
110
0
{
111
0
  return CCV_NNC_EXEC_INVALID;
112
0
}
113
114
// Fills the registry entry for CCV_NNC_RMSPROP_FORWARD on CCV_NNC_BACKEND_CPU_REF:
// NHWC, NCHW and CHWN tensor formats, CCV_32F data type, CPU tensor memory,
// a single algorithm, with _ccv_nnc_rmsprop_forw as the exec function.
REGISTER_COMMAND_BACKEND(CCV_NNC_RMSPROP_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
115
1
{
116
1
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
117
1
  registry->tensor_datatypes = CCV_32F;
118
1
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
119
1
  registry->algorithms = 1;
120
1
  registry->exec = _ccv_nnc_rmsprop_forw;
121
1
}
122
123
// Fills the registry entry for CCV_NNC_RMSPROP_BACKWARD on CCV_NNC_BACKEND_CPU_REF
// with the same formats, data type, memory kind and algorithm count as the forward
// entry; exec is _ccv_nnc_rmsprop_back, which always returns CCV_NNC_EXEC_INVALID.
REGISTER_COMMAND_BACKEND(CCV_NNC_RMSPROP_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
124
1
{
125
1
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_CHWN;
126
1
  registry->tensor_datatypes = CCV_32F;
127
1
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
128
1
  registry->algorithms = 1;
129
1
  registry->exec = _ccv_nnc_rmsprop_back;
130
1
}