Coverage Report

Created: 2024-12-10 23:11

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/softmax/ccv_nnc_softmax_cpu_ref.c
 Line |  Count | Source
------+--------+------------------------------------------------------------------
    1 |        | #include "ccv.h"
    2 |        | #include "ccv_internal.h"
    3 |        | #include "nnc/ccv_nnc.h"
    4 |        | #include "nnc/ccv_nnc_easy.h"
    5 |        | #include "nnc/ccv_nnc_internal.h"
    6 |        | #ifdef USE_OPENMP
    7 |        | #include <omp.h>
    8 |        | #endif
    9 |        | #ifdef USE_DISPATCH
   10 |        | #include <dispatch/dispatch.h>
   11 |        | #endif
   12 |        |
   13 |        | static int _ccv_nnc_softmax_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
   14 |    508 | {
   15 |    508 |   assert(input_size == 1);
   16 |    508 |   const ccv_nnc_tensor_t* a = inputs[0];
   17 |    508 |   assert(CCV_IS_TENSOR_CONTIGUOUS(a));
   18 |    508 |   assert(output_size == 1);
   19 |    508 |   ccv_nnc_tensor_t* b = outputs[0];
   20 |    508 |   assert(CCV_IS_TENSOR_CONTIGUOUS(b));
   21 |    508 |   const int axis_count = ccv_nnc_tensor_nd(a->info.dim);
   22 |    508 |   const int batch_size = axis_count < 2 ? 1 : a->info.dim[0];   (regions: "1" = 498, "a->info.dim[0]" = 10)
   23 |    508 |   const int count = ccv_nnc_tensor_count(a->info) / batch_size;
   24 |    508 |   int i;
   25 |  1.02k |   for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && a->info.dim[i] > 0; i++)   (region: "i++" = 520)
   26 |    520 |     { assert(a->info.dim[i] == b->info.dim[i]); }
   27 |  98.9k |   parallel_for(i, batch_size) {   (region: "parallel_for" = 508)
   28 |  98.9k |     int j;
   29 |  98.9k |     float* const ap = a->data.f32 + i * count;
   30 |  98.9k |     float* const bp = b->data.f32 + i * count;
   31 |  98.9k |     double maxval = ap[0];
   32 |  13.8M |     for (j = 1; j < count; j++)   (region: "j++" = 13.7M)
   33 |  13.7M |       if (ap[j] > maxval)
   34 |   442k |         maxval = ap[j];
   35 |  98.9k |     double sumval = 0;
   36 |  13.9M |     for (j = 0; j < count; j++)   (region: "j++" = 13.8M)
   37 |  13.8M |       sumval += (bp[j] = expf(ap[j] - maxval));
   38 |  98.9k |     sumval = 1.0 / sumval;
   39 |  13.9M |     for (j = 0; j < count; j++)   (region: "j++" = 13.8M)
   40 |  13.8M |       bp[j] *= sumval;
   41 |  98.9k |   } parallel_endfor
   42 |    508 |   return CCV_NNC_EXEC_SUCCESS;
   43 |    508 | }
   44 |        |
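Note on the forward kernel (source lines 13-43): per batch row it computes the standard max-shifted softmax: find the row maximum, exponentiate the shifted entries while accumulating their sum, then scale every entry by the reciprocal of that sum. Below is a minimal standalone C sketch of the same arithmetic on one row; softmax_row is a hypothetical helper name and plain float arrays stand in for ccv's tensor types, so this is an illustration, not the library's API.

#include <math.h>
#include <stdio.h>

/* Same max-shifted softmax as the kernel above, for a single row of `count` floats. */
static void softmax_row(const float* x, float* y, int count)
{
  int j;
  double maxval = x[0];
  for (j = 1; j < count; j++)
    if (x[j] > maxval)
      maxval = x[j]; /* shift by the max so expf never sees a large positive argument */
  double sumval = 0;
  for (j = 0; j < count; j++)
    sumval += (y[j] = expf(x[j] - maxval));
  const double inv = 1.0 / sumval; /* one division, then multiply through, as on line 38 */
  for (j = 0; j < count; j++)
    y[j] *= inv;
}

int main(void)
{
  const float x[4] = { 1.f, 2.f, 3.f, 4.f };
  float y[4];
  int j;
  softmax_row(x, y, 4);
  for (j = 0; j < 4; j++)
    printf("%f\n", y[j]); /* the four outputs sum to 1 */
  return 0;
}

Subtracting the maximum keeps every argument to expf at or below zero, so the exponentials cannot overflow; that is why line 31 tracks maxval before line 37 exponentiates.
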
   45 |        | static int _ccv_nnc_softmax_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
   46 |      6 | {
   47 |      6 |   assert(input_size == 3);
   48 |      6 |   assert(output_size == 1);
   49 |      6 |   const ccv_nnc_tensor_t* g = inputs[0];
   50 |      6 |   assert(CCV_IS_TENSOR_CONTIGUOUS(g));
   51 |      6 |   const ccv_nnc_tensor_t* b = inputs[2];
   52 |      6 |   assert(CCV_IS_TENSOR_CONTIGUOUS(b));
   53 |      6 |   ccv_nnc_tensor_t* h = outputs[0];
   54 |      6 |   assert(CCV_IS_TENSOR_CONTIGUOUS(h));
   55 |      6 |   const int axis_count = ccv_nnc_tensor_nd(g->info.dim);
   56 |      6 |   const int batch_size = axis_count < 2 ? 1 : g->info.dim[0];   (regions: "1" = 1, "g->info.dim[0]" = 5)
   57 |      6 |   const int count = ccv_nnc_tensor_count(g->info) / batch_size;
   58 |      6 |   int i;
   59 |     17 |   for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && g->info.dim[i] > 0; i++)   (region: "i++" = 11)
   60 |     11 |     { assert(g->info.dim[i] == h->info.dim[i] && h->info.dim[i] == b->info.dim[i]); }
   61 |  32.7k |   parallel_for(i, batch_size) {   (region: "parallel_for" = 6)
   62 |  32.7k |     int j;
   63 |  32.7k |     float* const gp = g->data.f32 + i * count;
   64 |  32.7k |     float* const bp = b->data.f32 + i * count;
   65 |  32.7k |     float* const hp = h->data.f32 + i * count;
   66 |  32.7k |     float sumval = 0;
   67 |  4.22M |     for (j = 0; j < count; j++)   (region: "j++" = 4.19M)
   68 |  4.19M |       sumval += gp[j] * bp[j];
   69 |  4.22M |     for (j = 0; j < count; j++)   (region: "j++" = 4.19M)
   70 |  4.19M |       hp[j] = (gp[j] - sumval) * bp[j];
   71 |  32.7k |   } parallel_endfor
   72 |      6 |   return CCV_NNC_EXEC_SUCCESS;
   73 |      6 | }
   74 |        |
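Note on the backward kernel (source lines 45-73): writing y for the saved softmax output (inputs[2], read through bp) and g for the incoming gradient (inputs[0], read through gp), the kernel applies the usual softmax Jacobian identity, reproduced here for reference:

\frac{\partial y_i}{\partial x_j} = y_i \left( \delta_{ij} - y_j \right)
\qquad\Longrightarrow\qquad
\frac{\partial L}{\partial x_j} = \sum_i g_i \frac{\partial y_i}{\partial x_j}
  = y_j \left( g_j - \sum_i g_i \, y_i \right)

Line 68 accumulates sumval = sum_i g_i * y_i for each row, and line 70 evaluates y_j * (g_j - sumval), i.e. hp[j] = (gp[j] - sumval) * bp[j].
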
   75 |        | REGISTER_COMMAND_BACKEND(CCV_NNC_SOFTMAX_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
   76 |      1 | {
   77 |      1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
   78 |      1 |   registry->tensor_datatypes = CCV_32F;
   79 |      1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
   80 |      1 |   registry->algorithms = 1;
   81 |      1 |   registry->exec = _ccv_nnc_softmax_forw;
   82 |      1 | }
   83 |        |
   84 |        | REGISTER_COMMAND_BACKEND(CCV_NNC_SOFTMAX_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
   85 |      1 | {
   86 |      1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
   87 |      1 |   registry->tensor_datatypes = CCV_32F;
   88 |      1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
   89 |      1 |   registry->algorithms = 1;
   90 |      1 |   registry->exec = _ccv_nnc_softmax_back;
   91 |      1 | }
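
Note on the registrations (source lines 75-91): each REGISTER_COMMAND_BACKEND block advertises this reference implementation for 32-bit float NHWC/NCHW tensors in CPU memory and points the registry at the matching exec function. For context, the counts above would be driven by command invocations along the lines of the sketch below; it follows the idiom used in ccv's nnc test suite, and the helper macros (CPU_TENSOR_NHWC, TENSOR_LIST, CMD_SOFTMAX_FORWARD) are quoted from memory of nnc/ccv_nnc_easy.h, so treat the snippet as an assumption-labelled illustration rather than code verified against the headers.

#include "ccv.h"
#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_easy.h"

int main(void)
{
  /* Assumes nnc command backends are initialized per the library's setup requirements. */
  /* One 20 x 10 float tensor in, one out; with batch_size = 20 and count = 10
     this would route to _ccv_nnc_softmax_forw via the CPU_REF registration above. */
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 20, 10), 0);
  ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 20, 10), 0);
  int i;
  for (i = 0; i < 20 * 10; i++)
    a->data.f32[i] = i / 200.0f;
  ccv_nnc_cmd_exec(CMD_SOFTMAX_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a), TENSOR_LIST(b), 0);
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  return 0;
}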