Coverage Report

Created: 2025-05-09 19:15

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/scatter_add/ccv_nnc_scatter_add_cpu_ref.c
Line | Count | Source
   1 |       | #include "ccv.h"
   2 |       | #include "ccv_internal.h"
   3 |       | #include "nnc/ccv_nnc.h"
   4 |       | #include "nnc/ccv_nnc_easy.h"
   5 |       | #include "nnc/ccv_nnc_internal.h"
   6 |       | #ifdef USE_OPENMP
   7 |       | #include <omp.h>
   8 |       | #endif
   9 |       | #ifdef USE_DISPATCH
  10 |       | #include <dispatch/dispatch.h>
  11 |       | #endif
  12 |       |
  13 |       | static int _ccv_nnc_scatter_add_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  14 |     5 | {
  15 |     5 | 	assert(input_size == 2);
  16 |     5 | 	assert(output_size == 1);
  17 |     5 | 	const ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
  18 |     5 | 	const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
  19 |     5 | 	assert(a_nd <= 2);
  20 |     5 | 	const ccv_nnc_tensor_view_t* const indices = (ccv_nnc_tensor_view_t*)inputs[1];
  21 |     5 | 	assert(ccv_nnc_tensor_nd(indices->info.dim) == 1);
  22 |     5 | 	ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0];
  23 |     5 | 	const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
  24 |     5 | 	assert(b_nd <= 2);
  25 |     5 | 	const int a_cols = a_nd < 2 ? 1 : a->info.dim[1];
  26 |     5 | 	const int a_cols_inc = CCV_IS_TENSOR_VIEW(a) ? (a_nd < 2 ? 1 : a->stride[0]) : a_cols;
  27 |     5 | 	const int a_rows = a->info.dim[0];
  28 |     5 | 	const int b_cols = b_nd < 2 ? 1 : b->info.dim[1];
  29 |     5 | 	const int b_cols_inc = CCV_IS_TENSOR_VIEW(b) ? (b_nd < 2 ? 1 : b->stride[0]) : b_cols;
  30 |     5 | 	const int b_rows = b->info.dim[0];
  31 |     5 | 	assert(a_rows == indices->info.dim[0]);
  32 |     5 | 	assert(indices->info.datatype == CCV_32S);
  33 |     5 | 	assert(a_cols == b_cols);
  34 |     5 | 	assert(a->info.datatype == b->info.datatype);
  35 |     5 | 	ccv_nnc_tensor_zero((ccv_nnc_tensor_t*)b);
  36 |     5 | 	int i;
  37 |     5 | 	if (a->info.datatype == CCV_32F)
  38 |     4 | 	{
  39 |    13 | 		for (i = 0; i < a_rows; i++)
  40 |     9 | 		{
  41 |     9 | 			const int index = indices->data.i32[i];
  42 |     9 | 			assert(index < b_rows);
  43 |     9 | 			float* const bp = b->data.f32 + b_cols_inc * index;
  44 |     9 | 			float* const ap = a->data.f32 + a_cols_inc * i;
  45 |    15 | 			parallel_for(j, a_cols) {
  46 |    15 | 				bp[j] += ap[j];
  47 |    15 | 			} parallel_endfor
  48 |     9 | 		}
  49 |     4 | 	} else {
  50 |    11 | 		for (i = 0; i < a_rows; i++)
  51 |    10 | 		{
  52 |    10 | 			const int index = indices->data.i32[i];
  53 |    10 | 			assert(index < b_rows);
  54 |    10 | 			ccv_float16_t* const bp = b->data.f16 + b_cols_inc * index;
  55 |    10 | 			ccv_float16_t* const ap = a->data.f16 + a_cols_inc * i;
  56 |   100 | 			parallel_for(j, a_cols) {
  57 |   100 | 				float t, v;
  58 |   100 | 				ccv_half_precision_to_float((uint16_t*)ap + j, &t, 1);
  59 |   100 | 				ccv_half_precision_to_float((uint16_t*)bp + j, &v, 1);
  60 |   100 | 				v += t;
  61 |   100 | 				ccv_float_to_half_precision(&v, (uint16_t*)bp + j, 1);
  62 |   100 | 			} parallel_endfor
  63 |    10 | 		}
  64 |     1 | 	}
  65 |     5 | 	return CCV_NNC_EXEC_SUCCESS;
  66 |     5 | }
  67 |       |
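Editorial note: the forward kernel above zeroes the output tensor and then accumulates each row of the input into the output row selected by the index tensor, i.e. b[indices[i]][j] += a[i][j], with a dedicated FP32 path and an FP16 path that converts through float. The standalone sketch below is not part of the coverage report; names are illustrative, and it ignores tensor views, strides, and half precision to show only the core semantics.

	#include <string.h>

	/* Illustrative sketch: rows of a (a_rows x cols) are summed into rows of
	 * b (b_rows x cols) chosen by indices[i]. Mirrors the FP32 path above. */
	static void scatter_add_sketch(const float* a, const int* indices, float* b,
		int a_rows, int b_rows, int cols)
	{
		memset(b, 0, sizeof(float) * b_rows * cols); /* matches ccv_nnc_tensor_zero(b) */
		for (int i = 0; i < a_rows; i++)
		{
			float* const bp = b + cols * indices[i]; /* destination row */
			const float* const ap = a + cols * i;    /* source row */
			for (int j = 0; j < cols; j++)
				bp[j] += ap[j]; /* duplicate indices accumulate into the same row */
		}
	}
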
  68 |       | static int _ccv_nnc_scatter_add_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
  69 |     4 | {
  70 |     4 | 	assert(input_size >= 3);
  71 |     4 | 	assert(output_size <= 2);
  72 |     4 | 	const ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
  73 |     4 | 	const int g_nd = ccv_nnc_tensor_nd(g->info.dim);
  74 |     4 | 	assert(g_nd <= 2);
  75 |     4 | 	const ccv_nnc_tensor_view_t* const indices = (ccv_nnc_tensor_view_t*)inputs[2];
  76 |     4 | 	assert(ccv_nnc_tensor_nd(indices->info.dim) == 1);
  77 |     4 | 	ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0];
  78 |     4 | 	const int h_nd = ccv_nnc_tensor_nd(h->info.dim);
  79 |     4 | 	assert(h_nd <= 2);
  80 |     4 | 	if (output_size >= 2 && outputs[1])
  81 |     0 | 		ccv_nnc_tensor_zero(outputs[1]);
  82 |     4 | 	const int g_cols = g_nd < 2 ? 1 : g->info.dim[1];
  83 |     4 | 	const int g_cols_inc = CCV_IS_TENSOR_VIEW(g) ? (g_nd < 2 ? 1 : g->stride[0]) : g_cols;
  84 |     4 | 	const int g_rows = g->info.dim[0];
  85 |     4 | 	const int h_cols = h_nd < 2 ? 1 : h->info.dim[1];
  86 |     4 | 	const int h_cols_inc = CCV_IS_TENSOR_VIEW(h) ? (h_nd < 2 ? 1 : h->stride[0]) : h_cols;
  87 |     4 | 	const int h_rows = h->info.dim[0];
  88 |     4 | 	assert(h_rows == indices->info.dim[0]);
  89 |     4 | 	assert(g_cols == h_cols);
  90 |     4 | 	assert(indices->info.datatype == CCV_32S);
  91 |     4 | 	assert(g->info.datatype == h->info.datatype);
  92 |     4 | 	assert(g->info.datatype == CCV_32F || g->info.datatype == CCV_16F);
  93 |     4 | 	const size_t data_size = CCV_GET_DATA_TYPE_SIZE(g->info.datatype);
  94 |     4 | 	assert(g->info.datatype == CCV_32F || g->info.datatype == CCV_16F);
  95 |    17 | 	parallel_for(i, h_rows) {
  96 |    17 | 		const int index = indices->data.i32[i];
  97 |    17 | 		assert(index < g_rows);
  98 |    17 | 		uint8_t* const hp = h->data.u8 + data_size * h_cols_inc * i;
  99 |    17 | 		uint8_t* const gp = g->data.u8 + data_size * g_cols_inc * index;
 100 |    17 | 		memcpy(hp, gp, data_size * g_cols);
 101 |    17 | 	} parallel_endfor
 102 |     4 | 	return CCV_NNC_EXEC_SUCCESS;
 103 |     4 | }
 104 |       |
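Editorial note: the backward kernel gathers rather than scatters: the gradient for input row i is the gradient row at indices[i], copied byte-for-byte with memcpy so the same loop serves FP32 and FP16. The optional gradient for the index tensor (outputs[1]) is only zeroed, and that branch (line 81, count 0) is the one uncovered line in this report. A minimal sketch of the gather, with illustrative names and strides ignored:

	#include <string.h>

	/* Illustrative sketch of the scatter-add gradient: h[i] = g[indices[i]],
	 * where g is (g_rows x cols) and h is (h_rows x cols). */
	static void scatter_add_grad_sketch(const float* g, const int* indices, float* h,
		int h_rows, int cols)
	{
		for (int i = 0; i < h_rows; i++)
			memcpy(h + cols * i, g + cols * indices[i], sizeof(float) * cols);
	}
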
 105 |       | REGISTER_COMMAND_BACKEND(CCV_NNC_SCATTER_ADD_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 106 |     1 | {
 107 |     1 | 	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
 108 |     1 | 	registry->tensor_datatypes = CCV_32F | CCV_16F | CCV_32S;
 109 |     1 | 	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 110 |     1 | 	registry->algorithms = 1;
 111 |     1 | 	registry->exec = _ccv_nnc_scatter_add_forw;
 112 |     1 | }
 113 |       |
 114 |       | REGISTER_COMMAND_BACKEND(CCV_NNC_SCATTER_ADD_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
 115 |     1 | {
 116 |     1 | 	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
 117 |     1 | 	registry->tensor_datatypes = CCV_32F | CCV_16F | CCV_32S;
 118 |     1 | 	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
 119 |     1 | 	registry->algorithms = 1;
 120 |     1 | 	registry->exec = _ccv_nnc_scatter_add_back;
 121 |     1 | }
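
Editorial note: both registrations advertise NHWC/NCHW tensor formats, FP32/FP16/32-bit integer datatypes, and CPU memory, and bind the reference kernels above as the single available algorithm. The sketch below shows how such a command is typically invoked through ccv_nnc_cmd_exec; it assumes a CMD_SCATTER_ADD_FORWARD() convenience macro exists by analogy with other ccv_nnc commands, so treat it as an illustration rather than a verbatim API reference.

	/* Hedged sketch: scatter-add five 4-wide rows into a 3x4 output.
	 * CMD_SCATTER_ADD_FORWARD() is an assumption; check ccv_nnc.h for the
	 * exact command macro spelling and parameters. */
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 5, 4), 0);
	ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 5), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3, 4), 0);
	/* ... fill a->data.f32 and indices->data.i32 (values in [0, 3)) ... */
	ccv_nnc_cmd_exec(CMD_SCATTER_ADD_FORWARD(), ccv_nnc_no_hint, 0,
		TENSOR_LIST(a, indices), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(indices);
	ccv_nnc_tensor_free(b);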