Coverage Report

Created: 2025-02-24 17:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/index/ccv_nnc_index_select_cpu_ref.c
Line
Count
Source
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
/**
 * Forward pass of index select for the CPU reference backend.
 *
 * inputs[0]  (a):       source tensor with at most 2 dimensions, read as
 *                       a_rows x a_cols (a_cols is 1 for a 1-D tensor).
 * inputs[1]  (indices): 1-D tensor of CCV_32S or CCV_32F, one entry per row of b.
 * outputs[0] (b):       destination tensor with the same datatype and column
 *                       count as a, and as many rows as there are indices.
 *
 * CCV_32S indices: row i of b is a byte-for-byte copy of row indices[i] of a
 * (a must be CCV_32F or CCV_16F).
 * CCV_32F indices: row i of b is a weighted sum of two rows of a, row j0 (the
 * index cast to int) and row j0 + 1 clamped to the last row, weighted by the
 * fractional part of the index (a must be CCV_32F).
 *
 * All preconditions are checked with assert() only. Always returns
 * CCV_NNC_EXEC_SUCCESS.
 */
static int _ccv_nnc_index_select_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  assert(input_size == 2);
  assert(output_size == 1);
  const ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
  const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
  assert(a_nd <= 2);
  const ccv_nnc_tensor_view_t* const indices = (ccv_nnc_tensor_view_t*)inputs[1];
  assert(ccv_nnc_tensor_nd(indices->info.dim) == 1);
  const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0];
  const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
  assert(b_nd <= 2);
  // Row length in elements, and the element distance between consecutive rows.
  // For tensor views the row distance comes from stride[0] instead of the row length.
  const int a_cols = a_nd < 2 ? 1 : a->info.dim[1];
  const int a_cols_inc = CCV_IS_TENSOR_VIEW(a) ? (a_nd < 2 ? 1 : a->stride[0]) : a_cols;
  const int a_rows = a->info.dim[0];
  const int b_cols = b_nd < 2 ? 1 : b->info.dim[1];
  const int b_cols_inc = CCV_IS_TENSOR_VIEW(b) ? (b_nd < 2 ? 1 : b->stride[0]) : b_cols;
  const int b_rows = b->info.dim[0];
  assert(b_rows == indices->info.dim[0]);
  assert(a_cols == b_cols);
  assert(a->info.datatype == b->info.datatype);
  const size_t data_size = CCV_GET_DATA_TYPE_SIZE(a->info.datatype);
  if (indices->info.datatype == CCV_32S)
  {
    assert(a->info.datatype == CCV_32F || a->info.datatype == CCV_16F);
    parallel_for(i, b_rows) {
      const int index = indices->data.i32[i];
      // A negative index would address memory before the first row of a;
      // reject it just like the float-index path rejects a negative j0.
      assert(index >= 0);
      assert(index < a_rows);
      // Rows are copied as raw bytes so the same code serves both 32F and 16F.
      uint8_t* const bp = b->data.u8 + data_size * b_cols_inc * i;
      uint8_t* const ap = a->data.u8 + data_size * a_cols_inc * index;
      memcpy(bp, ap, data_size * a_cols);
    } parallel_endfor
  } else {
    assert(indices->info.datatype == CCV_32F);
    assert(a->info.datatype == CCV_32F);
    parallel_for(i, b_rows) {
      // Split the fractional index into its integer part and the blend weight.
      const int j0 = (int)indices->data.f32[i];
      const int j1 = j0 + 1;
      const float w1 = indices->data.f32[i] - j0;
      const float w0 = 1 - w1;
      assert(j0 >= 0);
      assert(j0 < a_rows);
      float* const bp = b->data.f32 + b_cols_inc * i;
      float* const ap0 = a->data.f32 + a_cols_inc * j0;
      // Clamp the upper neighbour so an index on the last row stays in bounds.
      float* const ap1 = a->data.f32 + a_cols_inc * ccv_min(j1, a_rows - 1);
      int j;
      for (j = 0; j < a_cols; j++)
        bp[j] = ap0[j] * w0 + ap1[j] * w1;
    } parallel_endfor
  }
  return CCV_NNC_EXEC_SUCCESS;
}
65
66
/**
 * Backward pass of index select for the CPU reference backend.
 *
 * inputs[0]  (g):       incoming gradient with at most 2 dimensions, read as
 *                       g_rows x g_cols (g_cols is 1 for a 1-D tensor).
 * inputs[2]  (indices): 1-D CCV_32S tensor with one entry per row of g.
 * outputs[0] (h):       gradient output, same datatype (CCV_32F or CCV_16F)
 *                       and column count as g.
 * outputs[1]:           optional; zero-filled when present.
 *
 * h is zeroed first, then every row i of g is added into row indices[i] of h.
 * For CCV_16F the addition is carried out in float and converted back.
 *
 * All preconditions are checked with assert() only. Always returns
 * CCV_NNC_EXEC_SUCCESS.
 */
static int _ccv_nnc_index_select_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  assert(input_size >= 3);
  // outputs[0] is dereferenced unconditionally below, so it must exist.
  assert(output_size >= 1);
  assert(output_size <= 2);
  const ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
  const int g_nd = ccv_nnc_tensor_nd(g->info.dim);
  assert(g_nd <= 2);
  const ccv_nnc_tensor_view_t* const indices = (ccv_nnc_tensor_view_t*)inputs[2];
  assert(ccv_nnc_tensor_nd(indices->info.dim) == 1);
  const ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0];
  const int h_nd = ccv_nnc_tensor_nd(h->info.dim);
  assert(h_nd <= 2);
  // Start from zero because the loops below accumulate with +=.
  ccv_nnc_tensor_zero((ccv_nnc_tensor_t*)h);
  if (output_size >= 2 && outputs[1])
    ccv_nnc_tensor_zero(outputs[1]);
  // Row length in elements, and the element distance between consecutive rows.
  // For tensor views the row distance comes from stride[0] instead of the row length.
  const int g_cols = g_nd < 2 ? 1 : g->info.dim[1];
  const int g_cols_inc = CCV_IS_TENSOR_VIEW(g) ? (g_nd < 2 ? 1 : g->stride[0]) : g_cols;
  const int g_rows = g->info.dim[0];
  const int h_cols = h_nd < 2 ? 1 : h->info.dim[1];
  const int h_cols_inc = CCV_IS_TENSOR_VIEW(h) ? (h_nd < 2 ? 1 : h->stride[0]) : h_cols;
  const int h_rows = h->info.dim[0];
  assert(g_rows == indices->info.dim[0]);
  assert(g_cols == h_cols);
  assert(indices->info.datatype == CCV_32S);
  assert(g->info.datatype == h->info.datatype);
  assert(g->info.datatype == CCV_32F || g->info.datatype == CCV_16F);
  int i;
  // NOTE(review): the row loop is serial and only the column loop is parallel,
  // presumably because repeated indices accumulate into the same row of h.
  if (g->info.datatype == CCV_32F)
  {
    for (i = 0; i < g_rows; i++)
    {
      const int index = indices->data.i32[i];
      // A negative index would accumulate into memory before the first row of h.
      assert(index >= 0);
      assert(index < h_rows);
      float* const hp = h->data.f32 + h_cols_inc * index;
      float* const gp = g->data.f32 + g_cols_inc * i;
      parallel_for(j, g_cols) {
        hp[j] += gp[j];
      } parallel_endfor
    }
  } else {
    for (i = 0; i < g_rows; i++)
    {
      const int index = indices->data.i32[i];
      // A negative index would accumulate into memory before the first row of h.
      assert(index >= 0);
      assert(index < h_rows);
      ccv_float16_t* const hp = h->data.f16 + h_cols_inc * index;
      ccv_float16_t* const gp = g->data.f16 + g_cols_inc * i;
      parallel_for(j, g_cols) {
        // Widen both operands to float, add, then narrow the sum back to half.
        float t, v;
        ccv_half_precision_to_float((uint16_t*)gp + j, &t, 1);
        ccv_half_precision_to_float((uint16_t*)hp + j, &v, 1);
        v += t;
        ccv_float_to_half_precision(&v, (uint16_t*)hp + j, 1);
      } parallel_endfor
    }
  }
  return CCV_NNC_EXEC_SUCCESS;
}
123
124
// Fills in the CPU reference backend registry entry for the index select forward command.
REGISTER_COMMAND_BACKEND(CCV_NNC_INDEX_SELECT_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  // Entry point invoked for this command on this backend.
  registry->exec = _ccv_nnc_index_select_forw;
  registry->algorithms = 1;
  // Accepted memory kind, element types and layouts.
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  registry->tensor_datatypes = CCV_32F | CCV_16F | CCV_32S;
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
}
132
133
// Fills in the CPU reference backend registry entry for the index select backward command.
REGISTER_COMMAND_BACKEND(CCV_NNC_INDEX_SELECT_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
  // Entry point invoked for this command on this backend.
  registry->exec = _ccv_nnc_index_select_back;
  registry->algorithms = 1;
  // Accepted memory kind, element types and layouts.
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
  registry->tensor_datatypes = CCV_32F | CCV_16F | CCV_32S;
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
}