Coverage Report

Created: 2021-09-30 21:42

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/index/ccv_nnc_index_select_cpu_ref.c
#include "ccv.h"
#include "ccv_internal.h"
#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_easy.h"
#include "nnc/ccv_nnc_internal.h"
#ifdef USE_OPENMP
#include <omp.h>
#endif
#ifdef USE_DISPATCH
#include <dispatch/dispatch.h>
#endif
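
A note before the kernels: the parallel_for/parallel_endfor pairs used below are not defined in this file; they come from ccv_internal.h. As a working assumption (worth verifying against that header), they map to dispatch_apply under USE_DISPATCH, an OpenMP-parallel loop under USE_OPENMP, and a plain serial loop otherwise, roughly:

/* Hypothetical serial fallback, for illustration only -- the real macros
 * live in ccv_internal.h and may differ in detail. */
#define parallel_for(x, n) { int x; for (x = 0; x < (n); x++) {
#define parallel_endfor } }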

// Forward: gather rows, b[i, :] = a[indices[i], :].
static int _ccv_nnc_index_select_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	assert(input_size == 2);
	assert(output_size == 1);
	const ccv_nnc_tensor_view_t* const a = (ccv_nnc_tensor_view_t*)inputs[0];
	const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
	assert(a_nd <= 2);
	const ccv_nnc_tensor_view_t* const indices = (ccv_nnc_tensor_view_t*)inputs[1];
	assert(ccv_nnc_tensor_nd(indices->info.dim) == 1);
	const ccv_nnc_tensor_view_t* const b = (ccv_nnc_tensor_view_t*)outputs[0];
	const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
	assert(b_nd <= 2);
	const int a_cols = a_nd < 2 ? 1 : a->info.dim[1];
	// The row stride differs from the column count only for tensor views.
	const int a_cols_inc = CCV_IS_TENSOR_VIEW(a) ? (a_nd < 2 ? 1 : a->inc[1]) : a_cols;
	const int a_rows = a->info.dim[0];
	const int b_cols = b_nd < 2 ? 1 : b->info.dim[1];
	const int b_cols_inc = CCV_IS_TENSOR_VIEW(b) ? (b_nd < 2 ? 1 : b->inc[1]) : b_cols;
	const int b_rows = b->info.dim[0];
	assert(b_rows == indices->info.dim[0]);
	assert(a_cols == b_cols);
	assert(indices->info.datatype == CCV_32S);
	assert(a->info.datatype == b->info.datatype);
	assert(a->info.datatype == CCV_32F || a->info.datatype == CCV_16F);
	const size_t data_size = CCV_GET_DATA_TYPE_SIZE(a->info.datatype);
	// Each output row is a straight memcpy of the selected input row.
	parallel_for(i, b_rows) {
		const int index = indices->data.i32[i];
		assert(index < a_rows);
		uint8_t* const bp = b->data.u8 + data_size * b_cols_inc * i;
		uint8_t* const ap = a->data.u8 + data_size * a_cols_inc * index;
		memcpy(bp, ap, data_size * a_cols);
	} parallel_endfor
	return CCV_NNC_EXEC_SUCCESS;
}
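
For reference, the forward kernel computes b[i, :] = a[indices[i], :]. Below is a minimal sketch of exercising it through the public API; it assumes the CMD_INDEX_SELECT_FORWARD(), CPU_TENSOR_NHWC() and TENSOR_LIST() convenience macros from ccv_nnc_easy.h behave as they do elsewhere in the library:

#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>

int main(void)
{
	/* a: 4x3 float matrix, indices: two int32 row ids, b: 2x3 output. */
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 3), 0);
	ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 2), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	int i;
	for (i = 0; i < 4 * 3; i++)
		a->data.f32[i] = i;
	indices->data.i32[0] = 3;
	indices->data.i32[1] = 1;
	/* b[i, :] = a[indices[i], :], so b picks up rows 3 and 1 of a. */
	ccv_nnc_cmd_exec(CMD_INDEX_SELECT_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, indices), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(indices);
	ccv_nnc_tensor_free(b);
	return 0;
}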

// Backward: scatter-add gradients, h[indices[i], :] += g[i, :] over a zeroed h.
static int _ccv_nnc_index_select_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	assert(input_size >= 3);
	assert(output_size <= 2);
	const ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
	const int g_nd = ccv_nnc_tensor_nd(g->info.dim);
	assert(g_nd <= 2);
	const ccv_nnc_tensor_view_t* const indices = (ccv_nnc_tensor_view_t*)inputs[2];
	assert(ccv_nnc_tensor_nd(indices->info.dim) == 1);
	const ccv_nnc_tensor_view_t* const h = (ccv_nnc_tensor_view_t*)outputs[0];
	const int h_nd = ccv_nnc_tensor_nd(h->info.dim);
	assert(h_nd <= 2);
	ccv_nnc_tensor_zero((ccv_nnc_tensor_t*)h);
	// The indices take no gradient; zero the second output if one is requested.
	if (output_size >= 2 && outputs[1])
		ccv_nnc_tensor_zero(outputs[1]);
	const int g_cols = g_nd < 2 ? 1 : g->info.dim[1];
	const int g_cols_inc = CCV_IS_TENSOR_VIEW(g) ? (g_nd < 2 ? 1 : g->inc[1]) : g_cols;
	const int g_rows = g->info.dim[0];
	const int h_cols = h_nd < 2 ? 1 : h->info.dim[1];
	const int h_cols_inc = CCV_IS_TENSOR_VIEW(h) ? (h_nd < 2 ? 1 : h->inc[1]) : h_cols;
	const int h_rows = h->info.dim[0];
	assert(g_rows == indices->info.dim[0]);
	assert(g_cols == h_cols);
	assert(indices->info.datatype == CCV_32S);
	assert(g->info.datatype == h->info.datatype);
	assert(g->info.datatype == CCV_32F || g->info.datatype == CCV_16F);
	int i;
	if (g->info.datatype == CCV_32F)
	{
		// The row loop stays serial: duplicate indices may target the same h row.
		for (i = 0; i < g_rows; i++)
		{
			const int index = indices->data.i32[i];
			assert(index < h_rows);
			float* const hp = h->data.f32 + h_cols_inc * index;
			float* const gp = g->data.f32 + g_cols_inc * i;
			parallel_for(j, g_cols) {
				hp[j] += gp[j];
			} parallel_endfor
		}
	} else {
		for (i = 0; i < g_rows; i++)
		{
			const int index = indices->data.i32[i];
			assert(index < h_rows);
			ccv_float16_t* const hp = h->data.f16 + h_cols_inc * index;
			ccv_float16_t* const gp = g->data.f16 + g_cols_inc * i;
			parallel_for(j, g_cols) {
				// Accumulate in float: convert both halves up, add, convert back down.
				float t, v;
				ccv_half_precision_to_float((uint16_t*)gp + j, &t, 1);
				ccv_half_precision_to_float((uint16_t*)hp + j, &v, 1);
				v += t;
				ccv_float_to_half_precision(&v, (uint16_t*)hp + j, 1);
			} parallel_endfor
		}
	}
	return CCV_NNC_EXEC_SUCCESS;
}
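
The backward kernel is the adjoint of the gather above: h is zeroed, then h[indices[i], :] += g[i, :] for each gradient row, so rows selected more than once accumulate their gradients, and the CCV_16F path round-trips through float because the addition is done in single precision. A sketch of invoking it, continuing the example after the forward kernel (passing 0 for the unused forward input is an assumption consistent with the indices being read from inputs[2]):

/* h must have the shape of the original input a (4x3 here); g has b's shape (2x3). */
ccv_nnc_tensor_t* const g = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
ccv_nnc_tensor_t* const h = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 4, 3), 0);
/* Inputs are (gradient, forward-input placeholder, indices). */
ccv_nnc_cmd_exec(CMD_INDEX_SELECT_BACKWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(g, 0, indices), TENSOR_LIST(h), 0);
/* h is now zero except rows 3 and 1, which received g's rows 0 and 1. */
ccv_nnc_tensor_free(g);
ccv_nnc_tensor_free(h);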

// Register the reference CPU kernels for the index select command.
REGISTER_COMMAND_BACKEND(CCV_NNC_INDEX_SELECT_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
	registry->tensor_datatypes = CCV_32F | CCV_16F | CCV_32S;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->algorithms = 1;
	registry->exec = _ccv_nnc_index_select_forw;
}

REGISTER_COMMAND_BACKEND(CCV_NNC_INDEX_SELECT_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
	registry->tensor_datatypes = CCV_32F | CCV_16F | CCV_32S;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->algorithms = 1;
	registry->exec = _ccv_nnc_index_select_back;
}