Coverage Report

Created: 2025-05-07 17:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/convolution/ccv_nnc_conv_cpu_opt.c
Line
Count
Source
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
7
#include "_ccv_nnc_conv_cpu_opt.h"
8
9
FIND_FILE(cpu_opt/_ccv_nnc_conv_cpu_4x4_3x3_winograd.c, cpu_opt/_ccv_nnc_conv_cpu_fft.c, cpu_opt/_ccv_nnc_conv_cpu_gemm.c, cpu_opt/_ccv_nnc_conv_cpu_opt.c)
10
11
enum {
12
  CCV_NNC_CMD_OPT_CONV_ALGO_DC, // Direct convolution
13
  CCV_NNC_CMD_OPT_CONV_ALGO_GEMM, // GEMM (for 1x1)
14
  CCV_NNC_CMD_OPT_CONV_ALGO_WINOGRAD, // Winograd algorithm
15
  CCV_NNC_CMD_OPT_CONV_ALGO_FFT, // Fast Fourier transform
16
  CCV_NNC_CMD_OPT_CONV_ALGO_COUNT
17
};
18
19
static int _ccv_nnc_conv_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
20
1.56k
{
21
1.56k
  assert(input_size >= 2);
22
1.56k
  const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[0];
23
1.56k
  const ccv_nnc_tensor_t* w = inputs[1];
24
1.56k
  assert(CCV_IS_TENSOR_CONTIGUOUS(w));
25
1.56k
  const ccv_nnc_tensor_t* bias = input_size > 2 ? 
inputs[2]1.56k
:
01
;
26
1.56k
  assert(!bias || !CCV_IS_TENSOR_VIEW(bias));
27
1.56k
  assert(output_size == 1);
28
1.56k
  ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
29
1.56k
  const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
30
1.56k
  assert(a_nd == CCV_NNC_MAX_DIM + 1 || a_nd == CCV_NNC_MAX_DIM + 2);
31
1.56k
  const int* adim = (a_nd == CCV_NNC_MAX_DIM + 1) ? 
a->info.dim394
:
a->info.dim + 11.17k
;
32
1.56k
  const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
33
1.56k
  assert(b_nd == CCV_NNC_MAX_DIM + 1 || b_nd == CCV_NNC_MAX_DIM + 2);
34
1.56k
  const int* bdim = (b_nd == CCV_NNC_MAX_DIM + 1) ? 
b->info.dim394
:
b->info.dim + 11.17k
;
35
1.56k
  assert(w->info.dim[CCV_NNC_MAX_DIM + 1] == adim[CCV_NNC_MAX_DIM]);
36
1.56k
  assert(bdim[CCV_NNC_MAX_DIM] == cmd.info.convolution.count);
37
1.56k
  if (cmd.info.convolution.groups != 1)
38
0
    return CCV_NNC_EXEC_INVALID;
39
1.56k
  if (cmd.info.convolution.dilation[0] > 1 || cmd.info.convolution.dilation[1] > 1)
40
0
    return CCV_NNC_EXEC_INVALID;
41
1.56k
  int i;
42
  // Make sure the weights dimension matches the network dimension
43
6.26k
  for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC; 
i++4.70k
)
44
6.26k
  {
45
6.26k
    if (w->info.dim[i] == 0 || 
cmd.info.size.dim[i - 1] == 04.70k
)
46
1.56k
      break;
47
6.26k
    assert(w->info.dim[i] == cmd.info.size.dim[i - 1]);
48
4.70k
  }
49
1.56k
  switch (cmd.algorithm)
50
1.56k
  {
51
1.11k
    case CCV_NNC_CMD_OPT_CONV_ALGO_DC:
52
1.11k
      return _ccv_nnc_conv_forw_cpu_opt(a, w, bias, hint, b);
53
145
    case CCV_NNC_CMD_OPT_CONV_ALGO_GEMM:
54
145
      if (w->info.dim[1] == 1 && 
w->info.dim[2] == 14
&&
hint.stride.dim[0] <= 14
&&
hint.stride.dim[1] <= 14
&&
55
145
        
hint.border.begin[0] == 04
&&
hint.border.begin[1] == 04
&&
hint.border.end[0] == 04
&&
hint.border.end[1] == 04
&&
56
145
        
!4
CCV_IS_TENSOR_VIEW4
(a) &&
!4
CCV_IS_TENSOR_VIEW4
(b) &&
!4
CCV_IS_TENSOR_VIEW4
(w) &&
(4
!bias4
||
!4
CCV_IS_TENSOR_VIEW4
(bias)))
57
4
        return _ccv_nnc_conv_forw_gemm_cpu_opt(a, w, bias, hint, b);
58
141
      return CCV_NNC_EXEC_INVALID;
59
161
    case CCV_NNC_CMD_OPT_CONV_ALGO_WINOGRAD:
60
161
      if (w->info.dim[1] == 3 && 
w->info.dim[2] == 395
&&
hint.stride.dim[0] <= 195
&&
hint.stride.dim[1] <= 195
)
61
95
        return _ccv_nnc_conv_forw_4x4_3x3_winograd_cpu_opt(a, w, bias, hint, b, stream_context);
62
66
      return CCV_NNC_EXEC_INVALID;
63
144
    case CCV_NNC_CMD_OPT_CONV_ALGO_FFT:
64
144
      return CCV_NNC_EXEC_INVALID; // Placeholder, for fft.
65
0
    case -1:
66
      // Pass-through
67
0
      break;
68
1.56k
  }
69
  // If the size is 3x3, and no stride, choose Winograd kernel
70
0
  if (w->info.dim[1] == 3 && w->info.dim[2] == 3 && hint.stride.dim[0] <= 1 && hint.stride.dim[1] <= 1)
71
0
    return _ccv_nnc_conv_forw_4x4_3x3_winograd_cpu_opt(a, w, bias, hint, b, stream_context);
72
  // If the size is 1x1, and no stride, and not a tensor view object, no padding, choose GEMM kernel
73
0
  if (w->info.dim[1] == 1 && w->info.dim[2] == 1 && hint.stride.dim[0] <= 1 && hint.stride.dim[1] <= 1 &&
74
0
    hint.border.begin[0] == 0 && hint.border.begin[1] == 0 && hint.border.end[0] == 0 && hint.border.end[1] == 0 &&
75
0
    !CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b) && !CCV_IS_TENSOR_VIEW(w) && (!bias || !CCV_IS_TENSOR_VIEW(bias)))
76
0
    return _ccv_nnc_conv_forw_gemm_cpu_opt(a, w, bias, hint, b);
77
  // Otherwise, use direct convolution kernel
78
0
  return _ccv_nnc_conv_forw_cpu_opt(a, w, bias, hint, b);
79
0
}
80
81
REGISTER_COMMAND_BACKEND(CCV_NNC_CONVOLUTION_FORWARD, CCV_NNC_BACKEND_CPU_OPT)(ccv_nnc_cmd_backend_registry_t* const registry)
82
1
{
83
1
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
84
1
  registry->tensor_datatypes = CCV_32F;
85
1
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
86
1
  registry->algorithms = CCV_NNC_CMD_OPT_CONV_ALGO_COUNT;
87
1
  registry->exec = _ccv_nnc_conv_forw;
88
1
}