/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd/blas/ccv_nnc_gemm_cpu_opt.c
#include "ccv.h"
#include "ccv_internal.h"
#include "nnc/ccv_nnc.h"
#include "nnc/ccv_nnc_easy.h"
#include "nnc/ccv_nnc_internal.h"

#include "_ccv_nnc_gemm_cpu_opt.h"

FIND_FILE(cpu_opt/_ccv_nnc_gemm_cpu_opt.c, cpu_sys/_ccv_nnc_gemm_cpu_sys.c)

enum {
	CCV_NNC_CMD_OPT_GEMM_ALGO_DIRECT, // Direct multiplication
	CCV_NNC_CMD_OPT_GEMM_ALGO_SYSTEM, // Use system GEMM
	CCV_NNC_CMD_OPT_GEMM_ALGO_COUNT
};

static int _ccv_nnc_gemm_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	assert(input_size >= 2);
	const ccv_nnc_tensor_view_t* w = (const ccv_nnc_tensor_view_t*)inputs[1];
	const ccv_nnc_tensor_view_t* bias = input_size > 2 ? (const ccv_nnc_tensor_view_t*)inputs[2] : 0;
	const ccv_nnc_tensor_view_t* a = (const ccv_nnc_tensor_view_t*)inputs[0];
	ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[0];
	// Cannot compute if w is not transposed or the dimensions are batched.
	// Copy most of the parameters, but reshape the dimension of a to a vector.
	assert(output_size == 1);
	switch (cmd.algorithm)
	{
		case CCV_NNC_CMD_OPT_GEMM_ALGO_DIRECT:
			// Cannot handle this with DIRECT: it only supports 2-d a / b / w (1-d bias),
			// an un-transposed a and a transposed w.
			if (ccv_nnc_tensor_nd(a->info.dim) > 2 || ccv_nnc_tensor_nd(b->info.dim) > 2 ||
				ccv_nnc_tensor_nd(w->info.dim) > 2 ||
				(bias && ccv_nnc_tensor_nd(bias->info.dim) > 1) ||
				cmd.info.blas.transpose_a[0] != cmd.info.blas.transpose_a[1] ||
				cmd.info.blas.transpose_b[0] == cmd.info.blas.transpose_b[1])
				break;
			return _ccv_nnc_gemm_forw_cpu_opt(a, w, bias, b);
		case CCV_NNC_CMD_OPT_GEMM_ALGO_SYSTEM:
			return _ccv_nnc_gemm_forw_cpu_sys(cmd.info.blas.transpose_a, cmd.info.blas.transpose_b, a, w, bias, b);
		case -1:
			// Pass-through
			break;
	}
	// No algorithm picked (or DIRECT bailed out): prefer the system BLAS when available.
#if (defined HAVE_CBLAS || defined HAVE_ACCELERATE_FRAMEWORK)
	return _ccv_nnc_gemm_forw_cpu_sys(cmd.info.blas.transpose_a, cmd.info.blas.transpose_b, a, w, bias, b);
#endif
	if (ccv_nnc_tensor_nd(a->info.dim) > 2 || ccv_nnc_tensor_nd(b->info.dim) > 2 ||
		ccv_nnc_tensor_nd(w->info.dim) > 2 ||
		(bias && ccv_nnc_tensor_nd(bias->info.dim) > 1) ||
		cmd.info.blas.transpose_a[0] != cmd.info.blas.transpose_a[1] ||
		cmd.info.blas.transpose_b[0] == cmd.info.blas.transpose_b[1])
		return CCV_NNC_EXEC_INVALID;
	assert(w->info.dim[2] == 0); // It is a 2-d array.
	assert(!bias || bias->info.dim[1] == 0); // It is a 1-d array.
	const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
	assert(a_nd == 1 || a_nd == 2);
	const int* adim = (a_nd == 1) ? a->info.dim : a->info.dim + 1;
	const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
	assert(b_nd == 1 || b_nd == 2);
	const int* bdim = (b_nd == 1) ? b->info.dim : b->info.dim + 1;
	const int batch_size = a_nd == 1 ? 1 : ccv_max(1, a->info.dim[0]);
	assert(batch_size == ((b_nd == 1) ? 1 : ccv_max(1, b->info.dim[0]))); // a and b must agree on the batch size.
	assert(!bias || bdim[0] == bias->info.dim[0]);
	assert(bdim[0] == w->info.dim[0]);
	assert(adim[0] == w->info.dim[1]);
	return _ccv_nnc_gemm_forw_cpu_opt(a, w, bias, b);
}
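
/* A minimal sketch of what the DIRECT path computes in the unbatched case,
 * not part of this file: the asserts above pin w to a row-major [out][in]
 * layout applied as a transposed matrix, so b[i] = sum_j w[i][j] * a[j]
 * (+ bias[i]). The function name is hypothetical and only illustrates the
 * shape convention. */
static void _gemm_forw_direct_sketch(const float* const a, const float* const w, const float* const bias, float* const b, const int out_rows, const int in_cols)
{
	int i, j;
	for (i = 0; i < out_rows; i++)
	{
		float v = bias ? bias[i] : 0;
		for (j = 0; j < in_cols; j++)
			v += w[i * in_cols + j] * a[j]; // Row i of w dotted with the input vector.
		b[i] = v;
	}
}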

static int _ccv_nnc_gemm_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	// inputs: gradient, forw prop input, [w]
	// outputs: [output gradient], weight updates, bias updates
	assert(input_size >= 2 && output_size >= 2);
	const ccv_nnc_tensor_view_t* g = (const ccv_nnc_tensor_view_t*)inputs[0];
	const ccv_nnc_tensor_view_t* a = (const ccv_nnc_tensor_view_t*)inputs[1];
	ccv_nnc_tensor_view_t* dw = (ccv_nnc_tensor_view_t*)outputs[1];
	ccv_nnc_tensor_view_t* bias = output_size > 2 ? (ccv_nnc_tensor_view_t*)outputs[2] : 0;
	const ccv_nnc_tensor_view_t* w = (input_size > 2) ? (const ccv_nnc_tensor_view_t*)inputs[2] : 0;
	ccv_nnc_tensor_view_t* h = (ccv_nnc_tensor_view_t*)outputs[0];
	// Cannot compute if w is not transposed or the dimensions are batched.
	switch (cmd.algorithm)
	{
		case CCV_NNC_CMD_OPT_GEMM_ALGO_DIRECT:
			// Cannot handle this with DIRECT.
			if ((!a || ccv_nnc_tensor_nd(a->info.dim) > 2) || ccv_nnc_tensor_nd(g->info.dim) > 2 ||
				(!dw || ccv_nnc_tensor_nd(dw->info.dim) > 2) ||
				(bias && ccv_nnc_tensor_nd(bias->info.dim) > 1) ||
				cmd.info.blas.transpose_a[0] != cmd.info.blas.transpose_a[1] ||
				cmd.info.blas.transpose_b[0] == cmd.info.blas.transpose_b[1])
				break;
			return _ccv_nnc_gemm_back_cpu_opt(g, a, w, dw, bias, h, flags);
		case CCV_NNC_CMD_OPT_GEMM_ALGO_SYSTEM:
			return _ccv_nnc_gemm_back_cpu_sys(cmd.info.blas.transpose_a, cmd.info.blas.transpose_b, g, a, w, dw, bias, h, flags);
		case -1:
			// Pass-through
			break;
	}
#if (defined HAVE_CBLAS || defined HAVE_ACCELERATE_FRAMEWORK)
	return _ccv_nnc_gemm_back_cpu_sys(cmd.info.blas.transpose_a, cmd.info.blas.transpose_b, g, a, w, dw, bias, h, flags);
#else
	if (ccv_nnc_tensor_nd(a->info.dim) > 2 || ccv_nnc_tensor_nd(g->info.dim) > 2 ||
		ccv_nnc_tensor_nd(dw->info.dim) > 2 ||
		(bias && ccv_nnc_tensor_nd(bias->info.dim) > 1) ||
		cmd.info.blas.transpose_a[0] != cmd.info.blas.transpose_a[1] ||
		cmd.info.blas.transpose_b[0] == cmd.info.blas.transpose_b[1])
		return CCV_NNC_EXEC_INVALID;
	assert(dw->info.dim[2] == 0); // It is a 2-d array.
	assert(!bias || bias->info.dim[1] == 0); // It is a 1-d array.
	const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
	assert(a_nd == 1 || a_nd == 2);
	const int* adim = (a_nd == 1) ? a->info.dim : a->info.dim + 1;
	const int g_nd = ccv_nnc_tensor_nd(g->info.dim);
	assert(g_nd == 1 || g_nd == 2);
	const int* gdim = (g_nd == 1) ? g->info.dim : g->info.dim + 1;
	const int batch_size = a_nd == 1 ? 1 : ccv_max(1, a->info.dim[0]);
	assert(batch_size == ((g_nd == 1) ? 1 : ccv_max(1, g->info.dim[0]))); // a and g must agree on the batch size.
	assert(!bias || bias->info.dim[0] == gdim[0]);
	assert(gdim[0] == dw->info.dim[0]);
	assert(adim[0] == dw->info.dim[1]);
	if (h)
	{
		assert(h->info.dim[2] == 0); // It is a 2-d array.
		const int h_nd = ccv_nnc_tensor_nd(h->info.dim);
		assert(h_nd == 1 || h_nd == 2);
		const int* hdim = (h_nd == 1) ? h->info.dim : h->info.dim + 1;
		assert(hdim[0] == adim[0]);
	}
	if (w)
	{
		assert(w->info.dim[2] == 0); // It is a 2-d array.
		assert(w->info.dim[0] == dw->info.dim[0]);
		assert(w->info.dim[1] == dw->info.dim[1]);
	}
	return _ccv_nnc_gemm_back_cpu_opt(g, a, w, dw, bias, h, flags);
#endif
}
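
/* A minimal sketch of the corresponding backward pass in the unbatched case,
 * not part of this file and with a hypothetical name: with the same [out][in]
 * layout for w, the weight gradient is the outer product dw[i][j] += g[i] * a[j],
 * the bias gradient accumulates g directly, and the input gradient is
 * h[j] = sum_i w[i][j] * g[i]. This assumes dw / dbias start zeroed; batched
 * inputs sum these per-row contributions. */
static void _gemm_back_direct_sketch(const float* const g, const float* const a, const float* const w, float* const dw, float* const dbias, float* const h, const int out_rows, const int in_cols)
{
	int i, j;
	for (i = 0; i < out_rows; i++)
	{
		if (dbias)
			dbias[i] += g[i]; // Bias gradient is the output gradient itself.
		for (j = 0; j < in_cols; j++)
			dw[i * in_cols + j] += g[i] * a[j]; // Outer product of g and a.
	}
	if (h && w)
		for (j = 0; j < in_cols; j++)
		{
			float v = 0;
			for (i = 0; i < out_rows; i++)
				v += w[i * in_cols + j] * g[i]; // w transposed applied to g.
			h[j] = v;
		}
}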

REGISTER_COMMAND_BACKEND(CCV_NNC_GEMM_FORWARD, CCV_NNC_BACKEND_CPU_OPT)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->algorithms = CCV_NNC_CMD_OPT_GEMM_ALGO_COUNT;
	registry->exec = _ccv_nnc_gemm_forw;
}

REGISTER_COMMAND_BACKEND(CCV_NNC_GEMM_BACKWARD, CCV_NNC_BACKEND_CPU_OPT)(ccv_nnc_cmd_backend_registry_t* const registry)
{
	registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
	registry->tensor_datatypes = CCV_32F;
	registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
	registry->algorithms = CCV_NNC_CMD_OPT_GEMM_ALGO_COUNT;
	registry->exec = _ccv_nnc_gemm_back;
}
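
/* Because registry->algorithms is set to CCV_NNC_CMD_OPT_GEMM_ALGO_COUNT above,
 * the command autotuner can benchmark both the DIRECT and SYSTEM paths and keep
 * the faster one; algorithm -1 falls through to the compile-time fallback. A
 * caller can also pin an algorithm by hand. A hedged sketch, assuming the public
 * ccv_nnc_cmd / ccv_nnc_cmd_exec API and the CMD_GENERIC / TENSOR_LIST
 * convenience macros from ccv_nnc_easy.h (check the headers for exact forms):
 *
 *   ccv_nnc_cmd_t cmd = ccv_nnc_cmd(CCV_NNC_GEMM_FORWARD, 0, CMD_GENERIC(), 0);
 *   cmd.info.blas.transpose_b[0] = 0; // Mark w as transposed ([out][in] layout),
 *   cmd.info.blas.transpose_b[1] = 1; // which is what the DIRECT path requires.
 *   cmd.algorithm = CCV_NNC_CMD_OPT_GEMM_ALGO_DIRECT; // Pin the direct path.
 *   ccv_nnc_cmd_exec(cmd, ccv_nnc_no_hint, 0, TENSOR_LIST(a, w, bias), TENSOR_LIST(b), 0);
 */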