Bug Summary

File: nnc/cmd/scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c
Warning: line 258, column 28
Array access (from variable 'amp2') results in a null pointer dereference

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_nnc_scaled_dot_product_attention_cpu_ref.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/cmd -resource-dir /usr/local/lib/clang/19 -I ../../ -I .. 
-I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2026-05-27-144330-3062811-1 -x c scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c
1#include "ccv.h"
2#include "ccv_internal.h"
3#include "nnc/ccv_nnc.h"
4#include "nnc/ccv_nnc_easy.h"
5#include "nnc/ccv_nnc_internal.h"
6#ifdef USE_OPENMP
7#include <omp.h>
8#endif
9#ifdef USE_DISPATCH
10#include <dispatch/dispatch.h>
11#endif
12
13// Shared methods.
14#include "../_ccv_nnc_cpu_ref.h"
15
16static int _ccv_nnc_scaled_dot_product_attention_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
17{
18 assert(input_size >= 3)((void) sizeof ((input_size >= 3) ? 1 : 0), __extension__ (
{ if (input_size >= 3) ; else __assert_fail ("input_size >= 3"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 18, __extension__ __PRETTY_FUNCTION__); }))
;
1
Assuming 'input_size' is >= 3
2
Taking true branch
19 assert(output_size >= 1)((void) sizeof ((output_size >= 1) ? 1 : 0), __extension__
({ if (output_size >= 1) ; else __assert_fail ("output_size >= 1"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 19, __extension__ __PRETTY_FUNCTION__); }))
;
3
Assuming 'output_size' is >= 1
4
Taking true branch
20 const int is_varlen = cmd.info.scaled_dot_product_attention.is_varlen;
21 ccv_nnc_tensor_view_t* const q = (ccv_nnc_tensor_view_t*)inputs[0];
22 ccv_nnc_tensor_view_t* const k = (ccv_nnc_tensor_view_t*)inputs[1];
23 ccv_nnc_tensor_view_t* const v = (ccv_nnc_tensor_view_t*)inputs[2];
24 ccv_nnc_tensor_view_t* const attn_mask = input_size > 3 ? (ccv_nnc_tensor_view_t*)inputs[3] : 0;
5
Assuming 'input_size' is > 3
6
'?' condition is true
25 ccv_nnc_tensor_view_t* const w = input_size > 4 ? (ccv_nnc_tensor_view_t*)inputs[4] : 0;
7
Assuming 'input_size' is > 4
8
'?' condition is true
26 ccv_nnc_tensor_view_t* const bias = input_size > 5 ? (ccv_nnc_tensor_view_t*)inputs[5] : 0;
9
Assuming 'input_size' is > 5
10
'?' condition is true
27 ccv_nnc_tensor_view_t* const q_seq_offsets = is_varlen && input_size > 6 ? (ccv_nnc_tensor_view_t*)inputs[6] : 0;
11
Assuming 'is_varlen' is 0
28 ccv_nnc_tensor_view_t* const kv_seq_offsets = is_varlen
11.1
'is_varlen' is 0
&& input_size > 7 ? (ccv_nnc_tensor_view_t*)inputs[7] : 0;
29 const int attention_sinks = cmd.info.scaled_dot_product_attention.attention_sinks;
30 ccv_nnc_tensor_view_t* const sinks = attention_sinks && input_size > 8 ? (ccv_nnc_tensor_view_t*)inputs[8] : 0;
12
Assuming 'attention_sinks' is not equal to 0
13
Assuming 'input_size' is > 8
14
'?' condition is true
31 if (bias) // bias always requires a weight matrix.
15
Assuming 'bias' is null
32 { assert(w)((void) sizeof ((w) ? 1 : 0), __extension__ ({ if (w) ; else __assert_fail
("w", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 32, __extension__ __PRETTY_FUNCTION__); }))
; }
33 if (is_varlen
15.1
'is_varlen' is 0
&& (attn_mask || w || bias || !q_seq_offsets || !kv_seq_offsets))
34 return CCV_NNC_EXEC_INVALID;
35 if (attention_sinks
15.2
'attention_sinks' is not equal to 0
&& !sinks)
16
Assuming 'sinks' is non-null
36 return CCV_NNC_EXEC_INVALID;
37 ccv_nnc_tensor_view_t* const c = (w) ? (ccv_nnc_tensor_view_t*)outputs[2] : (ccv_nnc_tensor_view_t*)outputs[0];
17
Taking false branch
18
Assuming 'w' is non-null
19
'?' condition is true
38 const int q_nd = ccv_nnc_tensor_nd(q->info.dim);
39 assert(q_nd == 3 || q_nd == 4)((void) sizeof ((q_nd == 3 || q_nd == 4) ? 1 : 0), __extension__
({ if (q_nd == 3 || q_nd == 4) ; else __assert_fail ("q_nd == 3 || q_nd == 4"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 39, __extension__ __PRETTY_FUNCTION__); }))
;
20
Assuming 'q_nd' is not equal to 3
21
Assuming 'q_nd' is equal to 4
22
Taking true branch
40 const int k_nd = ccv_nnc_tensor_nd(k->info.dim);
41 assert(k_nd == 3 || k_nd == 4)((void) sizeof ((k_nd == 3 || k_nd == 4) ? 1 : 0), __extension__
({ if (k_nd == 3 || k_nd == 4) ; else __assert_fail ("k_nd == 3 || k_nd == 4"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 41, __extension__ __PRETTY_FUNCTION__); }))
;
23
Assuming 'k_nd' is not equal to 3
24
Assuming 'k_nd' is equal to 4
25
Taking true branch
42 const int v_nd = ccv_nnc_tensor_nd(v->info.dim);
43 assert(v_nd == 3 || v_nd == 4)((void) sizeof ((v_nd == 3 || v_nd == 4) ? 1 : 0), __extension__
({ if (v_nd == 3 || v_nd == 4) ; else __assert_fail ("v_nd == 3 || v_nd == 4"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 43, __extension__ __PRETTY_FUNCTION__); }))
;
26
Assuming 'v_nd' is not equal to 3
27
Assuming 'v_nd' is equal to 4
28
Taking true branch
44 const int c_nd = ccv_nnc_tensor_nd(c->info.dim);
45 assert(c_nd == 3 || c_nd == 4)((void) sizeof ((c_nd == 3 || c_nd == 4) ? 1 : 0), __extension__
({ if (c_nd == 3 || c_nd == 4) ; else __assert_fail ("c_nd == 3 || c_nd == 4"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 45, __extension__ __PRETTY_FUNCTION__); }))
;
29
Assuming 'c_nd' is not equal to 3
30
Assuming 'c_nd' is equal to 4
31
Taking true branch
46 assert(q_nd == k_nd && k_nd == v_nd && v_nd == c_nd)((void) sizeof ((q_nd == k_nd && k_nd == v_nd &&
v_nd == c_nd) ? 1 : 0), __extension__ ({ if (q_nd == k_nd &&
k_nd == v_nd && v_nd == c_nd) ; else __assert_fail (
"q_nd == k_nd && k_nd == v_nd && v_nd == c_nd"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 46, __extension__ __PRETTY_FUNCTION__); }))
;
32
Taking true branch
47 if (is_varlen
32.1
'is_varlen' is 0
&& q_nd != 4)
48 return CCV_NNC_EXEC_INVALID;
49 // Assuming this is float 32.
50 int qdim[CCV_NNC_MAX_DIM_ALLOC(12)];
51 int kdim[CCV_NNC_MAX_DIM_ALLOC(12)];
52 int vdim[CCV_NNC_MAX_DIM_ALLOC(12)];
53 int cdim[CCV_NNC_MAX_DIM_ALLOC(12)];
54 int amdim[CCV_NNC_MAX_DIM_ALLOC(12)];
55 ccv_nnc_tensor_view_get_dim(q, qdim);
56 ccv_nnc_tensor_view_get_dim(k, kdim);
57 ccv_nnc_tensor_view_get_dim(v, vdim);
58 ccv_nnc_tensor_view_get_dim(c, cdim);
59 if (is_varlen
32.2
'is_varlen' is 0
)
33
Taking false branch
60 {
61 assert(q_seq_offsets->info.datatype == CCV_32S)((void) sizeof ((q_seq_offsets->info.datatype == CCV_32S) ?
1 : 0), __extension__ ({ if (q_seq_offsets->info.datatype
== CCV_32S) ; else __assert_fail ("q_seq_offsets->info.datatype == CCV_32S"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 61, __extension__ __PRETTY_FUNCTION__); }))
;
62 assert(kv_seq_offsets->info.datatype == CCV_32S)((void) sizeof ((kv_seq_offsets->info.datatype == CCV_32S)
? 1 : 0), __extension__ ({ if (kv_seq_offsets->info.datatype
== CCV_32S) ; else __assert_fail ("kv_seq_offsets->info.datatype == CCV_32S"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 62, __extension__ __PRETTY_FUNCTION__); }))
;
63 assert(CCV_IS_TENSOR_CONTIGUOUS(q_seq_offsets))((void) sizeof (((!((*(int*)(q_seq_offsets)) & CCV_TENSOR_VIEW
) || (((ccv_nnc_tensor_view_t*)q_seq_offsets)->contiguous ==
1))) ? 1 : 0), __extension__ ({ if ((!((*(int*)(q_seq_offsets
)) & CCV_TENSOR_VIEW) || (((ccv_nnc_tensor_view_t*)q_seq_offsets
)->contiguous == 1))) ; else __assert_fail ("CCV_IS_TENSOR_CONTIGUOUS(q_seq_offsets)"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 63, __extension__ __PRETTY_FUNCTION__); }))
;
64 assert(CCV_IS_TENSOR_CONTIGUOUS(kv_seq_offsets))((void) sizeof (((!((*(int*)(kv_seq_offsets)) & CCV_TENSOR_VIEW
) || (((ccv_nnc_tensor_view_t*)kv_seq_offsets)->contiguous
== 1))) ? 1 : 0), __extension__ ({ if ((!((*(int*)(kv_seq_offsets
)) & CCV_TENSOR_VIEW) || (((ccv_nnc_tensor_view_t*)kv_seq_offsets
)->contiguous == 1))) ; else __assert_fail ("CCV_IS_TENSOR_CONTIGUOUS(kv_seq_offsets)"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 64, __extension__ __PRETTY_FUNCTION__); }))
;
65 }
66 if (q_nd
33.1
'q_nd' is not equal to 3
== 3)
34
Taking false branch
67 {
68 qdim[0] = qdim[1], qdim[1] = qdim[2], qdim[2] = 1;
69 kdim[0] = kdim[1], kdim[1] = kdim[2], kdim[2] = 1;
70 vdim[0] = vdim[1], vdim[1] = vdim[2], vdim[2] = 1;
71 cdim[0] = cdim[1], cdim[1] = cdim[2], cdim[2] = 1;
72 }
73 assert(qdim[0] == kdim[0] && kdim[0] == vdim[0] && vdim[0] == cdim[0])((void) sizeof ((qdim[0] == kdim[0] && kdim[0] == vdim
[0] && vdim[0] == cdim[0]) ? 1 : 0), __extension__ ({
if (qdim[0] == kdim[0] && kdim[0] == vdim[0] &&
vdim[0] == cdim[0]) ; else __assert_fail ("qdim[0] == kdim[0] && kdim[0] == vdim[0] && vdim[0] == cdim[0]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 73, __extension__ __PRETTY_FUNCTION__); }))
;
35
Assuming the condition is true
36
Assuming the condition is true
37
Assuming the condition is true
38
Taking true branch
74 assert(qdim[2] == cdim[2])((void) sizeof ((qdim[2] == cdim[2]) ? 1 : 0), __extension__ (
{ if (qdim[2] == cdim[2]) ; else __assert_fail ("qdim[2] == cdim[2]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 74, __extension__ __PRETTY_FUNCTION__); }))
;
39
Assuming the condition is true
40
Taking true branch
75 assert(kdim[2] == vdim[2])((void) sizeof ((kdim[2] == vdim[2]) ? 1 : 0), __extension__ (
{ if (kdim[2] == vdim[2]) ; else __assert_fail ("kdim[2] == vdim[2]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 75, __extension__ __PRETTY_FUNCTION__); }))
;
41
Assuming the condition is true
42
Taking true branch
76 assert(qdim[2] % kdim[2] == 0)((void) sizeof ((qdim[2] % kdim[2] == 0) ? 1 : 0), __extension__
({ if (qdim[2] % kdim[2] == 0) ; else __assert_fail ("qdim[2] % kdim[2] == 0"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 76, __extension__ __PRETTY_FUNCTION__); }))
;
43
Assuming the condition is true
44
Taking true branch
77 assert(qdim[2] >= kdim[2])((void) sizeof ((qdim[2] >= kdim[2]) ? 1 : 0), __extension__
({ if (qdim[2] >= kdim[2]) ; else __assert_fail ("qdim[2] >= kdim[2]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 77, __extension__ __PRETTY_FUNCTION__); }))
;
45
Assuming the condition is true
46
Taking true branch
78 assert(qdim[3] == kdim[3])((void) sizeof ((qdim[3] == kdim[3]) ? 1 : 0), __extension__ (
{ if (qdim[3] == kdim[3]) ; else __assert_fail ("qdim[3] == kdim[3]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 78, __extension__ __PRETTY_FUNCTION__); }))
;
47
Assuming the condition is true
48
Taking true branch
79 assert(kdim[1] == vdim[1])((void) sizeof ((kdim[1] == vdim[1]) ? 1 : 0), __extension__ (
{ if (kdim[1] == vdim[1]) ; else __assert_fail ("kdim[1] == vdim[1]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 79, __extension__ __PRETTY_FUNCTION__); }))
;
49
Assuming the condition is true
50
Taking true branch
80 assert(cdim[1] == qdim[1])((void) sizeof ((cdim[1] == qdim[1]) ? 1 : 0), __extension__ (
{ if (cdim[1] == qdim[1]) ; else __assert_fail ("cdim[1] == qdim[1]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 80, __extension__ __PRETTY_FUNCTION__); }))
;
51
Assuming the condition is true
52
Taking true branch
81 assert(cdim[3] == vdim[3])((void) sizeof ((cdim[3] == vdim[3]) ? 1 : 0), __extension__ (
{ if (cdim[3] == vdim[3]) ; else __assert_fail ("cdim[3] == vdim[3]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 81, __extension__ __PRETTY_FUNCTION__); }))
;
53
Assuming the condition is true
54
Taking true branch
82 const float* const sinkp = sinks
54.1
'sinks' is non-null
? sinks->data.f32 : 0;
55
'?' condition is true
83 int sink_head_stride = 0;
84 if (attention_sinks
55.1
'attention_sinks' is not equal to 0
)
56
Taking true branch
85 {
86 assert(CCV_IS_TENSOR_CONTIGUOUS(sinks))((void) sizeof (((!((*(int*)(sinks)) & CCV_TENSOR_VIEW) ||
(((ccv_nnc_tensor_view_t*)sinks)->contiguous == 1))) ? 1 :
0), __extension__ ({ if ((!((*(int*)(sinks)) & CCV_TENSOR_VIEW
) || (((ccv_nnc_tensor_view_t*)sinks)->contiguous == 1))) ;
else __assert_fail ("CCV_IS_TENSOR_CONTIGUOUS(sinks)", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 86, __extension__ __PRETTY_FUNCTION__); }))
;
57
Assuming the condition is true
87 const int sink_count = ccv_nnc_tensor_count(sinks->info);
88 assert(sink_count == 1 || sink_count == qdim[2])((void) sizeof ((sink_count == 1 || sink_count == qdim[2]) ? 1
: 0), __extension__ ({ if (sink_count == 1 || sink_count == qdim
[2]) ; else __assert_fail ("sink_count == 1 || sink_count == qdim[2]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 88, __extension__ __PRETTY_FUNCTION__); }))
;
58
Assuming 'sink_count' is equal to 1
89 sink_head_stride = (sink_count
58.1
'sink_count' is equal to 1
== 1) ? 0 : 1;
59
'?' condition is true
90 }
91 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 91, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
60
Taking true branch
92 int qstride[CCV_NNC_MAX_DIM_ALLOC(12)];
93 int kstride[CCV_NNC_MAX_DIM_ALLOC(12)];
94 int vstride[CCV_NNC_MAX_DIM_ALLOC(12)];
95 int cstride[CCV_NNC_MAX_DIM_ALLOC(12)];
96 int amstride[CCV_NNC_MAX_DIM_ALLOC(12)];
97 ccv_nnc_tensor_view_get_stride(q, qstride);
98 ccv_nnc_tensor_view_get_stride(k, kstride);
99 ccv_nnc_tensor_view_get_stride(v, vstride);
100 ccv_nnc_tensor_view_get_stride(c, cstride);
101 if (q_nd
60.1
'q_nd' is not equal to 3
== 3)
61
Taking false branch
102 {
103 qstride[0] = qstride[1], qstride[1] = qstride[2], qstride[2] = qstride[3];
104 kstride[0] = kstride[1], kstride[1] = kstride[2], kstride[2] = kstride[3];
105 vstride[0] = vstride[1], vstride[1] = vstride[2], vstride[2] = vstride[3];
106 cstride[0] = cstride[1], cstride[1] = cstride[2], cstride[2] = cstride[3];
107 }
108 if (attn_mask)
62
Assuming 'attn_mask' is non-null
63
Taking true branch
109 {
110 ccv_nnc_tensor_view_get_dim(attn_mask, amdim);
111 ccv_nnc_tensor_view_get_stride(attn_mask, amstride);
112 assert(amdim[0] == qdim[0] || amdim[0] == 1)((void) sizeof ((amdim[0] == qdim[0] || amdim[0] == 1) ? 1 : 0
), __extension__ ({ if (amdim[0] == qdim[0] || amdim[0] == 1)
; else __assert_fail ("amdim[0] == qdim[0] || amdim[0] == 1"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 112, __extension__ __PRETTY_FUNCTION__); }))
;
64
Assuming the condition is true
113 assert(amdim[1] == qdim[2] || amdim[1] == 1)((void) sizeof ((amdim[1] == qdim[2] || amdim[1] == 1) ? 1 : 0
), __extension__ ({ if (amdim[1] == qdim[2] || amdim[1] == 1)
; else __assert_fail ("amdim[1] == qdim[2] || amdim[1] == 1"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 113, __extension__ __PRETTY_FUNCTION__); }))
;
65
Assuming the condition is true
114 assert(amdim[2] == qdim[1])((void) sizeof ((amdim[2] == qdim[1]) ? 1 : 0), __extension__
({ if (amdim[2] == qdim[1]) ; else __assert_fail ("amdim[2] == qdim[1]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 114, __extension__ __PRETTY_FUNCTION__); }))
;
66
Assuming the condition is true
67
Taking true branch
115 assert(amdim[3] == kdim[1])((void) sizeof ((amdim[3] == kdim[1]) ? 1 : 0), __extension__
({ if (amdim[3] == kdim[1]) ; else __assert_fail ("amdim[3] == kdim[1]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 115, __extension__ __PRETTY_FUNCTION__); }))
;
68
Assuming the condition is true
69
Taking true branch
116 }
117 int i[CCV_NNC_MAX_DIM(2) + 2];
118 float* qk = ccv_nnc_stream_context_get_workspace(stream_context, sizeof(float) * qdim[1] * kdim[1], CCV_TENSOR_CPU_MEMORY);
119 const float* const qp = q->data.f32;
120 const float* const kp = k->data.f32;
121 const float* const vp = v->data.f32;
122 const float* const amp = attn_mask
69.1
'attn_mask' is non-null
? attn_mask->data.f32 : 0;
70
'?' condition is true
123 float* const cp = c->data.f32;
124 const float scale = cmd.info.scaled_dot_product_attention.scale;
125 const int is_causal = cmd.info.scaled_dot_product_attention.is_causal;
126 const int h_h_k_ratio = qdim[2] / kdim[2];
127 assert(kdim[2] == vdim[2])((void) sizeof ((kdim[2] == vdim[2]) ? 1 : 0), __extension__ (
{ if (kdim[2] == vdim[2]) ; else __assert_fail ("kdim[2] == vdim[2]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 127, __extension__ __PRETTY_FUNCTION__); }))
;
71
Taking true branch
128 assert(qdim[2] >= kdim[2])((void) sizeof ((qdim[2] >= kdim[2]) ? 1 : 0), __extension__
({ if (qdim[2] >= kdim[2]) ; else __assert_fail ("qdim[2] >= kdim[2]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 128, __extension__ __PRETTY_FUNCTION__); }))
;
72
Taking true branch
129 assert(qdim[2] % kdim[2] == 0)((void) sizeof ((qdim[2] % kdim[2] == 0) ? 1 : 0), __extension__
({ if (qdim[2] % kdim[2] == 0) ; else __assert_fail ("qdim[2] % kdim[2] == 0"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 129, __extension__ __PRETTY_FUNCTION__); }))
;
73
Taking true branch
130 if (is_varlen
73.1
'is_varlen' is 0
)
74
Taking false branch
131 {
132 const int batch_size = ccv_nnc_tensor_count(q_seq_offsets->info) - 1;
133 assert(batch_size > 0)((void) sizeof ((batch_size > 0) ? 1 : 0), __extension__ (
{ if (batch_size > 0) ; else __assert_fail ("batch_size > 0"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 133, __extension__ __PRETTY_FUNCTION__); }))
;
134 assert(ccv_nnc_tensor_count(kv_seq_offsets->info) == batch_size + 1)((void) sizeof ((ccv_nnc_tensor_count(kv_seq_offsets->info
) == batch_size + 1) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count
(kv_seq_offsets->info) == batch_size + 1) ; else __assert_fail
("ccv_nnc_tensor_count(kv_seq_offsets->info) == batch_size + 1"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 134, __extension__ __PRETTY_FUNCTION__); }))
;
135 assert(qdim[0] == 1)((void) sizeof ((qdim[0] == 1) ? 1 : 0), __extension__ ({ if (
qdim[0] == 1) ; else __assert_fail ("qdim[0] == 1", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 135, __extension__ __PRETTY_FUNCTION__); }))
;
136 assert(kdim[0] == 1)((void) sizeof ((kdim[0] == 1) ? 1 : 0), __extension__ ({ if (
kdim[0] == 1) ; else __assert_fail ("kdim[0] == 1", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 136, __extension__ __PRETTY_FUNCTION__); }))
;
137 assert(vdim[0] == 1)((void) sizeof ((vdim[0] == 1) ? 1 : 0), __extension__ ({ if (
vdim[0] == 1) ; else __assert_fail ("vdim[0] == 1", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 137, __extension__ __PRETTY_FUNCTION__); }))
;
138 assert(cdim[0] == 1)((void) sizeof ((cdim[0] == 1) ? 1 : 0), __extension__ ({ if (
cdim[0] == 1) ; else __assert_fail ("cdim[0] == 1", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 138, __extension__ __PRETTY_FUNCTION__); }))
;
139 assert(cdim[1] == qdim[1])((void) sizeof ((cdim[1] == qdim[1]) ? 1 : 0), __extension__ (
{ if (cdim[1] == qdim[1]) ; else __assert_fail ("cdim[1] == qdim[1]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 139, __extension__ __PRETTY_FUNCTION__); }))
;
140 assert(cdim[2] == qdim[2])((void) sizeof ((cdim[2] == qdim[2]) ? 1 : 0), __extension__ (
{ if (cdim[2] == qdim[2]) ; else __assert_fail ("cdim[2] == qdim[2]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 140, __extension__ __PRETTY_FUNCTION__); }))
;
141 assert(cdim[3] == vdim[3])((void) sizeof ((cdim[3] == vdim[3]) ? 1 : 0), __extension__ (
{ if (cdim[3] == vdim[3]) ; else __assert_fail ("cdim[3] == vdim[3]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 141, __extension__ __PRETTY_FUNCTION__); }))
;
142 assert(kdim[1] == vdim[1])((void) sizeof ((kdim[1] == vdim[1]) ? 1 : 0), __extension__ (
{ if (kdim[1] == vdim[1]) ; else __assert_fail ("kdim[1] == vdim[1]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 142, __extension__ __PRETTY_FUNCTION__); }))
;
143 const int* const q_offset = q_seq_offsets->data.i32;
144 const int* const kv_offset = kv_seq_offsets->data.i32;
145 assert(q_offset[0] == 0)((void) sizeof ((q_offset[0] == 0) ? 1 : 0), __extension__ ({
if (q_offset[0] == 0) ; else __assert_fail ("q_offset[0] == 0"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 145, __extension__ __PRETTY_FUNCTION__); }))
;
146 assert(kv_offset[0] == 0)((void) sizeof ((kv_offset[0] == 0) ? 1 : 0), __extension__ (
{ if (kv_offset[0] == 0) ; else __assert_fail ("kv_offset[0] == 0"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 146, __extension__ __PRETTY_FUNCTION__); }))
;
147 assert(q_offset[batch_size] == qdim[1])((void) sizeof ((q_offset[batch_size] == qdim[1]) ? 1 : 0), __extension__
({ if (q_offset[batch_size] == qdim[1]) ; else __assert_fail
("q_offset[batch_size] == qdim[1]", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 147, __extension__ __PRETTY_FUNCTION__); }))
;
148 assert(kv_offset[batch_size] == kdim[1])((void) sizeof ((kv_offset[batch_size] == kdim[1]) ? 1 : 0), __extension__
({ if (kv_offset[batch_size] == kdim[1]) ; else __assert_fail
("kv_offset[batch_size] == kdim[1]", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 148, __extension__ __PRETTY_FUNCTION__); }))
;
149 for (i[0] = 0; i[0] < batch_size; i[0]++)
150 {
151 const int q_start = q_offset[i[0]];
152 const int q_end = q_offset[i[0] + 1];
153 const int k_start = kv_offset[i[0]];
154 const int k_end = kv_offset[i[0] + 1];
155 assert(q_start <= q_end)((void) sizeof ((q_start <= q_end) ? 1 : 0), __extension__
({ if (q_start <= q_end) ; else __assert_fail ("q_start <= q_end"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 155, __extension__ __PRETTY_FUNCTION__); }))
;
156 assert(k_start <= k_end)((void) sizeof ((k_start <= k_end) ? 1 : 0), __extension__
({ if (k_start <= k_end) ; else __assert_fail ("k_start <= k_end"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 156, __extension__ __PRETTY_FUNCTION__); }))
;
157 const int R = q_end - q_start;
158 const int K = k_end - k_start;
159 assert(R > 0)((void) sizeof ((R > 0) ? 1 : 0), __extension__ ({ if (R >
0) ; else __assert_fail ("R > 0", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 159, __extension__ __PRETTY_FUNCTION__); }))
;
160 assert(K > 0)((void) sizeof ((K > 0) ? 1 : 0), __extension__ ({ if (K >
0) ; else __assert_fail ("K > 0", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 160, __extension__ __PRETTY_FUNCTION__); }))
;
161 assert(R <= cmd.info.scaled_dot_product_attention.max_seqlen_q)((void) sizeof ((R <= cmd.info.scaled_dot_product_attention
.max_seqlen_q) ? 1 : 0), __extension__ ({ if (R <= cmd.info
.scaled_dot_product_attention.max_seqlen_q) ; else __assert_fail
("R <= cmd.info.scaled_dot_product_attention.max_seqlen_q"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 161, __extension__ __PRETTY_FUNCTION__); }))
;
162 assert(K <= cmd.info.scaled_dot_product_attention.max_seqlen_kv)((void) sizeof ((K <= cmd.info.scaled_dot_product_attention
.max_seqlen_kv) ? 1 : 0), __extension__ ({ if (K <= cmd.info
.scaled_dot_product_attention.max_seqlen_kv) ; else __assert_fail
("K <= cmd.info.scaled_dot_product_attention.max_seqlen_kv"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 162, __extension__ __PRETTY_FUNCTION__); }))
;
163 const float* const qp0 = qp + q_start * qstride[1];
164 const float* const kp0 = kp + k_start * kstride[1];
165 const float* const vp0 = vp + k_start * vstride[1];
166 float* const cp0 = cp + q_start * cstride[1];
167 for (i[1] = 0; i[1] < qdim[2]; i[1]++)
168 {
169 const float* const qp1 = qp0 + i[1] * qstride[2];
170 const float* const kp1 = kp0 + (i[1] / h_h_k_ratio) * kstride[2];
171 const float* const vp1 = vp0 + (i[1] / h_h_k_ratio) * vstride[2];
172 float* const cp1 = cp0 + i[1] * cstride[2];
173 const double sink = attention_sinks ? (double)sinkp[i[1] * sink_head_stride] : 0;
174 parallel_for(x, R){ int x; for ((x) = 0; (x) < (R); (x)++) { {
175 int y, k;
176 const float* const qp2 = qp1 + x * qstride[1];
177 float* const cp2 = cp1 + x * cstride[1];
178 float* const qk0 = qk + x * K;
179 for (y = 0; y < K; y++)
180 {
181 const float* const kp2 = kp1 + y * kstride[1];
182 float v = 0;
183 for (k = 0; k < qdim[3]; k++)
184 v += qp2[k * qstride[3]] * kp2[k * kstride[3]];
185 qk0[y] = scale * v;
186 }
187 if (is_causal)
188 {
189 const int x_end = ccv_max(x - R + K + 1, 0)({ typeof (x - R + K + 1) _a = (x - R + K + 1); typeof (0) _b
= (0); (_a > _b) ? _a : _b; })
;
190 for (y = x_end; y < K; y++)
191 qk0[y] = 0;
192 double maxval = attention_sinks ? sink : qk0[0];
193 for (y = attention_sinks ? 0 : 1; y < x_end; y++)
194 if (qk0[y] > maxval)
195 maxval = qk0[y];
196 double sumval = attention_sinks ? expf(sink - maxval) : 0;
197 for (y = 0; y < x_end; y++)
198 sumval += (qk0[y] = expf(qk0[y] - maxval));
199 sumval = 1.0 / sumval;
200 for (y = 0; y < x_end; y++)
201 qk0[y] *= sumval;
202 } else {
203 double maxval = attention_sinks ? sink : qk0[0];
204 for (y = attention_sinks ? 0 : 1; y < K; y++)
205 if (qk0[y] > maxval)
206 maxval = qk0[y];
207 double sumval = attention_sinks ? expf(sink - maxval) : 0;
208 for (y = 0; y < K; y++)
209 sumval += (qk0[y] = expf(qk0[y] - maxval));
210 sumval = 1.0 / sumval;
211 for (y = 0; y < K; y++)
212 qk0[y] *= sumval;
213 }
214 for (k = 0; k < vdim[3]; k++)
215 cp2[k * cstride[3]] = 0;
216 for (y = 0; y < K; y++)
217 {
218 const float* const vp2 = vp1 + y * vstride[1];
219 const float v = qk0[y];
220 for (k = 0; k < vdim[3]; k++)
221 cp2[k * cstride[3]] += v * vp2[k * vstride[3]];
222 }
223 } parallel_endfor} }
224 }
225 }
226 return CCV_NNC_EXEC_SUCCESS;
227 }
228 for (i[0] = 0; i[0] < qdim[0]; i[0]++)
75
Assuming the condition is true
76
Loop condition is true. Entering loop body
229 {
230 const float* const qp0 = qp + i[0] * qstride[0];
231 const float* const kp0 = kp + i[0] * kstride[0];
232 const float* const vp0 = vp + i[0] * vstride[0];
233 const float* const amp0 = amp && amdim[0] > 1 ? amp + i[0] * amstride[0] : amp;
77
Assuming 'amp' is null
234 float* const cp0 = cp + i[0] * cstride[0];
235 for (i[1] = 0; i[1] < qdim[2]; i[1]++)
78
Assuming the condition is true
79
Loop condition is true. Entering loop body
236 {
237 const float* const qp1 = qp0 + i[1] * qstride[2];
238 const float* const kp1 = kp0 + (i[1] / h_h_k_ratio) * kstride[2];
239 const float* const vp1 = vp0 + (i[1] / h_h_k_ratio) * vstride[2];
240 const float* const amp1 = amp
79.1
'amp' is null
&& amdim[1] > 1 ? amp0 + i[1] * amstride[1] : amp0;
241 float* const cp1 = cp0 + i[1] * cstride[2];
242 const double sink = attention_sinks
79.2
'attention_sinks' is not equal to 0
? (double)sinkp[i[1] * sink_head_stride] : 0;
80
'?' condition is true
243 // Compute Q @ K^T
244 parallel_for(x, qdim[1]){ int x; for ((x) = 0; (x) < (qdim[1]); (x)++) { {
81
Assuming the condition is true
82
Loop condition is true. Entering loop body
245 int y, k;
246 const float* const qp2 = qp1 + x * qstride[1];
247 float* const cp2 = cp1 + x * cstride[1];
248 float* const qk0 = qk + x * kdim[1];
249 const float* const amp2 = amp1
82.1
'amp1' is null
? amp1 + x * amstride[2] : 0;
83
'?' condition is false
84
'amp2' initialized to a null pointer value
250 if (attn_mask
84.1
'attn_mask' is non-null
)
85
Taking true branch
251 {
252 for (y = 0; y < kdim[1]; y++)
86
Assuming the condition is true
87
Loop condition is true. Entering loop body
253 {
254 const float* const kp2 = kp1 + y * kstride[1];
255 float v = 0;
256 for (k = 0; k < qdim[3]; k++)
88
Assuming the condition is false
89
Loop condition is false. Execution continues on line 258
257 v += qp2[k * qstride[3]] * kp2[k * kstride[3]];
258 qk0[y] = scale * v + amp2[y * amstride[3]];
90
Array access (from variable 'amp2') results in a null pointer dereference
259 }
260 } else {
261 for (y = 0; y < kdim[1]; y++)
262 {
263 const float* const kp2 = kp1 + y * kstride[1];
264 float v = 0;
265 for (k = 0; k < qdim[3]; k++)
266 v += qp2[k * qstride[3]] * kp2[k * kstride[3]];
267 qk0[y] = scale * v;
268 }
269 }
270 // Compute softmax on qk.
271 if (is_causal)
272 {
273 const int x_end = ccv_max(x - qdim[1] + kdim[1] + 1, 0)({ typeof (x - qdim[1] + kdim[1] + 1) _a = (x - qdim[1] + kdim
[1] + 1); typeof (0) _b = (0); (_a > _b) ? _a : _b; })
;
274 for (y = x_end; y < kdim[1]; y++)
275 qk0[y] = 0;
276 double maxval = attention_sinks ? sink : qk0[0];
277 for (y = attention_sinks ? 0 : 1; y < x_end; y++)
278 if (qk0[y] > maxval)
279 maxval = qk0[y];
280 double sumval = attention_sinks ? expf(sink - maxval) : 0;
281 for (y = 0; y < x_end; y++)
282 sumval += (qk0[y] = expf(qk0[y] - maxval));
283 sumval = 1.0 / sumval;
284 for (y = 0; y < x_end; y++)
285 qk0[y] *= sumval;
286 } else {
287 double maxval = attention_sinks ? sink : qk0[0];
288 for (y = attention_sinks ? 0 : 1; y < kdim[1]; y++)
289 if (qk0[y] > maxval)
290 maxval = qk0[y];
291 double sumval = attention_sinks ? expf(sink - maxval) : 0;
292 for (y = 0; y < kdim[1]; y++)
293 sumval += (qk0[y] = expf(qk0[y] - maxval));
294 sumval = 1.0 / sumval;
295 for (y = 0; y < kdim[1]; y++)
296 qk0[y] *= sumval;
297 }
298 for (k = 0; k < vdim[3]; k++)
299 cp2[k * cstride[3]] = 0;
300 for (y = 0; y < kdim[1]; y++)
301 {
302 const float* const vp2 = vp1 + y * vstride[1];
303 const float v = qk0[y];
304 for (k = 0; k < vdim[3]; k++)
305 cp2[k * cstride[3]] += v * vp2[k * vstride[3]];
306 }
307 } parallel_endfor} }
308 }
309 }
310 if (w)
311 {
312 const int num_heads = cdim[2];
313 ccv_nnc_tensor_view_t* const d = (ccv_nnc_tensor_view_t*)outputs[0];
314 const int w_nd = ccv_nnc_tensor_nd(w->info.dim);
315 assert(w_nd == 2)((void) sizeof ((w_nd == 2) ? 1 : 0), __extension__ ({ if (w_nd
== 2) ; else __assert_fail ("w_nd == 2", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 315, __extension__ __PRETTY_FUNCTION__); }))
;
316 assert(CCV_IS_TENSOR_CONTIGUOUS(w))((void) sizeof (((!((*(int*)(w)) & CCV_TENSOR_VIEW) || ((
(ccv_nnc_tensor_view_t*)w)->contiguous == 1))) ? 1 : 0), __extension__
({ if ((!((*(int*)(w)) & CCV_TENSOR_VIEW) || (((ccv_nnc_tensor_view_t
*)w)->contiguous == 1))) ; else __assert_fail ("CCV_IS_TENSOR_CONTIGUOUS(w)"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 316, __extension__ __PRETTY_FUNCTION__); }))
;
317 const int d_nd = ccv_nnc_tensor_nd(d->info.dim);
318 assert(d_nd == 3)((void) sizeof ((d_nd == 3) ? 1 : 0), __extension__ ({ if (d_nd
== 3) ; else __assert_fail ("d_nd == 3", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 318, __extension__ __PRETTY_FUNCTION__); }))
;
319 int ddim[CCV_NNC_MAX_DIM_ALLOC(12)];
320 int dstride[CCV_NNC_MAX_DIM_ALLOC(12)];
321 ccv_nnc_tensor_view_get_dim(d, ddim);
322 ccv_nnc_tensor_view_get_stride(d, dstride);
323 assert(ddim[2] == cdim[1])((void) sizeof ((ddim[2] == cdim[1]) ? 1 : 0), __extension__ (
{ if (ddim[2] == cdim[1]) ; else __assert_fail ("ddim[2] == cdim[1]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 323, __extension__ __PRETTY_FUNCTION__); }))
;
324 assert(ddim[3] == num_heads * cdim[3])((void) sizeof ((ddim[3] == num_heads * cdim[3]) ? 1 : 0), __extension__
({ if (ddim[3] == num_heads * cdim[3]) ; else __assert_fail (
"ddim[3] == num_heads * cdim[3]", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 324, __extension__ __PRETTY_FUNCTION__); }))
;
325 assert(w->info.dim[1] == ddim[3])((void) sizeof ((w->info.dim[1] == ddim[3]) ? 1 : 0), __extension__
({ if (w->info.dim[1] == ddim[3]) ; else __assert_fail ("w->info.dim[1] == ddim[3]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 325, __extension__ __PRETTY_FUNCTION__); }))
;
326 assert(w->info.dim[0] == ddim[3])((void) sizeof ((w->info.dim[0] == ddim[3]) ? 1 : 0), __extension__
({ if (w->info.dim[0] == ddim[3]) ; else __assert_fail ("w->info.dim[0] == ddim[3]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 326, __extension__ __PRETTY_FUNCTION__); }))
;
327 float* const dp = d->data.f32;
328 const float* const wp = w->data.f32;
329 const float* const cp = c->data.f32;
330 if (bias)
331 {
332 assert(ccv_nnc_tensor_count(bias->info) == ddim[3])((void) sizeof ((ccv_nnc_tensor_count(bias->info) == ddim[
3]) ? 1 : 0), __extension__ ({ if (ccv_nnc_tensor_count(bias->
info) == ddim[3]) ; else __assert_fail ("ccv_nnc_tensor_count(bias->info) == ddim[3]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 332, __extension__ __PRETTY_FUNCTION__); }))
;
333 assert(CCV_IS_TENSOR_CONTIGUOUS(bias))((void) sizeof (((!((*(int*)(bias)) & CCV_TENSOR_VIEW) ||
(((ccv_nnc_tensor_view_t*)bias)->contiguous == 1))) ? 1 :
0), __extension__ ({ if ((!((*(int*)(bias)) & CCV_TENSOR_VIEW
) || (((ccv_nnc_tensor_view_t*)bias)->contiguous == 1))) ;
else __assert_fail ("CCV_IS_TENSOR_CONTIGUOUS(bias)", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 333, __extension__ __PRETTY_FUNCTION__); }))
;
334 const float* const biasp = bias->data.f32;
335 for (i[0] = 0; i[0] < ddim[1]; i[0]++)
336 {
337 const float* const cp0 = cp + i[0] * cstride[0];
338 float* const dp0 = dp + i[0] * dstride[1];
339 parallel_for(y, ddim[2]){ int y; for ((y) = 0; (y) < (ddim[2]); (y)++) { {
340 int x, j, k;
341 const float* const cp1 = cp0 + y * cstride[1];
342 float* const dp1 = dp0 + y * dstride[2];
343 for (x = 0; x < ddim[3]; x++)
344 {
345 const float* const wp0 = wp + x * ddim[3];
346 float v = biasp[x];
347 for (j = 0; j < num_heads; j++)
348 {
349 const float* const cp2 = cp1 + j * cstride[2];
350 for (k = 0; k < cdim[3]; k++)
351 v += wp0[j * cdim[3] + k] * cp2[k * cstride[3]];
352 }
353 dp1[x * dstride[3]] = v;
354 }
355 } parallel_endfor} }
356 }
357 } else {
358 for (i[0] = 0; i[0] < ddim[1]; i[0]++)
359 {
360 const float* const cp0 = cp + i[0] * cstride[0];
361 float* const dp0 = dp + i[0] * dstride[1];
362 parallel_for(y, ddim[2]){ int y; for ((y) = 0; (y) < (ddim[2]); (y)++) { {
363 int x, j, k;
364 const float* const cp1 = cp0 + y * cstride[1];
365 float* const dp1 = dp0 + y * dstride[2];
366 for (x = 0; x < ddim[3]; x++)
367 {
368 const float* const wp0 = wp + x * ddim[3];
369 float v = 0;
370 for (j = 0; j < num_heads; j++)
371 {
372 const float* const cp2 = cp1 + j * cstride[2];
373 for (k = 0; k < cdim[3]; k++)
374 v += wp0[j * cdim[3] + k] * cp2[k * cstride[3]];
375 }
376 dp1[x * dstride[3]] = v;
377 }
378 } parallel_endfor} }
379 }
380 }
381 }
382 return CCV_NNC_EXEC_SUCCESS;
383}
384
// Backward pass of scaled dot product attention for the CPU reference backend.
// Reads the incoming gradient g from inputs[0] and q, k, v from inputs[3], inputs[4],
// inputs[5]; writes dq, dk, dv to outputs[0], outputs[1], outputs[2].
// Each row of softmax(scale * q @ k^T) is recomputed here and then differentiated.
// No attention mask is read anywhere in this function.
// Returns CCV_NNC_EXEC_INVALID when is_varlen or attention_sinks is set on the command,
// CCV_NNC_EXEC_SUCCESS otherwise.
// NOTE(review): dim index 1 looks like the sequence axis, index 2 the head axis and
// index 3 the per-head feature axis -- inferred from how the loops use them; confirm.
385static int _ccv_nnc_scaled_dot_product_attention_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
386{
387 // Assuming no saved_softmax, we need to recompute from q, k, v.
388 // We cannot do this with masks (yet).
389 assert(input_size >= 6)((void) sizeof ((input_size >= 6) ? 1 : 0), __extension__ (
{ if (input_size >= 6) ; else __assert_fail ("input_size >= 6"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 389, __extension__ __PRETTY_FUNCTION__); }))
;
// Variable-length sequences and attention sinks are rejected up front.
390 if (cmd.info.scaled_dot_product_attention.is_varlen)
391 return CCV_NNC_EXEC_INVALID;
392 if (cmd.info.scaled_dot_product_attention.attention_sinks)
393 return CCV_NNC_EXEC_INVALID;
394 ccv_nnc_tensor_view_t* const g = (ccv_nnc_tensor_view_t*)inputs[0];
395 ccv_nnc_tensor_view_t* const q = (ccv_nnc_tensor_view_t*)inputs[3];
396 ccv_nnc_tensor_view_t* const k = (ccv_nnc_tensor_view_t*)inputs[4];
397 ccv_nnc_tensor_view_t* const v = (ccv_nnc_tensor_view_t*)inputs[5];
398 ccv_nnc_tensor_view_t* const dq = (ccv_nnc_tensor_view_t*)outputs[0];
399 ccv_nnc_tensor_view_t* const dk = (ccv_nnc_tensor_view_t*)outputs[1];
400 ccv_nnc_tensor_view_t* const dv = (ccv_nnc_tensor_view_t*)outputs[2];
// Every tensor must be 3-D or 4-D, each gradient must match the rank of its input,
// and q, k, v, g must all share the same rank.
401 const int q_nd = ccv_nnc_tensor_nd(q->info.dim);
402 assert(q_nd == 3 || q_nd == 4)((void) sizeof ((q_nd == 3 || q_nd == 4) ? 1 : 0), __extension__
({ if (q_nd == 3 || q_nd == 4) ; else __assert_fail ("q_nd == 3 || q_nd == 4"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 402, __extension__ __PRETTY_FUNCTION__); }))
;
403 const int k_nd = ccv_nnc_tensor_nd(k->info.dim);
404 assert(k_nd == 3 || k_nd == 4)((void) sizeof ((k_nd == 3 || k_nd == 4) ? 1 : 0), __extension__
({ if (k_nd == 3 || k_nd == 4) ; else __assert_fail ("k_nd == 3 || k_nd == 4"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 404, __extension__ __PRETTY_FUNCTION__); }))
;
405 const int v_nd = ccv_nnc_tensor_nd(v->info.dim);
406 assert(v_nd == 3 || v_nd == 4)((void) sizeof ((v_nd == 3 || v_nd == 4) ? 1 : 0), __extension__
({ if (v_nd == 3 || v_nd == 4) ; else __assert_fail ("v_nd == 3 || v_nd == 4"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 406, __extension__ __PRETTY_FUNCTION__); }))
;
407 const int g_nd = ccv_nnc_tensor_nd(g->info.dim);
408 assert(g_nd == 3 || g_nd == 4)((void) sizeof ((g_nd == 3 || g_nd == 4) ? 1 : 0), __extension__
({ if (g_nd == 3 || g_nd == 4) ; else __assert_fail ("g_nd == 3 || g_nd == 4"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 408, __extension__ __PRETTY_FUNCTION__); }))
;
409 const int dq_nd = ccv_nnc_tensor_nd(dq->info.dim);
410 assert(dq_nd == 3 || dq_nd == 4)((void) sizeof ((dq_nd == 3 || dq_nd == 4) ? 1 : 0), __extension__
({ if (dq_nd == 3 || dq_nd == 4) ; else __assert_fail ("dq_nd == 3 || dq_nd == 4"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 410, __extension__ __PRETTY_FUNCTION__); }))
;
411 assert(dq_nd == q_nd)((void) sizeof ((dq_nd == q_nd) ? 1 : 0), __extension__ ({ if
(dq_nd == q_nd) ; else __assert_fail ("dq_nd == q_nd", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 411, __extension__ __PRETTY_FUNCTION__); }))
;
412 const int dk_nd = ccv_nnc_tensor_nd(dk->info.dim);
413 assert(dk_nd == 3 || dk_nd == 4)((void) sizeof ((dk_nd == 3 || dk_nd == 4) ? 1 : 0), __extension__
({ if (dk_nd == 3 || dk_nd == 4) ; else __assert_fail ("dk_nd == 3 || dk_nd == 4"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 413, __extension__ __PRETTY_FUNCTION__); }))
;
414 assert(dk_nd == k_nd)((void) sizeof ((dk_nd == k_nd) ? 1 : 0), __extension__ ({ if
(dk_nd == k_nd) ; else __assert_fail ("dk_nd == k_nd", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 414, __extension__ __PRETTY_FUNCTION__); }))
;
415 const int dv_nd = ccv_nnc_tensor_nd(dv->info.dim);
416 assert(dv_nd == 3 || dv_nd == 4)((void) sizeof ((dv_nd == 3 || dv_nd == 4) ? 1 : 0), __extension__
({ if (dv_nd == 3 || dv_nd == 4) ; else __assert_fail ("dv_nd == 3 || dv_nd == 4"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 416, __extension__ __PRETTY_FUNCTION__); }))
;
417 assert(dv_nd == v_nd)((void) sizeof ((dv_nd == v_nd) ? 1 : 0), __extension__ ({ if
(dv_nd == v_nd) ; else __assert_fail ("dv_nd == v_nd", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 417, __extension__ __PRETTY_FUNCTION__); }))
;
418 assert(q_nd == k_nd && k_nd == v_nd && v_nd == g_nd)((void) sizeof ((q_nd == k_nd && k_nd == v_nd &&
v_nd == g_nd) ? 1 : 0), __extension__ ({ if (q_nd == k_nd &&
k_nd == v_nd && v_nd == g_nd) ; else __assert_fail (
"q_nd == k_nd && k_nd == v_nd && v_nd == g_nd"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 418, __extension__ __PRETTY_FUNCTION__); }))
;
419 // Assuming this is float 32.
420 int qdim[CCV_NNC_MAX_DIM_ALLOC(12)];
421 int kdim[CCV_NNC_MAX_DIM_ALLOC(12)];
422 int vdim[CCV_NNC_MAX_DIM_ALLOC(12)];
423 int gdim[CCV_NNC_MAX_DIM_ALLOC(12)];
424 int dqdim[CCV_NNC_MAX_DIM_ALLOC(12)];
425 int dkdim[CCV_NNC_MAX_DIM_ALLOC(12)];
426 int dvdim[CCV_NNC_MAX_DIM_ALLOC(12)];
427 ccv_nnc_tensor_view_get_dim(q, qdim);
428 ccv_nnc_tensor_view_get_dim(k, kdim);
429 ccv_nnc_tensor_view_get_dim(v, vdim);
430 ccv_nnc_tensor_view_get_dim(g, gdim);
431 ccv_nnc_tensor_view_get_dim(dq, dqdim);
432 ccv_nnc_tensor_view_get_dim(dk, dkdim);
433 ccv_nnc_tensor_view_get_dim(dv, dvdim);
// For 3-D tensors, move the dims found in slots 1 and 2 down to slots 0 and 1 and set
// slot 2 to 1, so the loops below can index every tensor with four dims.
434 if (q_nd == 3)
435 {
436 qdim[0] = qdim[1], qdim[1] = qdim[2], qdim[2] = 1;
437 kdim[0] = kdim[1], kdim[1] = kdim[2], kdim[2] = 1;
438 vdim[0] = vdim[1], vdim[1] = vdim[2], vdim[2] = 1;
439 gdim[0] = gdim[1], gdim[1] = gdim[2], gdim[2] = 1;
440 dqdim[0] = dqdim[1], dqdim[1] = dqdim[2], dqdim[2] = 1;
441 dkdim[0] = dkdim[1], dkdim[1] = dkdim[2], dkdim[2] = 1;
442 dvdim[0] = dvdim[1], dvdim[1] = dvdim[2], dvdim[2] = 1;
443 }
// Shape compatibility checks between q, k, v and g. qdim[2] must be a positive
// multiple of kdim[2] because several dim-2 slices of q may share one slice of k/v.
444 assert(qdim[0] == kdim[0] && kdim[0] == vdim[0] && vdim[0] == gdim[0])((void) sizeof ((qdim[0] == kdim[0] && kdim[0] == vdim
[0] && vdim[0] == gdim[0]) ? 1 : 0), __extension__ ({
if (qdim[0] == kdim[0] && kdim[0] == vdim[0] &&
vdim[0] == gdim[0]) ; else __assert_fail ("qdim[0] == kdim[0] && kdim[0] == vdim[0] && vdim[0] == gdim[0]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 444, __extension__ __PRETTY_FUNCTION__); }))
;
445 assert(qdim[2] == gdim[2])((void) sizeof ((qdim[2] == gdim[2]) ? 1 : 0), __extension__ (
{ if (qdim[2] == gdim[2]) ; else __assert_fail ("qdim[2] == gdim[2]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 445, __extension__ __PRETTY_FUNCTION__); }))
;
446 assert(kdim[2] == vdim[2])((void) sizeof ((kdim[2] == vdim[2]) ? 1 : 0), __extension__ (
{ if (kdim[2] == vdim[2]) ; else __assert_fail ("kdim[2] == vdim[2]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 446, __extension__ __PRETTY_FUNCTION__); }))
;
447 assert(qdim[2] % kdim[2] == 0)((void) sizeof ((qdim[2] % kdim[2] == 0) ? 1 : 0), __extension__
({ if (qdim[2] % kdim[2] == 0) ; else __assert_fail ("qdim[2] % kdim[2] == 0"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 447, __extension__ __PRETTY_FUNCTION__); }))
;
448 assert(qdim[2] >= kdim[2])((void) sizeof ((qdim[2] >= kdim[2]) ? 1 : 0), __extension__
({ if (qdim[2] >= kdim[2]) ; else __assert_fail ("qdim[2] >= kdim[2]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 448, __extension__ __PRETTY_FUNCTION__); }))
;
449 assert(qdim[3] == kdim[3])((void) sizeof ((qdim[3] == kdim[3]) ? 1 : 0), __extension__ (
{ if (qdim[3] == kdim[3]) ; else __assert_fail ("qdim[3] == kdim[3]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 449, __extension__ __PRETTY_FUNCTION__); }))
;
450 assert(kdim[1] == vdim[1])((void) sizeof ((kdim[1] == vdim[1]) ? 1 : 0), __extension__ (
{ if (kdim[1] == vdim[1]) ; else __assert_fail ("kdim[1] == vdim[1]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 450, __extension__ __PRETTY_FUNCTION__); }))
;
451 assert(gdim[1] == qdim[1])((void) sizeof ((gdim[1] == qdim[1]) ? 1 : 0), __extension__ (
{ if (gdim[1] == qdim[1]) ; else __assert_fail ("gdim[1] == qdim[1]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 451, __extension__ __PRETTY_FUNCTION__); }))
;
452 assert(gdim[3] == vdim[3])((void) sizeof ((gdim[3] == vdim[3]) ? 1 : 0), __extension__ (
{ if (gdim[3] == vdim[3]) ; else __assert_fail ("gdim[3] == vdim[3]"
, "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 452, __extension__ __PRETTY_FUNCTION__); }))
;
453 assert(CCV_NNC_MAX_DIM == 2)((void) sizeof (((2) == 2) ? 1 : 0), __extension__ ({ if ((2)
== 2) ; else __assert_fail ("CCV_NNC_MAX_DIM == 2", "scaled_dot_product_attention/ccv_nnc_scaled_dot_product_attention_cpu_ref.c"
, 453, __extension__ __PRETTY_FUNCTION__); }))
; // Need to change this logic for CCV_NNC_MAX_DIM == other number.
454 int qstride[CCV_NNC_MAX_DIM_ALLOC(12)];
455 int kstride[CCV_NNC_MAX_DIM_ALLOC(12)];
456 int vstride[CCV_NNC_MAX_DIM_ALLOC(12)];
457 int gstride[CCV_NNC_MAX_DIM_ALLOC(12)];
458 int dqstride[CCV_NNC_MAX_DIM_ALLOC(12)];
459 int dkstride[CCV_NNC_MAX_DIM_ALLOC(12)];
460 int dvstride[CCV_NNC_MAX_DIM_ALLOC(12)];
461 ccv_nnc_tensor_view_get_stride(q, qstride);
462 ccv_nnc_tensor_view_get_stride(k, kstride);
463 ccv_nnc_tensor_view_get_stride(v, vstride);
464 ccv_nnc_tensor_view_get_stride(g, gstride);
465 ccv_nnc_tensor_view_get_stride(dq, dqstride);
466 ccv_nnc_tensor_view_get_stride(dk, dkstride);
467 ccv_nnc_tensor_view_get_stride(dv, dvstride);
// Same slot shift for the strides of 3-D tensors; slot 2 takes the value of slot 3,
// matching the size-1 dim inserted above.
468 if (q_nd == 3)
469 {
470 qstride[0] = qstride[1], qstride[1] = qstride[2], qstride[2] = qstride[3];
471 kstride[0] = kstride[1], kstride[1] = kstride[2], kstride[2] = kstride[3];
472 vstride[0] = vstride[1], vstride[1] = vstride[2], vstride[2] = vstride[3];
473 gstride[0] = gstride[1], gstride[1] = gstride[2], gstride[2] = gstride[3];
474 dqstride[0] = dqstride[1], dqstride[1] = dqstride[2], dqstride[2] = dqstride[3];
475 dkstride[0] = dkstride[1], dkstride[1] = dkstride[2], dkstride[2] = dkstride[3];
476 dvstride[0] = dvstride[1], dvstride[1] = dvstride[2], dvstride[2] = dvstride[3];
477 }
478 int i[CCV_NNC_MAX_DIM(2) + 2];
// Scratch of 2 * kdim[1] floats: the first kdim[1] entries hold one softmax row (qk0),
// the next kdim[1] entries hold the per-key dot products of g and v (qks0).
// NOTE(review): the returned pointer is used without a null check -- confirm the
// workspace call cannot fail here.
479 float* qk = ccv_nnc_stream_context_get_workspace(stream_context, sizeof(float) * 2 * kdim[1], CCV_TENSOR_CPU_MEMORY);
480 const float* const qp = q->data.f32;
481 const float* const kp = k->data.f32;
482 const float* const vp = v->data.f32;
483 const float* const gp = g->data.f32;
484 float* const dqp = dq->data.f32;
485 float* const dkp = dk->data.f32;
486 float* const dvp = dv->data.f32;
487 const float scale = cmd.info.scaled_dot_product_attention.scale;
488 const int is_causal = cmd.info.scaled_dot_product_attention.is_causal;
// How many consecutive dim-2 slices of q map onto the same dim-2 slice of k/v
// (presumably query heads per key/value head -- confirm).
489 const int h_h_k_ratio = qdim[2] / kdim[2];
490 for (i[0] = 0; i[0] < qdim[0]; i[0]++)
491 {
492 const float* const qp0 = qp + i[0] * qstride[0];
493 const float* const kp0 = kp + i[0] * kstride[0];
494 const float* const vp0 = vp + i[0] * vstride[0];
495 const float* const gp0 = gp + i[0] * gstride[0];
496 float* const dqp0 = dqp + i[0] * dqstride[0];
497 float* const dkp0 = dkp + i[0] * dkstride[0];
498 float* const dvp0 = dvp + i[0] * dvstride[0];
499 for (i[1] = 0; i[1] < qdim[2]; i[1]++)
500 {
// q, g and dq are indexed by i[1] directly; k, v, dk and dv use i[1] / h_h_k_ratio so
// that every slice in a group reads and accumulates into the same k/v slice.
501 const float* const qp1 = qp0 + i[1] * qstride[2];
502 const float* const kp1 = kp0 + (i[1] / h_h_k_ratio) * kstride[2];
503 const float* const vp1 = vp0 + (i[1] / h_h_k_ratio) * vstride[2];
504 const float* const gp1 = gp0 + i[1] * gstride[2];
505 float* const dqp1 = dqp0 + i[1] * dqstride[2];
506 float* const dkp1 = dkp0 + (i[1] / h_h_k_ratio) * dkstride[2];
507 float* const dvp1 = dvp0 + (i[1] / h_h_k_ratio) * dvstride[2];
508 // Compute Q @ K^T
509 int x, y, k;
// dq is cleared for every i[1]; the accumulation into it happens further down.
510 for (x = 0; x < qdim[1]; x++)
511 {
512 float* const dqp2 = dqp1 + x * dqstride[1];
513 for (k = 0; k < qdim[3]; k++)
514 dqp2[k * dqstride[3]] = 0;
515 }
516 // Only zero out when it is at 0-index.
517 if (i[1] % h_h_k_ratio == 0)
518 for (y = 0; y < kdim[1]; y++)
519 {
520 float* const dkp2 = dkp1 + y * dkstride[1];
521 for (k = 0; k < qdim[3]; k++)
522 dkp2[k * dkstride[3]] = 0;
523 }
524 // Only zero out when it is at 0-index.
525 if (i[1] % h_h_k_ratio == 0)
526 for (y = 0; y < kdim[1]; y++)
527 {
528 float* const dvp2 = dvp1 + y * dvstride[1];
529 for (k = 0; k < vdim[3]; k++)
530 dvp2[k * dvstride[3]] = 0;
531 }
// One query row x at a time: recompute its softmax row, then accumulate dv, dq and dk.
532 for (x = 0; x < qdim[1]; x++)
533 {
534 const float* const qp2 = qp1 + x * qstride[1];
535 const float* const gp2 = gp1 + x * gstride[1];
536 float* const qk0 = qk;
537 float* const qks0 = qk + kdim[1];
// qk0[y] = scale * dot(q[x], k[y]).
538 for (y = 0; y < kdim[1]; y++)
539 {
540 const float* const kp2 = kp1 + y * kstride[1];
541 float v = 0;
542 for (k = 0; k < qdim[3]; k++)
543 v += qp2[k * qstride[3]] * kp2[k * kstride[3]];
544 qk0[y] = scale * v;
545 }
546 // Compute softmax on qk.
547 if (is_causal)
548 {
// Keys at y >= x_end are excluded: their entries are set to 0 and the max-subtracted
// softmax runs over [0, x_end) only. When x_end is 0 the loops below do not execute,
// so the 1.0 / sumval value is never applied to any entry.
549 const int x_end = ccv_max(x - qdim[1] + kdim[1] + 1, 0)({ typeof (x - qdim[1] + kdim[1] + 1) _a = (x - qdim[1] + kdim
[1] + 1); typeof (0) _b = (0); (_a > _b) ? _a : _b; })
;
550 for (y = x_end; y < kdim[1]; y++)
551 qk0[y] = 0;
552 double maxval = qk0[0];
553 for (y = 1; y < x_end; y++)
554 if (qk0[y] > maxval)
555 maxval = qk0[y];
556 double sumval = 0;
557 for (y = 0; y < x_end; y++)
558 sumval += (qk0[y] = expf(qk0[y] - maxval));
559 sumval = 1.0 / sumval;
560 for (y = 0; y < x_end; y++)
561 qk0[y] *= sumval;
562 } else {
// Max-subtracted softmax over all kdim[1] keys.
563 double maxval = qk0[0];
564 for (y = 1; y < kdim[1]; y++)
565 if (qk0[y] > maxval)
566 maxval = qk0[y];
567 double sumval = 0;
568 for (y = 0; y < kdim[1]; y++)
569 sumval += (qk0[y] = expf(qk0[y] - maxval));
570 sumval = 1.0 / sumval;
571 for (y = 0; y < kdim[1]; y++)
572 qk0[y] *= sumval;
573 }
// dv[y] += p[y] * g[x], where p is the softmax row now stored in qk0.
574 for (y = 0; y < kdim[1]; y++)
575 {
576 float* const dvp2 = dvp1 + y * dvstride[1];
577 const float v = qk0[y];
578 for (k = 0; k < vdim[3]; k++)
579 dvp2[k * dvstride[3]] += v * gp2[k * gstride[3]];
580 }
// qks0[y] = dot(g[x], v[y]); sumval accumulates sum over y of qks0[y] * p[y].
581 double sumval = 0;
582 for (y = 0; y < kdim[1]; y++)
583 {
584 const float* const vp2 = vp1 + y * vstride[1];
585 float v = 0;
586 for (k = 0; k < vdim[3]; k++)
587 v += gp2[k * gstride[3]] * vp2[k * vstride[3]];
588 qks0[y] = v;
589 sumval += v * qk0[y];
590 }
// Softmax gradient for this row, written back over qk0: (qks0[y] - sumval) * p[y].
591 for (y = 0; y < kdim[1]; y++)
592 qk0[y] = (qks0[y] - sumval) * qk0[y];
593 float* const dqp2 = dqp1 + x * dqstride[1];
// With v = scale * qk0[y]: dq[x] += v * k[y] and dk[y] += v * q[x].
594 for (y = 0; y < kdim[1]; y++)
595 {
596 const float* const kp2 = kp1 + y * kstride[1];
597 float* const dkp2 = dkp1 + y * dkstride[1];
598 const float v = scale * qk0[y];
599 for (k = 0; k < qdim[3]; k++)
600 {
601 dqp2[k * dqstride[3]] += v * kp2[k * kstride[3]];
602 dkp2[k * dkstride[3]] += v * qp2[k * qstride[3]];
603 }
604 }
605 }
606 }
607 }
608 return CCV_NNC_EXEC_SUCCESS;
609}
610
// Fills in the registry entry for the forward scaled dot product attention command on
// the CPU reference backend: NHWC tensor format, CCV_32F and CCV_32S datatypes, CPU
// memory, a single algorithm, and _ccv_nnc_scaled_dot_product_attention_forw as the
// exec callback.
611REGISTER_COMMAND_BACKEND(CCV_NNC_SCALED_DOT_PRODUCT_ATTENTION_FORWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_SCALED_DOT_PRODUCT_ATTENTION_FORWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
612{
613 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
614 registry->tensor_datatypes = CCV_32F | CCV_32S;
615 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
616 registry->algorithms = 1;
617 registry->exec = _ccv_nnc_scaled_dot_product_attention_forw;
618}
619
// Fills in the registry entry for the backward scaled dot product attention command on
// the CPU reference backend: NHWC tensor format, CCV_32F datatype only, CPU memory, a
// single algorithm, and _ccv_nnc_scaled_dot_product_attention_back as the exec callback.
620REGISTER_COMMAND_BACKEND(CCV_NNC_SCALED_DOT_PRODUCT_ATTENTION_BACKWARD, CCV_NNC_BACKEND_CPU_REF)void _register_command_CCV_NNC_SCALED_DOT_PRODUCT_ATTENTION_BACKWARD_backend_CCV_NNC_BACKEND_CPU_REF(ccv_nnc_cmd_backend_registry_t* const registry)
621{
622 registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC;
623 registry->tensor_datatypes = CCV_32F;
624 registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
625 registry->algorithms = 1;
626 registry->exec = _ccv_nnc_scaled_dot_product_attention_back;
627}