Bug Summary

File:nnc/ccv_cnnp_model_gradient_checkpointing.c
Warning:line 75, column 1
Array access (via field 'flags') results in a null pointer dereference

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model_gradient_checkpointing.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/19 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2024-11-08-214927-221724-1 -x c ccv_cnnp_model_gradient_checkpointing.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6// This can be removed once we organized ccv_cnnp_apply_gradient_checkpoints better.
7#include "_ccv_nnc_symbolic_graph.h"
8
9void ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_symbolic_graph_t* const graph)
10{
11 ccv_array_t* const gradient_checkpoints = compiled_data->gradient_checkpoints;
12 if (!gradient_checkpoints || gradient_checkpoints->rnum == 0) // No saved gradient checkpoints, this is an easy way out.
13 return;
14 int i, j;
15 const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = (const ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, 0)((void*)(((char*)((graph->tensor_symbol_info)->data)) +
(size_t)(graph->tensor_symbol_info)->rsize * (size_t)(
0)))
;
16 // Go through to check if any tensors that supposes in this map is removed.
17 for (i = 0; i < gradient_checkpoints->rnum; i++)
18 {
19 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(gradient_checkpoints, i)((void*)(((char*)((gradient_checkpoints)->data)) + (size_t
)(gradient_checkpoints)->rsize * (size_t)(i)))
;
20 for (j = 0; j < checkpoint->tensor_symbols->rnum; j++)
21 {
22 ccv_nnc_tensor_symbol_t* const symbol = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(checkpoint->tensor_symbols, j)((void*)(((char*)((checkpoint->tensor_symbols)->data)) +
(size_t)(checkpoint->tensor_symbols)->rsize * (size_t)
(j)))
);
23 if (symbol->d >= 0 && symbol->d < graph->tensor_symbol_info->rnum)
24 // If it is dead, we need to remove this symbol.
25 if (CCV_NNC_TENSOR_SYMBOL_IS_DEAD(tensor_symbol_info[symbol->d].flags)((tensor_symbol_info[symbol->d].flags) & CCV_NNC_TENSOR_SYMBOL_DEAD
)
)
26 {
27 symbol->d = -1;
28 symbol->graph = 0;
29 }
30 }
31 }
32}
33
34typedef struct {
35 ccv_array_t* outgoings;
36} ccv_nnc_graph_exec_symbol_reverse_t;
37
38typedef struct {
39 ccv_cnnp_model_gradient_checkpoint_build_context_t tensor_context;
40 ccv_array_t* graph_exec_symbols;
41 ccv_nnc_graph_exec_symbol_new_hook_f old_graph_exec_symbol_new_hook;
42 void* old_graph_exec_symbol_new_hook_context;
43 ccv_array_t* all_tensor_symbols;
44} ccv_cnnp_gradient_checkpoint_build_t;
45
46static void _ccv_cnnp_gradient_checkpoint_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
47{
48 ccv_cnnp_gradient_checkpoint_build_t* const build_context = (ccv_cnnp_gradient_checkpoint_build_t*)context;
49 if (build_context->tensor_context.record)
50 ccv_array_push(build_context->tensor_context.tensor_symbols, &symbol);
51 ccv_array_push(build_context->all_tensor_symbols, &symbol);
52 if (build_context->tensor_context.old_tensor_symbol_new_hook)
53 build_context->tensor_context.old_tensor_symbol_new_hook(build_context->tensor_context.old_tensor_symbol_new_hook_context, symbol, info, name);
54}
55
56static void _ccv_cnnp_gradient_checkpoint_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], const int inc[CCV_NNC_MAX_DIM_ALLOC(12)], const ccv_nnc_tensor_param_t info, const char* const name)
57{
58 ccv_cnnp_gradient_checkpoint_build_t* const build_context = (ccv_cnnp_gradient_checkpoint_build_t*)context;
59 if (build_context->tensor_context.record)
60 ccv_array_push(build_context->tensor_context.tensor_symbols, &symbol);
61 ccv_array_push(build_context->all_tensor_symbols, &symbol);
62 if (build_context->tensor_context.old_tensor_symbol_alias_new_hook)
63 build_context->tensor_context.old_tensor_symbol_alias_new_hook(build_context->tensor_context.old_tensor_symbol_alias_new_hook_context, symbol, from_symbol, ofs, inc, info, name);
64}
65
66static void _ccv_cnnp_model_gradient_checkpoint_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
67{
68 ccv_cnnp_gradient_checkpoint_build_t* const build = (ccv_cnnp_gradient_checkpoint_build_t*)context;
69 ccv_array_push(build->graph_exec_symbols, &symbol);
70 if (build->old_graph_exec_symbol_new_hook)
71 build->old_graph_exec_symbol_new_hook(build->old_graph_exec_symbol_new_hook_context, symbol, cmd, inputs, input_size, outputs, output_size, name);
72}
73
74KHASH_MAP_INIT_INT(ccv_cnnp_tensor_symbol_map, int)typedef struct kh_ccv_cnnp_tensor_symbol_map_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; khint32_t *
keys; int *vals; } kh_ccv_cnnp_tensor_symbol_map_t; static inline
__attribute__ ((__unused__)) kh_ccv_cnnp_tensor_symbol_map_t
*kh_init_ccv_cnnp_tensor_symbol_map(void) { return (kh_ccv_cnnp_tensor_symbol_map_t
*)calloc(1,sizeof(kh_ccv_cnnp_tensor_symbol_map_t)); } static
inline __attribute__ ((__unused__)) void kh_destroy_ccv_cnnp_tensor_symbol_map
(kh_ccv_cnnp_tensor_symbol_map_t *h) { if (h) { free((void *)
h->keys); free(h->flags); free((void *)h->vals); free
(h); } } static inline __attribute__ ((__unused__)) void kh_clear_ccv_cnnp_tensor_symbol_map
(kh_ccv_cnnp_tensor_symbol_map_t *h) { if (h && h->
flags) { memset(h->flags, 0xaa, ((h->n_buckets) < 16
? 1 : (h->n_buckets)>>4) * sizeof(khint32_t)); h->
size = h->n_occupied = 0; } } static inline __attribute__ (
(__unused__)) khint_t kh_get_ccv_cnnp_tensor_symbol_map(const
kh_ccv_cnnp_tensor_symbol_map_t *h, khint32_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = (khint32_t)(key); i = k & mask; last =
i; while (!((h->flags[i>>4]>>((i&0xfU)<<
1))&2) && (((h->flags[i>>4]>>((i&
0xfU)<<1))&1) || !((h->keys[i]) == (key)))) { i =
(i + (++step)) & mask; if (i == last) return h->n_buckets
; } return ((h->flags[i>>4]>>((i&0xfU)<<
1))&3)? h->n_buckets : i; } else return 0; } static inline
__attribute__ ((__unused__)) int kh_resize_ccv_cnnp_tensor_symbol_map
(kh_ccv_cnnp_tensor_symbol_map_t *h, khint_t new_n_buckets) {
khint32_t *new_flags = 0; khint_t j = 1; { (--(new_n_buckets
), (new_n_buckets)|=(new_n_buckets)>>1, (new_n_buckets)
|=(new_n_buckets)>>2, (new_n_buckets)|=(new_n_buckets)>>
4, (new_n_buckets)|=(new_n_buckets)>>8, (new_n_buckets)
|=(new_n_buckets)>>16, ++(new_n_buckets)); if (new_n_buckets
< 4) new_n_buckets = 4; if (h->size >= (khint_t)(new_n_buckets
* __ac_HASH_UPPER + 0.5)) j = 0; else { new_flags = (khint32_t
*)malloc(((new_n_buckets) < 16? 1 : (new_n_buckets)>>
4) * sizeof(khint32_t)); if (!new_flags) return -1; memset(new_flags
, 0xaa, ((new_n_buckets) < 16? 1 : (new_n_buckets)>>
4) * sizeof(khint32_t)); if (h->n_buckets < new_n_buckets
) { khint32_t *new_keys = (khint32_t*)realloc((void *)h->keys
,new_n_buckets * sizeof(khint32_t)); if (!new_keys) { free(new_flags
); return -1; } h->keys = new_keys; if (1) { int *new_vals
= (int*)realloc((void *)h->vals,new_n_buckets * sizeof(int
)); if (!new_vals) { free(new_flags); return -1; } h->vals
= new_vals; } } } } if (j) { for (j = 0; j != h->n_buckets
; ++j) { if (((h->flags[j>>4]>>((j&0xfU)<<
1))&3) == 0) { khint32_t key = h->keys[j]; int val; khint_t
new_mask; new_mask = new_n_buckets - 1; if (1) val = h->vals
[j]; (h->flags[j>>4]|=1ul<<((j&0xfU)<<
1)); while (1) { khint_t k, i, step = 0; k = (khint32_t)(key)
; i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { khint32_t
tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (1
) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
(h->flags[i>>4]|=1ul<<((i&0xfU)<<1)
); } else { h->keys[i] = key; if (1) h->vals[i] = val; break
; } } } } if (h->n_buckets > new_n_buckets) { h->keys
= (khint32_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(khint32_t)); if (1) h->vals = (int*)realloc((void *)h->
vals,new_n_buckets * sizeof(int)); } free(h->flags); h->
flags = new_flags; h->n_buckets = new_n_buckets; h->n_occupied
= h->size; h->upper_bound = (khint_t)(h->n_buckets *
__ac_HASH_UPPER + 0.5); } return 0; } static inline __attribute__
((__unused__)) khint_t kh_put_ccv_cnnp_tensor_symbol_map(kh_ccv_cnnp_tensor_symbol_map_t
*h, khint32_t key, int *ret) { khint_t x; if (h->n_occupied
>= h->upper_bound) { if (h->n_buckets > (h->size
<<1)) { if (kh_resize_ccv_cnnp_tensor_symbol_map(h, h->
n_buckets - 1) < 0) { *ret = -1; return h->n_buckets; }
} else if (kh_resize_ccv_cnnp_tensor_symbol_map(h, h->n_buckets
+ 1) < 0) { *ret = -1; return h->n_buckets; } } { khint_t
k, i, site, last, mask = h->n_buckets - 1, step = 0; x = site
= h->n_buckets; k = (khint32_t)(key); i = k & mask; if
(((h->flags[i>>4]>>((i&0xfU)<<1))&
2)) x = i; else { last = i; while (!((h->flags[i>>4]
>>((i&0xfU)<<1))&2) && (((h->flags
[i>>4]>>((i&0xfU)<<1))&1) || !((h->
keys[i]) == (key)))) { if (((h->flags[i>>4]>>(
(i&0xfU)<<1))&1)) site = i; i = (i + (++step)) &
mask; if (i == last) { x = site; break; } } if (x == h->n_buckets
) { if (((h->flags[i>>4]>>((i&0xfU)<<
1))&2) && site != h->n_buckets) x = site; else
x = i; } } } if (((h->flags[x>>4]>>((x&0xfU
)<<1))&2)) { h->keys[x] = key; (h->flags[x>>
4]&=~(3ul<<((x&0xfU)<<1))); ++h->size;
++h->n_occupied; *ret = 1; } else if (((h->flags[x>>
4]>>((x&0xfU)<<1))&1)) { h->keys[x] = key
; (h->flags[x>>4]&=~(3ul<<((x&0xfU)<<
1))); ++h->size; *ret = 2; } else *ret = 0; return x; } static
inline __attribute__ ((__unused__)) void kh_del_ccv_cnnp_tensor_symbol_map
(kh_ccv_cnnp_tensor_symbol_map_t *h, khint_t x) { if (x != h->
n_buckets && !((h->flags[x>>4]>>((x&
0xfU)<<1))&3)) { (h->flags[x>>4]|=1ul<<
((x&0xfU)<<1)); --h->size; } }
75KHASH_SET_INIT_INT(ccv_cnnp_tensor_symbol_set)typedef struct kh_ccv_cnnp_tensor_symbol_set_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; khint32_t *
keys; char *vals; } kh_ccv_cnnp_tensor_symbol_set_t; static inline
__attribute__ ((__unused__)) kh_ccv_cnnp_tensor_symbol_set_t
*kh_init_ccv_cnnp_tensor_symbol_set(void) { return (kh_ccv_cnnp_tensor_symbol_set_t
*)calloc(1,sizeof(kh_ccv_cnnp_tensor_symbol_set_t)); } static
inline __attribute__ ((__unused__)) void kh_destroy_ccv_cnnp_tensor_symbol_set
(kh_ccv_cnnp_tensor_symbol_set_t *h) { if (h) { free((void *)
h->keys); free(h->flags); free((void *)h->vals); free
(h); } } static inline __attribute__ ((__unused__)) void kh_clear_ccv_cnnp_tensor_symbol_set
(kh_ccv_cnnp_tensor_symbol_set_t *h) { if (h && h->
flags) { memset(h->flags, 0xaa, ((h->n_buckets) < 16
? 1 : (h->n_buckets)>>4) * sizeof(khint32_t)); h->
size = h->n_occupied = 0; } } static inline __attribute__ (
(__unused__)) khint_t kh_get_ccv_cnnp_tensor_symbol_set(const
kh_ccv_cnnp_tensor_symbol_set_t *h, khint32_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = (khint32_t)(key); i = k & mask; last =
i; while (!((h->flags[i>>4]>>((i&0xfU)<<
1))&2) && (((h->flags[i>>4]>>((i&
0xfU)<<1))&1) || !((h->keys[i]) == (key)))) { i =
(i + (++step)) & mask; if (i == last) return h->n_buckets
; } return ((h->flags[i>>4]>>((i&0xfU)<<
1))&3)? h->n_buckets : i; } else return 0; } static inline
__attribute__ ((__unused__)) int kh_resize_ccv_cnnp_tensor_symbol_set
(kh_ccv_cnnp_tensor_symbol_set_t *h, khint_t new_n_buckets) {
khint32_t *new_flags = 0; khint_t j = 1; { (--(new_n_buckets
), (new_n_buckets)|=(new_n_buckets)>>1, (new_n_buckets)
|=(new_n_buckets)>>2, (new_n_buckets)|=(new_n_buckets)>>
4, (new_n_buckets)|=(new_n_buckets)>>8, (new_n_buckets)
|=(new_n_buckets)>>16, ++(new_n_buckets)); if (new_n_buckets
< 4) new_n_buckets = 4; if (h->size >= (khint_t)(new_n_buckets
* __ac_HASH_UPPER + 0.5)) j = 0; else { new_flags = (khint32_t
*)malloc(((new_n_buckets) < 16? 1 : (new_n_buckets)>>
4) * sizeof(khint32_t)); if (!new_flags) return -1; memset(new_flags
, 0xaa, ((new_n_buckets) < 16? 1 : (new_n_buckets)>>
4) * sizeof(khint32_t)); if (h->n_buckets < new_n_buckets
) { khint32_t *new_keys = (khint32_t*)realloc((void *)h->keys
,new_n_buckets * sizeof(khint32_t)); if (!new_keys) { free(new_flags
); return -1; } h->keys = new_keys; if (0) { char *new_vals
= (char*)realloc((void *)h->vals,new_n_buckets * sizeof(char
)); if (!new_vals) { free(new_flags); return -1; } h->vals
= new_vals; } } } } if (j) { for (j = 0; j != h->n_buckets
; ++j) { if (((h->flags[j>>4]>>((j&0xfU)<<
1))&3) == 0) { khint32_t key = h->keys[j]; char val; khint_t
new_mask; new_mask = new_n_buckets - 1; if (0) val = h->vals
[j]; (h->flags[j>>4]|=1ul<<((j&0xfU)<<
1)); while (1) { khint_t k, i, step = 0; k = (khint32_t)(key)
; i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { khint32_t
tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (0
) { char tmp = h->vals[i]; h->vals[i] = val; val = tmp;
} (h->flags[i>>4]|=1ul<<((i&0xfU)<<
1)); } else { h->keys[i] = key; if (0) h->vals[i] = val
; break; } } } } if (h->n_buckets > new_n_buckets) { h->
keys = (khint32_t*)realloc((void *)h->keys,new_n_buckets *
sizeof(khint32_t)); if (0) h->vals = (char*)realloc((void
*)h->vals,new_n_buckets * sizeof(char)); } free(h->flags
); h->flags = new_flags; h->n_buckets = new_n_buckets; h
->n_occupied = h->size; h->upper_bound = (khint_t)(h
->n_buckets * __ac_HASH_UPPER + 0.5); } return 0; } static
inline __attribute__ ((__unused__)) khint_t kh_put_ccv_cnnp_tensor_symbol_set
(kh_ccv_cnnp_tensor_symbol_set_t *h, khint32_t key, int *ret)
{ khint_t x; if (h->n_occupied >= h->upper_bound) {
if (h->n_buckets > (h->size<<1)) { if (kh_resize_ccv_cnnp_tensor_symbol_set
(h, h->n_buckets - 1) < 0) { *ret = -1; return h->n_buckets
; } } else if (kh_resize_ccv_cnnp_tensor_symbol_set(h, h->
n_buckets + 1) < 0) { *ret = -1; return h->n_buckets; }
} { khint_t k, i, site, last, mask = h->n_buckets - 1, step
= 0; x = site = h->n_buckets; k = (khint32_t)(key); i = k
& mask; if (((h->flags[i>>4]>>((i&0xfU
)<<1))&2)) x = i; else { last = i; while (!((h->
flags[i>>4]>>((i&0xfU)<<1))&2) &&
(((h->flags[i>>4]>>((i&0xfU)<<1))&
1) || !((h->keys[i]) == (key)))) { if (((h->flags[i>>
4]>>((i&0xfU)<<1))&1)) site = i; i = (i +
(++step)) & mask; if (i == last) { x = site; break; } } if
(x == h->n_buckets) { if (((h->flags[i>>4]>>
((i&0xfU)<<1))&2) && site != h->n_buckets
) x = site; else x = i; } } } if (((h->flags[x>>4]>>
((x&0xfU)<<1))&2)) { h->keys[x] = key; (h->
flags[x>>4]&=~(3ul<<((x&0xfU)<<1)))
; ++h->size; ++h->n_occupied; *ret = 1; } else if (((h->
flags[x>>4]>>((x&0xfU)<<1))&1)) { h
->keys[x] = key; (h->flags[x>>4]&=~(3ul<<
((x&0xfU)<<1))); ++h->size; *ret = 2; } else *ret
= 0; return x; } static inline __attribute__ ((__unused__)) void
kh_del_ccv_cnnp_tensor_symbol_set(kh_ccv_cnnp_tensor_symbol_set_t
*h, khint_t x) { if (x != h->n_buckets && !((h->
flags[x>>4]>>((x&0xfU)<<1))&3)) { (
h->flags[x>>4]|=1ul<<((x&0xfU)<<1));
--h->size; } }
9
Null pointer value stored to field 'flags'
14
Taking true branch
15
Taking false branch
16
Calling 'kh_resize_ccv_cnnp_tensor_symbol_set'
17
Taking true branch
18
Assuming the condition is true
19
Taking true branch
20
Taking false branch
21
Returning without writing to 'h->flags'
22
Returning from 'kh_resize_ccv_cnnp_tensor_symbol_set'
23
Taking false branch
24
Array access (via field 'flags') results in a null pointer dereference
76
77void ccv_cnnp_model_apply_gradient_checkpoints(ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_symbolic_graph_t* const graph)
78{
79 ccv_array_t* const gradient_checkpoints = compiled_data->gradient_checkpoints;
80 if (!gradient_checkpoints || gradient_checkpoints->rnum == 0) // No saved gradient checkpoints, this is an easy way out.
1
Assuming 'gradient_checkpoints' is non-null
2
Assuming field 'rnum' is not equal to 0
3
Taking false branch
81 return;
82 // Otherwise, for each gradient checkpoint, there are 3 steps:
83 // 1. Find currently, what execs exists from inputs to outputs.
84 // 2. Find execs that generates the outputs, and their corresponding backward execs.
85 // 3. Find all backward execs flow from outputs back to inputs.
86 // 4. Generate new ops by calling build again with old inputs, record all new tensors / execs.
87 // 5. Replace inputs in backward execs with the new tensors.
88 // 6. Hook the execs takes inputs with edge from parents of backward execs in step 2.
89 // 7. Delete newly generated execs that has no use (i.e. its outputs are not used by backward pass).
90 // 8. Mark all new execs with DISABLE_OPT to avoid common sub-expression elimination pass.
91 int i, j, k, l;
92 ccv_array_t* input_execs = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
93 ccv_array_t* output_execs = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
94 ccv_array_t* input_gradient_execs = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
95 ccv_array_t* output_gradient_execs = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
96 ccv_array_t* visited_backward_execs = ccv_array_new(sizeof(int), 0, 0);
97 ccv_array_t* replaced_backward_execs = ccv_array_new(sizeof(int), 0, 0);
98 const int exec_rnum = graph->exec_symbol_info->rnum;
99 ccv_nnc_graph_exec_symbol_reverse_t* const reversed_nodes = cccalloccalloc(exec_rnum, sizeof(ccv_nnc_graph_exec_symbol_reverse_t));
100 for (i = 0; i < exec_rnum; i++)
4
Assuming 'i' is >= 'exec_rnum'
5
Loop condition is false. Execution continues on line 116
101 {
102 const int* tos = 0;
103 int to_size = 0;
104 ccv_nnc_graph_exec_symbol_to(graph, (ccv_nnc_graph_exec_symbol_t){
105 .graph = graph,
106 .d = i
107 }, &tos, &to_size);
108 if (tos)
109 for (j = 0; j < to_size; j++)
110 {
111 if (!reversed_nodes[tos[j]].outgoings)
112 reversed_nodes[tos[j]].outgoings = ccv_array_new(sizeof(int), 1, 0);
113 ccv_array_add_unique_int(reversed_nodes[tos[j]].outgoings, i);
114 }
115 }
116 uint32_t* const maskbit = cccalloccalloc((exec_rnum + 31) >> 5, sizeof(uint32_t));
117 // Temporary for build_data.
118 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
119 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
120 ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
121 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
122 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
123 ccv_array_t* const buf = ccv_array_new(sizeof(int), 0, 0);
124 int max_output_size = 0;
125 for (i = 0; i < gradient_checkpoints->rnum; i++)
6
Assuming 'i' is >= field 'rnum'
7
Loop condition is false. Execution continues on line 130
126 {
127 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(gradient_checkpoints, i)((void*)(((char*)((gradient_checkpoints)->data)) + (size_t
)(gradient_checkpoints)->rsize * (size_t)(i)))
;
128 max_output_size = ccv_max(checkpoint->output_size, max_output_size)({ typeof (checkpoint->output_size) _a = (checkpoint->output_size
); typeof (max_output_size) _b = (max_output_size); (_a > _b
) ? _a : _b; })
;
129 }
130 ccv_nnc_tensor_symbol_t* max_outputs = ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * max_output_size);
131 ccv_array_t* newly_used_outputs = ccv_array_new(sizeof(int), 0, 0);
132 khash_t(ccv_cnnp_tensor_symbol_set)kh_ccv_cnnp_tensor_symbol_set_t* const parameters_or_internals = kh_init(ccv_cnnp_tensor_symbol_set)kh_init_ccv_cnnp_tensor_symbol_set();
8
Calling 'kh_init_ccv_cnnp_tensor_symbol_set'
10
Returning from 'kh_init_ccv_cnnp_tensor_symbol_set'
133 for (i = 0; i < compiled_data->parameters->rnum; i++)
11
Assuming 'i' is < field 'rnum'
12
Loop condition is true. Entering loop body
134 {
135 const ccv_nnc_tensor_symbol_t* const symbol = (const ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
136 int ret;
137 kh_put(ccv_cnnp_tensor_symbol_set, parameters_or_internals, symbol->d, &ret)kh_put_ccv_cnnp_tensor_symbol_set(parameters_or_internals, symbol
->d, &ret)
;
13
Calling 'kh_put_ccv_cnnp_tensor_symbol_set'
138 }
139 for (i = 0; i < compiled_data->internals->rnum; i++)
140 {
141 const ccv_nnc_tensor_symbol_t* const symbol = (const ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
142 int ret;
143 kh_put(ccv_cnnp_tensor_symbol_set, parameters_or_internals, symbol->d, &ret)kh_put_ccv_cnnp_tensor_symbol_set(parameters_or_internals, symbol
->d, &ret)
;
144 }
145 khash_t(ccv_cnnp_tensor_symbol_set)kh_ccv_cnnp_tensor_symbol_set_t* const newly_created_tensor_symbols = kh_init(ccv_cnnp_tensor_symbol_set)kh_init_ccv_cnnp_tensor_symbol_set();
146 khash_t(ccv_cnnp_tensor_symbol_map)kh_ccv_cnnp_tensor_symbol_map_t* symbol_map = kh_init(ccv_cnnp_tensor_symbol_map)kh_init_ccv_cnnp_tensor_symbol_map();
147 for (i = 0; i < gradient_checkpoints->rnum; i++)
148 {
149 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(gradient_checkpoints, i)((void*)(((char*)((gradient_checkpoints)->data)) + (size_t
)(gradient_checkpoints)->rsize * (size_t)(i)))
;
150 kh_clear(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols)kh_clear_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
)
;
151 for (j = 0; j < checkpoint->tensor_symbols->rnum; j++)
152 {
153 const int idx = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(checkpoint->tensor_symbols, j)((void*)(((char*)((checkpoint->tensor_symbols)->data)) +
(size_t)(checkpoint->tensor_symbols)->rsize * (size_t)
(j)))
)->d;
154 if (idx < 0)
155 continue;
156 // Skip parameters or internals.
157 if (kh_get(ccv_cnnp_tensor_symbol_set, parameters_or_internals, idx)kh_get_ccv_cnnp_tensor_symbol_set(parameters_or_internals, idx
)
!= kh_end(parameters_or_internals)((parameters_or_internals)->n_buckets))
158 continue;
159 int ret;
160 kh_put(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols, idx, &ret)kh_put_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
, idx, &ret)
;
161 }
162 ccv_array_clear(input_execs);
163 ccv_array_clear(output_execs);
164 ccv_nnc_graph_exec_symbol_info_t* exec_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0)((void*)(((char*)((graph->exec_symbol_info)->data)) + (
size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)))
;
165 for (j = 0; j < exec_rnum; j++)
166 {
167 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[j].flags)((exec_info[j].flags) & CCV_NNC_GRAPH_EXEC_DEAD))
168 continue;
169 const int* inputs = exec_info[j].inputs;
170 int input_size = exec_info[j].input_size;
171 const int* outputs = exec_info[j].outputs;
172 int output_size = exec_info[j].output_size;
173 if (input_size == 0 && output_size == 0)
174 continue;
175 // Only go through forward pass.
176 if (ccv_nnc_cmd_is_backward(exec_info[j].cmd))
177 continue;
178 const ccv_nnc_graph_exec_symbol_t symbol = {
179 .graph = graph,
180 .d = j
181 };
182 int flag = 0;
183 for (k = 0; inputs && k < input_size && !flag; k++)
184 if (inputs[k] >= 0)
185 for (l = 0; l < checkpoint->input_size && !flag; l++)
186 if (checkpoint->inputs[l].d >= 0 && inputs[k] == checkpoint->inputs[l].d)
187 flag = 1;
188 // Input logic is different from output logic. We need to filter out these exec that contains inputs from within the graph.
189 for (k = 0; inputs && k < input_size && flag; k++)
190 if (inputs[k] >= 0 && kh_get(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols, inputs[k])kh_get_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
, inputs[k])
!= kh_end(newly_created_tensor_symbols)((newly_created_tensor_symbols)->n_buckets))
191 flag = 0;
192 if (flag)
193 ccv_array_push(input_execs, &symbol);
194 flag = 0;
195 for (k = 0; outputs && k < output_size && !flag; k++)
196 if (outputs[k] >= 0)
197 for (l = 0; l < checkpoint->output_size && !flag; l++)
198 if (checkpoint->outputs[l].d >= 0 && outputs[k] == checkpoint->outputs[l].d)
199 flag = 1;
200 if (flag)
201 ccv_array_push(output_execs, &symbol);
202 }
203 if (input_execs->rnum <= 0 || output_execs->rnum <= 0)
204 continue;
205 // Fill in blanks (i.e. the backward ops that are not showing in above, but should be included to avoid excluding necessary ones). This is done by flowing gradients from outputs back all the way to inputs.
206 ccv_array_clear(input_gradient_execs);
207 ccv_array_clear(output_gradient_execs);
208 for (j = 0; j < input_execs->rnum; j++)
209 {
210 const int d = ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(input_execs, j)((void*)(((char*)((input_execs)->data)) + (size_t)(input_execs
)->rsize * (size_t)(j)))
)->d;
211 for (k = 0; k < exec_info[d].input_size; k++)
212 if (exec_info[d].inputs[k] >= 0)
213 {
214 const ccv_nnc_tensor_symbol_t gradient_symbol = ccv_nnc_tensor_symbol_for_backward(graph, (ccv_nnc_tensor_symbol_t){
215 .graph = graph,
216 .d = exec_info[d].inputs[k]
217 });
218 if (gradient_symbol.d < 0)
219 continue;
220 const ccv_nnc_graph_exec_symbol_t backward = ccv_nnc_graph_exec_symbol_for_backward(graph, gradient_symbol);
221 if (backward.d < 0)
222 continue;
223 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[backward.d].flags)((exec_info[backward.d].flags) & CCV_NNC_GRAPH_EXEC_DEAD))
224 continue;
225 int flag = 0;
226 for (l = 0; !flag && l < output_gradient_execs->rnum; l++)
227 if (((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(output_gradient_execs, l)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(l)))
)->d == backward.d)
228 flag = 1;
229 if (!flag)
230 ccv_array_push(output_gradient_execs, &backward);
231 }
232 if (exec_info[d].outgoings && exec_info[d].outgoings->rnum > 0)
233 for (k = 0; k < exec_info[d].outgoings->rnum; k++)
234 {
235 const int to_d = *(int*)ccv_array_get(exec_info[d].outgoings, k)((void*)(((char*)((exec_info[d].outgoings)->data)) + (size_t
)(exec_info[d].outgoings)->rsize * (size_t)(k)))
;
236 if (!ccv_nnc_cmd_is_backward(exec_info[to_d].cmd))
237 continue;
238 int flag = 0;
239 for (l = 0; !flag && l < output_gradient_execs->rnum; l++)
240 if (((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(output_gradient_execs, l)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(l)))
)->d == to_d)
241 flag = 1;
242 if (!flag)
243 {
244 const ccv_nnc_graph_exec_symbol_t backward = {
245 .graph = graph,
246 .d = to_d
247 };
248 ccv_array_push(output_gradient_execs, &backward);
249 }
250 }
251 }
252 // For output_gradient_execs, we can be opportunistic and use the wrt symbols (if exists) to find relevant bits.
253 // For input_gradient_execs, there is no other way but to loop over all outgoings, find the ones are direct link as backward execs.
254 for (j = 0; j < output_execs->rnum; j++)
255 {
256 const int d = ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(output_execs, j)((void*)(((char*)((output_execs)->data)) + (size_t)(output_execs
)->rsize * (size_t)(j)))
)->d;
257 if (exec_info[d].outgoings && exec_info[d].outgoings->rnum > 0)
258 for (k = 0; k < exec_info[d].outgoings->rnum; k++)
259 {
260 const int to_d = *(int*)ccv_array_get(exec_info[d].outgoings, k)((void*)(((char*)((exec_info[d].outgoings)->data)) + (size_t
)(exec_info[d].outgoings)->rsize * (size_t)(k)))
;
261 if (!ccv_nnc_cmd_is_backward(exec_info[to_d].cmd))
262 continue;
263 int flag = 0;
264 for (l = 0; !flag && l < input_gradient_execs->rnum; l++)
265 if (((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(input_gradient_execs, l)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(l)))
)->d == to_d)
266 flag = 1;
267 if (!flag)
268 {
269 const ccv_nnc_graph_exec_symbol_t backward = {
270 .graph = graph,
271 .d = to_d
272 };
273 ccv_array_push(input_gradient_execs, &backward);
274 }
275 }
276 }
277 // Note that we have to use up-to-date ones because the exec_info might have outgoings that is up-to-date.
278 ccv_nnc_graph_visit_t* const visit = ccv_nnc_graph_visit_new(graph, exec_info, graph->exec_symbol_info->rnum, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(input_gradient_execs, 0), input_gradient_execs->rnum, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(output_gradient_execs, 0), output_gradient_execs->rnum, 1)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((graph->exec_symbol_info->rnum) - 1)); _visit_->size
= 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_symbol_info->
rnum); _i_++) _incoming_edges_ += ((exec_info)[_i_].outgoings
) ? (exec_info)[_i_].outgoings->rnum : 0; const int _heap_mem_
= ((graph->exec_symbol_info->rnum) + _incoming_edges_ >
1024); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
graph->exec_symbol_info->rnum) + sizeof(int32_t) * ((graph
->exec_symbol_info->rnum) * 2 + _incoming_edges_)); else
_incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (sizeof(
ccv_nnc_incoming_t) * (graph->exec_symbol_info->rnum) +
sizeof(int32_t) * ((graph->exec_symbol_info->rnum) * 2
+ _incoming_edges_)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t
) * (graph->exec_symbol_info->rnum)); int32_t* _exists_
[2] = { (int32_t*)(_incomings_ + (graph->exec_symbol_info->
rnum)), (int32_t*)(_incomings_ + (graph->exec_symbol_info->
rnum)) + (graph->exec_symbol_info->rnum), }; int32_t* const
_edges_ = _exists_[1] + (graph->exec_symbol_info->rnum
); for (_i_ = 0; _i_ < (input_gradient_execs->rnum); _i_
++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t*)((void*
)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs
)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__
({ if (((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs
)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t
)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
__PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r =
1; _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t*)((void*
)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs
)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_[2] = {
(input_gradient_execs->rnum), 0, }; int _p_ = 0, _q_ = 1;
while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for
(_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 1) continue
; _incomings_[_idx_].r = 2; if ((exec_info)[_idx_].outgoings)
for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info
)[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].
outgoings)->rsize * (size_t)(_j_))); ++_incomings_[d].c; if
(_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void
) sizeof ((_exist_size_[_q_] < (graph->exec_symbol_info
->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] <
(graph->exec_symbol_info->rnum)) ; else __assert_fail (
"_exist_size_[_q_] < (graph->exec_symbol_info->rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
__PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] =
d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (
_q_) = (_i_)); } for (_i_ = 0; _i_ < (input_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
__PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r =
3; _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t*)((void*
)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (input_gradient_execs
->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_
= 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 3) continue
; _incomings_[_idx_].r = 4; if ((exec_info)[_idx_].outgoings)
for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info
)[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].
outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges
== 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_
[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges -
1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; if (_incomings_
[d].r != 2) continue; _incomings_[d].r = 3; ((void) sizeof ((
_exist_size_[_q_] < (graph->exec_symbol_info->rnum))
? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph
->exec_symbol_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_symbol_info->rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
__PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] =
d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (
_q_) = (_i_)); } for (_i_ = 0; _i_ < (output_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
__PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r
= 5; _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t*)((void
*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (output_gradient_execs
->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (graph->exec_symbol_info->rnum)) ? 1 : 0), __extension__
({ if (_exist_size_[_q_] < (graph->exec_symbol_info->
rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_symbol_info->rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
__PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] =
d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (
_q_) = (_i_)); } for (_i_ = 0; _i_ < (output_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
__PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].d
= 1; } for (_i_ = 0; _i_ < (input_gradient_execs->rnum
); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t*)(
(void*)(((char*)((input_gradient_execs)->data)) + (size_t)
(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph ==
graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
__PRETTY_FUNCTION__); })); _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d; } _p_
= 0; _q_ = 1; _exist_size_[0] = (input_gradient_execs->rnum
); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[_p_]
> 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; _visit_->
node[_visit_->size].index = ((_idx_)); _visit_->node[_visit_
->size].term = ((_incomings_[_idx_].d)); ++_visit_->size
;; if (_incomings_[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7
; } if ((exec_info)[_idx_].outgoings) { if ((exec_info)[_idx_
].outgoings->rnum == 1) { const int d = *(int*)((void*)(((
char*)(((exec_info)[_idx_].outgoings)->data)) + (size_t)((
exec_info)[_idx_].outgoings)->rsize * (size_t)(0))); --_incomings_
[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r ==
6 && _d_ < (output_gradient_execs->rnum)) { _exists_
[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info
)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((
void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (
size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_
))); --_incomings_[d].c; if (_incomings_[d].c == 0 &&
_incomings_[d].r == 6 && _d_ < (output_gradient_execs
->rnum)) { ((void) sizeof ((_exist_size_[_q_] < (graph->
exec_symbol_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (graph->exec_symbol_info->rnum)) ; else __assert_fail
("_exist_size_[_q_] < (graph->exec_symbol_info->rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
__PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] =
d; ++_exist_size_[_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_)
= (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (output_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
__PRETTY_FUNCTION__); })); if (_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r
== 7) continue; if (!(1)) { ((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c
== 0) ? 1 : 0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c
== 0) ; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c == 0"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
__PRETTY_FUNCTION__); })); } else if (_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c
> 0) continue; _visit_->node[_visit_->size].index =
((((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs
)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t
)(0))))[_i_].d)); _visit_->node[_visit_->size].term = (
(_incomings_[((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)
((output_gradient_execs)->data)) + (size_t)(output_gradient_execs
)->rsize * (size_t)(0))))[_i_].d].d)); ++_visit_->size;
; } if (_heap_mem_) free(_incomings_); } while (0);; ((void) sizeof
((_visit_->size <= (graph->exec_symbol_info->rnum
)) ? 1 : 0), __extension__ ({ if (_visit_->size <= (graph
->exec_symbol_info->rnum)) ; else __assert_fail ("_visit_->size <= (graph->exec_symbol_info->rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
__PRETTY_FUNCTION__); })); _visit_; })
;
279 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
280 if (idx < exec_rnum && !CCV_NNC_GRAPH_EXEC_IS_DEAD(node->flags)((node->flags) & CCV_NNC_GRAPH_EXEC_DEAD))
281 maskbit[idx >> 5] |= (1u << (idx & 0x1f));
282 } ccv_nnc_graph_visit_endfor} }
283 ccv_array_clear(visited_backward_execs);
284 // Add more backward pass to the list. Note that we don't add everything, particularly there are new nodes created through gradient checkpointing are ignored.
285#define visitor(node, idx, _) \
286 if (idx < exec_rnum && !CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[idx].flags)((exec_info[idx].flags) & CCV_NNC_GRAPH_EXEC_DEAD) && maskbit[idx >> 5] & (1u << (idx & 0x1f))) \
287 ccv_array_add_unique_int(visited_backward_execs, idx);
288 CCV_NNC_GRAPH_VISIT(graph, reversed_nodes, exec_rnum, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(output_gradient_execs, 0), output_gradient_execs->rnum, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(input_gradient_execs, 0), input_gradient_execs->rnum, 0, visitor)do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (exec_rnum); _i_++) _incoming_edges_
+= ((reversed_nodes)[_i_].outgoings) ? (reversed_nodes)[_i_]
.outgoings->rnum : 0; const int _heap_mem_ = ((exec_rnum) +
_incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (exec_rnum) + sizeof(int32_t) * ((exec_rnum
) * 2 + _incoming_edges_)); else _incomings_ = (ccv_nnc_incoming_t
*)__builtin_alloca (sizeof(ccv_nnc_incoming_t) * (exec_rnum) +
sizeof(int32_t) * ((exec_rnum) * 2 + _incoming_edges_)); memset
(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (exec_rnum)); int32_t
* _exists_[2] = { (int32_t*)(_incomings_ + (exec_rnum)), (int32_t
*)(_incomings_ + (exec_rnum)) + (exec_rnum), }; int32_t* const
_edges_ = _exists_[1] + (exec_rnum); for (_i_ = 0; _i_ < (
output_gradient_execs->rnum); _i_++) { ((void) sizeof ((((
ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs
)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t
)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((
(ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs
)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t
)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
__PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r
= 1; _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t*)((void
*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs
)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_[2] = {
(output_gradient_execs->rnum), 0, }; int _p_ = 0, _q_ = 1
; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for
(_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 1) continue
; _incomings_[_idx_].r = 2; if ((reversed_nodes)[_idx_].outgoings
) for (_j_ = 0; _j_ < (reversed_nodes)[_idx_].outgoings->
rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((reversed_nodes
)[_idx_].outgoings)->data)) + (size_t)((reversed_nodes)[_idx_
].outgoings)->rsize * (size_t)(_j_))); ++_incomings_[d].c;
if (_incomings_[d].r != 0) continue; _incomings_[d].r = 1; (
(void) sizeof ((_exist_size_[_q_] < (exec_rnum)) ? 1 : 0),
__extension__ ({ if (_exist_size_[_q_] < (exec_rnum)) ; else
__assert_fail ("_exist_size_[_q_] < (exec_rnum)", "ccv_cnnp_model_gradient_checkpointing.c"
, 288, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_]
[_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_
), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (output_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
__PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r
= 3; _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t*)((void
*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (output_gradient_execs
->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_
= 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 3) continue
; _incomings_[_idx_].r = 4; if ((reversed_nodes)[_idx_].outgoings
) for (_j_ = 0; _j_ < (reversed_nodes)[_idx_].outgoings->
rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((reversed_nodes
)[_idx_].outgoings)->data)) + (size_t)((reversed_nodes)[_idx_
].outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].
edges == 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_
[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges -
1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; if (_incomings_
[d].r != 2) continue; _incomings_[d].r = 3; ((void) sizeof ((
_exist_size_[_q_] < (exec_rnum)) ? 1 : 0), __extension__ (
{ if (_exist_size_[_q_] < (exec_rnum)) ; else __assert_fail
("_exist_size_[_q_] < (exec_rnum)", "ccv_cnnp_model_gradient_checkpointing.c"
, 288, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_]
[_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_
), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (input_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
__PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r =
5; _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t*)((void*
)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (input_gradient_execs
->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (exec_rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_rnum)) ; else __assert_fail ("_exist_size_[_q_] < (exec_rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
__PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] =
d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (
_q_) = (_i_)); } for (_i_ = 0; _i_ < (input_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
__PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].d =
1; } for (_i_ = 0; _i_ < (output_gradient_execs->rnum)
; _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t*)((
void*)(((char*)((output_gradient_execs)->data)) + (size_t)
(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
__PRETTY_FUNCTION__); })); _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d; }
_p_ = 0; _q_ = 1; _exist_size_[0] = (output_gradient_execs->
rnum); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[
_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; visitor((
(reversed_nodes) + _idx_), (_idx_), (_incomings_[_idx_].d)); if
(_incomings_[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if
((reversed_nodes)[_idx_].outgoings) { if ((reversed_nodes)[_idx_
].outgoings->rnum == 1) { const int d = *(int*)((void*)(((
char*)(((reversed_nodes)[_idx_].outgoings)->data)) + (size_t
)((reversed_nodes)[_idx_].outgoings)->rsize * (size_t)(0))
); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_
[d].r == 6 && _d_ < (input_gradient_execs->rnum
)) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0;
_j_ < (reversed_nodes)[_idx_].outgoings->rnum; _j_++) {
const int d = *(int*)((void*)(((char*)(((reversed_nodes)[_idx_
].outgoings)->data)) + (size_t)((reversed_nodes)[_idx_].outgoings
)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 6 && _d_ <
(input_gradient_execs->rnum)) { ((void) sizeof ((_exist_size_
[_q_] < (exec_rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_rnum)) ; else __assert_fail ("_exist_size_[_q_] < (exec_rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
__PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] =
d; ++_exist_size_[_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_)
= (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (input_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
__PRETTY_FUNCTION__); })); if (_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r ==
7) continue; if (!(0)) { ((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c ==
0) ? 1 : 0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c ==
0) ; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c == 0"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
__PRETTY_FUNCTION__); })); } else if (_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c >
0) continue; visitor(((reversed_nodes) + ((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d), (
((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs
)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t
)(0))))[_i_].d), (_incomings_[((ccv_nnc_graph_exec_symbol_t*)
((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].d)
); } if (_heap_mem_) free(_incomings_); } while (0);
;
289 for (j = 0; j < input_gradient_execs->rnum; j++)
290 ccv_array_add_unique_int(visited_backward_execs, ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(input_gradient_execs, j)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(j)))
)->d);
291#undef visitor
292 ccv_cnnp_gradient_checkpoint_build_t build = {
293 .tensor_context = {
294 .record = 1,
295 .tensor_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0),
296 },
297 .graph_exec_symbols = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0),
298 .all_tensor_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0),
299 };
300 build.tensor_context.old_tensor_symbol_new_hook_context = ccv_nnc_tensor_symbol_new_hook(graph, _ccv_cnnp_gradient_checkpoint_tensor_symbol_new_hook, &build, &build.tensor_context.old_tensor_symbol_new_hook);
301 build.tensor_context.old_tensor_symbol_alias_new_hook_context = ccv_nnc_tensor_symbol_alias_new_hook(graph, _ccv_cnnp_gradient_checkpoint_tensor_symbol_alias_new_hook, &build, &build.tensor_context.old_tensor_symbol_alias_new_hook);
302 build.old_graph_exec_symbol_new_hook_context = ccv_nnc_graph_exec_symbol_new_hook(graph, _ccv_cnnp_model_gradient_checkpoint_graph_exec_symbol_new_hook, &build, &build.old_graph_exec_symbol_new_hook);
303 ccv_array_clear(parameters);
304 ccv_array_clear(parameter_ids);
305 ccv_array_clear(parameter_trainables);
306 ccv_array_clear(internals);
307 ccv_array_clear(internal_ids);
308 ccv_cnnp_model_sequence_t model_sequence = {
309 .bank = kh_init(ccv_cnnp_model_name_bank)kh_init_ccv_cnnp_model_name_bank()
310 };
311 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
312 .sequence = &model_sequence,
313 .prefix = 't',
314 .symbols = parameters,
315 .ids = parameter_ids,
316 .trainables = parameter_trainables,
317 };
318 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
319 .sequence = &model_sequence,
320 .prefix = 'r',
321 .symbols = internals,
322 .ids = internal_ids,
323 .trainables = 0,
324 };
325 ccv_cnnp_model_build_data_t build_data = {
326 .is_trainable = checkpoint->is_trainable,
327 .model_sequence = &model_sequence,
328 .add_to_array = ccv_cnnp_model_add_to_array,
329 .parameters = parameters,
330 .context = {
331 .add_to_parameter = &add_to_parameter_context,
332 .add_to_output = &add_to_output_context,
333 },
334 .is_gradient_checkpointing = 1, // Mark this as true so we don't allocate gradient_checkpoints array or override the hooks.
335 .gradient_checkpoints = 0,
336 };
337 checkpoint->model->data = &build_data;
338 checkpoint->build(checkpoint->model, graph, checkpoint->inputs, checkpoint->input_size, max_outputs, checkpoint->output_size);
339 checkpoint->model->data = 0;
340 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank)kh_destroy_ccv_cnnp_model_name_bank(model_sequence.bank);
341 if (model_sequence.sequences)
342 ccv_array_free(model_sequence.sequences);
343 ccv_nnc_tensor_symbol_new_hook(graph, build.tensor_context.old_tensor_symbol_new_hook, build.tensor_context.old_tensor_symbol_new_hook_context, 0);
344 ccv_nnc_tensor_symbol_alias_new_hook(graph, build.tensor_context.old_tensor_symbol_alias_new_hook, build.tensor_context.old_tensor_symbol_alias_new_hook_context, 0);
345 ccv_nnc_graph_exec_symbol_autogen(graph, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, 0)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(0)))
, build.graph_exec_symbols->rnum, 0);
346 for (j = 0; j < parameter_ids->rnum; j++)
347 ccfreefree(*(char**)ccv_array_get(parameter_ids, j)((void*)(((char*)((parameter_ids)->data)) + (size_t)(parameter_ids
)->rsize * (size_t)(j)))
);
348 for (j = 0; j < internal_ids->rnum; j++)
349 ccfreefree(*(char**)ccv_array_get(internal_ids, j)((void*)(((char*)((internal_ids)->data)) + (size_t)(internal_ids
)->rsize * (size_t)(j)))
);
350 // Note that there is no graph optimization applied here.
351 exec_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0)((void*)(((char*)((graph->exec_symbol_info)->data)) + (
size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)))
;
352 // Reuse existing one.
353 kh_clear(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols)kh_clear_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
)
;
354 for (j = 0; j < build.tensor_context.tensor_symbols->rnum; j++)
355 {
356 const int idx = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(build.tensor_context.tensor_symbols, j)((void*)(((char*)((build.tensor_context.tensor_symbols)->data
)) + (size_t)(build.tensor_context.tensor_symbols)->rsize *
(size_t)(j)))
)->d;
357 if (idx < 0)
358 continue;
359 if (kh_get(ccv_cnnp_tensor_symbol_set, parameters_or_internals, idx)kh_get_ccv_cnnp_tensor_symbol_set(parameters_or_internals, idx
)
!= kh_end(parameters_or_internals)((parameters_or_internals)->n_buckets))
360 continue;
361 int ret;
362 kh_put(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols, idx, &ret)kh_put_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
, idx, &ret)
;
363 }
364 ccv_array_t* const newly_input_execs = input_execs;
365 ccv_array_t* const newly_output_execs = output_execs;
366 ccv_array_clear(newly_input_execs);
367 ccv_array_clear(newly_output_execs);
368 for (j = 0; j < build.graph_exec_symbols->rnum; j++)
369 {
370 const int idx = ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(j)))
)->d;
371 if (idx < 0)
372 continue;
373 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[idx].flags)((exec_info[idx].flags) & CCV_NNC_GRAPH_EXEC_DEAD))
374 continue;
375 const ccv_nnc_graph_exec_symbol_t symbol = {
376 .graph = graph,
377 .d = idx
378 };
379 const int* inputs = exec_info[idx].inputs;
380 int input_size = exec_info[idx].input_size;
381 // Only go through forward pass.
382 assert(!ccv_nnc_cmd_is_backward(exec_info[idx].cmd))((void) sizeof ((!ccv_nnc_cmd_is_backward(exec_info[idx].cmd)
) ? 1 : 0), __extension__ ({ if (!ccv_nnc_cmd_is_backward(exec_info
[idx].cmd)) ; else __assert_fail ("!ccv_nnc_cmd_is_backward(exec_info[idx].cmd)"
, "ccv_cnnp_model_gradient_checkpointing.c", 382, __extension__
__PRETTY_FUNCTION__); }))
;
383 int flag = 0;
384 for (k = 0; inputs && k < input_size && !flag; k++)
385 if (inputs[k] >= 0)
386 for (l = 0; l < checkpoint->input_size && !flag; l++)
387 if (checkpoint->inputs[l].d >= 0 && inputs[k] == checkpoint->inputs[l].d)
388 flag = 1;
389 // Input logic is different from output logic. We need to filter out these exec that contains inputs from within the graph.
390 for (k = 0; inputs && k < input_size && flag; k++)
391 if (inputs[k] >= 0 && kh_get(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols, inputs[k])kh_get_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
, inputs[k])
!= kh_end(newly_created_tensor_symbols)((newly_created_tensor_symbols)->n_buckets))
392 flag = 0;
393 if (flag)
394 ccv_array_push(newly_input_execs, &symbol);
395 flag = 0;
396 const int* outputs = exec_info[idx].outputs;
397 int output_size = exec_info[idx].output_size;
398 for (k = 0; inputs && k < output_size && !flag; k++)
399 if (outputs[k] >= 0)
400 for (l = 0; l < checkpoint->output_size && !flag; l++)
401 if (max_outputs[l].d >= 0 && outputs[k] == max_outputs[l].d)
402 flag = 1;
403 if (flag)
404 ccv_array_push(newly_output_execs, &symbol);
405 }
406 for (j = 0; j < checkpoint->input_size; j++)
407 if (checkpoint->inputs[j].d >= 0)
408 ccv_array_push(parameters, checkpoint->inputs + j);
409 ccv_nnc_symbolic_graph_simplify(graph,
410 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
}, (1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 -1)
411 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
}, (1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 -1)
412 CCV_NNC_SIMPLIFY_OPS_FUSION)(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
}, (1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
+0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 -1)
,
413 ccv_array_get(parameters, 0)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(0)))
, parameters->rnum,
414 max_outputs, checkpoint->output_size,
415 ccv_array_get(newly_input_execs, 0)((void*)(((char*)((newly_input_execs)->data)) + (size_t)(newly_input_execs
)->rsize * (size_t)(0)))
, newly_input_execs->rnum, ccv_array_get(newly_output_execs, 0)((void*)(((char*)((newly_output_execs)->data)) + (size_t)(
newly_output_execs)->rsize * (size_t)(0)))
, newly_output_execs->rnum);
416 ccv_nnc_graph_exec_symbol_new_hook(graph, build.old_graph_exec_symbol_new_hook, build.old_graph_exec_symbol_new_hook_context, 0);
417 // Need to autogen and redo source / destination.
418 ccv_nnc_graph_exec_symbol_autogen(graph, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, 0)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(0)))
, build.graph_exec_symbols->rnum, 0);
419 ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, 0)((void*)(((char*)((graph->tensor_symbol_info)->data)) +
(size_t)(graph->tensor_symbol_info)->rsize * (size_t)(
0)))
;
420 exec_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0)((void*)(((char*)((graph->exec_symbol_info)->data)) + (
size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)))
;
421 ccv_array_clear(newly_input_execs);
422 for (j = 0; j < build.graph_exec_symbols->rnum; j++)
423 {
424 const int idx = ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(j)))
)->d;
425 if (idx < 0)
426 continue;
427 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[idx].flags)((exec_info[idx].flags) & CCV_NNC_GRAPH_EXEC_DEAD))
428 continue;
429 const ccv_nnc_graph_exec_symbol_t symbol = {
430 .graph = graph,
431 .d = idx
432 };
433 const int* inputs = exec_info[idx].inputs;
434 int input_size = exec_info[idx].input_size;
435 // Only go through forward pass.
436 assert(!ccv_nnc_cmd_is_backward(exec_info[idx].cmd))((void) sizeof ((!ccv_nnc_cmd_is_backward(exec_info[idx].cmd)
) ? 1 : 0), __extension__ ({ if (!ccv_nnc_cmd_is_backward(exec_info
[idx].cmd)) ; else __assert_fail ("!ccv_nnc_cmd_is_backward(exec_info[idx].cmd)"
, "ccv_cnnp_model_gradient_checkpointing.c", 436, __extension__
__PRETTY_FUNCTION__); }))
;
437 int flag = 0;
438 for (k = 0; inputs && k < input_size && !flag; k++)
439 if (inputs[k] >= 0)
440 for (l = 0; l < checkpoint->input_size && !flag; l++)
441 if (checkpoint->inputs[l].d >= 0 && inputs[k] == checkpoint->inputs[l].d)
442 flag = 1;
443 for (k = 0; inputs && k < input_size && flag; k++)
444 if (inputs[k] >= 0 && kh_get(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols, inputs[k])kh_get_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
, inputs[k])
!= kh_end(newly_created_tensor_symbols)((newly_created_tensor_symbols)->n_buckets))
445 flag = 0;
446 if (flag)
447 ccv_array_push(newly_input_execs, &symbol);
448 }
449 // Build a map between old tensor symbols and new tensor symbols.
450 assert(build.tensor_context.tensor_symbols->rnum <= checkpoint->tensor_symbols->rnum)((void) sizeof ((build.tensor_context.tensor_symbols->rnum
<= checkpoint->tensor_symbols->rnum) ? 1 : 0), __extension__
({ if (build.tensor_context.tensor_symbols->rnum <= checkpoint
->tensor_symbols->rnum) ; else __assert_fail ("build.tensor_context.tensor_symbols->rnum <= checkpoint->tensor_symbols->rnum"
, "ccv_cnnp_model_gradient_checkpointing.c", 450, __extension__
__PRETTY_FUNCTION__); }))
;
451 // Build a map to potentially map from old input to new input.
452 kh_clear(ccv_cnnp_tensor_symbol_map, symbol_map)kh_clear_ccv_cnnp_tensor_symbol_map(symbol_map);
453 for (j = 0, k = 0; j < build.tensor_context.tensor_symbols->rnum && k < checkpoint->tensor_symbols->rnum;)
454 {
455 const int from_d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(checkpoint->tensor_symbols, k)((void*)(((char*)((checkpoint->tensor_symbols)->data)) +
(size_t)(checkpoint->tensor_symbols)->rsize * (size_t)
(k)))
)->d;
456 if (from_d < 0) // This is removed, move to the next one.
457 {
458 ++j;
459 ++k;
460 continue;
461 }
462 const int to_d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(build.tensor_context.tensor_symbols, j)((void*)(((char*)((build.tensor_context.tensor_symbols)->data
)) + (size_t)(build.tensor_context.tensor_symbols)->rsize *
(size_t)(j)))
)->d;
463 assert(to_d >= 0)((void) sizeof ((to_d >= 0) ? 1 : 0), __extension__ ({ if (
to_d >= 0) ; else __assert_fail ("to_d >= 0", "ccv_cnnp_model_gradient_checkpointing.c"
, 463, __extension__ __PRETTY_FUNCTION__); }))
;
464 int from_flag = kh_get(ccv_cnnp_tensor_symbol_set, parameters_or_internals, from_d)kh_get_ccv_cnnp_tensor_symbol_set(parameters_or_internals, from_d
)
!= kh_end(parameters_or_internals)((parameters_or_internals)->n_buckets);
465 int to_flag = kh_get(ccv_cnnp_tensor_symbol_set, parameters_or_internals, to_d)kh_get_ccv_cnnp_tensor_symbol_set(parameters_or_internals, to_d
)
!= kh_end(parameters_or_internals)((parameters_or_internals)->n_buckets);
466 if (from_flag)
467 ++k;
468 if (to_flag)
469 ++j;
470 if (from_flag || to_flag)
471 continue;
472 ++k;
473 ++j;
474 // Skip if from_d is outputs.
475 for (l = 0; l < !from_flag && checkpoint->output_size; l++)
476 if (checkpoint->outputs[l].d == from_d)
477 from_flag = 1;
478 if (from_flag)
479 continue;
480 // Skip if to_d is outputs.
481 for (l = 0; l < !to_flag && checkpoint->output_size; l++)
482 if (checkpoint->outputs[l].d == to_d)
483 to_flag = 1;
484 if (to_flag)
485 continue;
486 int ret = 0;
487 khiter_t h = kh_put(ccv_cnnp_tensor_symbol_map, symbol_map, from_d, &ret)kh_put_ccv_cnnp_tensor_symbol_map(symbol_map, from_d, &ret
)
;
488 kh_val(symbol_map, h)((symbol_map)->vals[h]) = to_d;
489 }
490 // Now go over all backward passes to replace inputs with the ones from symbol map. Record these that are used.
491 ccv_array_clear(newly_used_outputs);
492 ccv_array_clear(replaced_backward_execs);
493 for (j = 0; j < visited_backward_execs->rnum; j++)
494 {
495 const int idx = *(int*)ccv_array_get(visited_backward_execs, j)((void*)(((char*)((visited_backward_execs)->data)) + (size_t
)(visited_backward_execs)->rsize * (size_t)(j)))
;
496 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[idx].flags)((exec_info[idx].flags) & CCV_NNC_GRAPH_EXEC_DEAD))
497 continue;
498 assert(idx >= 0)((void) sizeof ((idx >= 0) ? 1 : 0), __extension__ ({ if (
idx >= 0) ; else __assert_fail ("idx >= 0", "ccv_cnnp_model_gradient_checkpointing.c"
, 498, __extension__ __PRETTY_FUNCTION__); }))
;
499 assert(idx < exec_rnum)((void) sizeof ((idx < exec_rnum) ? 1 : 0), __extension__ (
{ if (idx < exec_rnum) ; else __assert_fail ("idx < exec_rnum"
, "ccv_cnnp_model_gradient_checkpointing.c", 499, __extension__
__PRETTY_FUNCTION__); }))
;
500 if (!ccv_nnc_cmd_is_backward(exec_info[idx].cmd))
501 continue;
502 for (k = 0; k < exec_info[idx].input_size; k++)
503 if (exec_info[idx].inputs[k] >= 0)
504 {
505 const khiter_t h = kh_get(ccv_cnnp_tensor_symbol_map, symbol_map, exec_info[idx].inputs[k])kh_get_ccv_cnnp_tensor_symbol_map(symbol_map, exec_info[idx].
inputs[k])
;
506 if (h != kh_end(symbol_map)((symbol_map)->n_buckets)) // Replacing it.
507 {
508 int newly_created_output = kh_val(symbol_map, h)((symbol_map)->vals[h]);
509 exec_info[idx].inputs[k] = newly_created_output;
510 ccv_array_add_unique_int(newly_used_outputs, newly_created_output);
511 if (tensor_symbol_info[newly_created_output].alias_ref > 0)
512 {
513 newly_created_output = tensor_symbol_info[newly_created_output].alias_ref - 1;
514 ccv_array_add_unique_int(newly_used_outputs, newly_created_output);
515 }
516 ccv_array_add_unique_int(replaced_backward_execs, idx);
517 }
518 }
519 }
520 for (j = 0; j < build.graph_exec_symbols->rnum; j++)
521 {
522 ccv_nnc_graph_exec_symbol_t* const symbol = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(j)))
;
523 if (symbol->d < 0)
524 continue;
525 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[symbol->d].flags)((exec_info[symbol->d].flags) & CCV_NNC_GRAPH_EXEC_DEAD
)
)
526 continue;
527 int x, y;
528 for (k = 0; k < replaced_backward_execs->rnum; k++)
529 {
530 const int idx = *(int*)ccv_array_get(replaced_backward_execs, k)((void*)(((char*)((replaced_backward_execs)->data)) + (size_t
)(replaced_backward_execs)->rsize * (size_t)(k)))
;
531 assert(idx >= 0)((void) sizeof ((idx >= 0) ? 1 : 0), __extension__ ({ if (
idx >= 0) ; else __assert_fail ("idx >= 0", "ccv_cnnp_model_gradient_checkpointing.c"
, 531, __extension__ __PRETTY_FUNCTION__); }))
;
532 assert(idx < exec_rnum)((void) sizeof ((idx < exec_rnum) ? 1 : 0), __extension__ (
{ if (idx < exec_rnum) ; else __assert_fail ("idx < exec_rnum"
, "ccv_cnnp_model_gradient_checkpointing.c", 532, __extension__
__PRETTY_FUNCTION__); }))
;
533 assert(ccv_nnc_cmd_is_backward(exec_info[idx].cmd))((void) sizeof ((ccv_nnc_cmd_is_backward(exec_info[idx].cmd))
? 1 : 0), __extension__ ({ if (ccv_nnc_cmd_is_backward(exec_info
[idx].cmd)) ; else __assert_fail ("ccv_nnc_cmd_is_backward(exec_info[idx].cmd)"
, "ccv_cnnp_model_gradient_checkpointing.c", 533, __extension__
__PRETTY_FUNCTION__); }))
;
534 int flag = 0;
535 for (x = 0; !flag && x < exec_info[idx].input_size; x++)
536 {
537 int x_d = exec_info[idx].inputs[x];
538 if (x_d < 0)
539 continue;
540 if (tensor_symbol_info[x_d].alias_ref > 0)
541 x_d = tensor_symbol_info[x_d].alias_ref - 1;
542 for (y = 0; !flag && y < exec_info[symbol->d].output_size; y++)
543 {
544 int y_d = exec_info[symbol->d].outputs[y];
545 if (y_d < 0)
546 continue;
547 if (tensor_symbol_info[y_d].alias_ref > 0)
548 y_d = tensor_symbol_info[y_d].alias_ref - 1;
549 if (x_d == y_d)
550 flag = 1;
551 }
552 }
553 if (flag)
554 ccv_nnc_graph_exec_symbol_concat(graph, *symbol, (ccv_nnc_graph_exec_symbol_t){
555 .graph = graph,
556 .d = idx
557 });
558 }
559 }
560 // Find parents to visited_backward_execs, and use that as the starting point of all newly added graph_exec_symbols. Use the visited backward execs as the source, use all its parents as destination, go through with graph visit.
561 ccv_sparse_matrix_t* const exec_dep = ccv_sparse_matrix_new(graph->exec_symbol_info->rnum, graph->exec_symbol_info->rnum, CCV_8U | CCV_C1, CCV_SPARSE_ROW_MAJOR, 0);
562#define for_block(x, val) \
563 do { \
564 if (((uint8_t*)val)[0] != 0) \
565 ccv_array_push(buf, &x); \
566 } while (0)
567 const uint8_t one = 1;
568 // Now go from outputs to inputs, unmark visited ones.
569 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
570 if (idx < exec_rnum && !CCV_NNC_GRAPH_EXEC_IS_DEAD(node->flags)((node->flags) & CCV_NNC_GRAPH_EXEC_DEAD) && maskbit[idx >> 5] & (1u << (idx & 0x1f)))
571 {
572 ccv_array_clear(buf);
573 ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep, idx);
574 if (vector)
575 CCV_SPARSE_VECTOR_FOREACH(exec_dep, vector, for_block)do { switch ((((exec_dep)->type) & 0xFF000)) { case CCV_32S
: { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i32 + (0))); } } } while (0); break; } case CCV_32F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f32 + (0))); } } } while (0); break; } case CCV_64S:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i64 + (0))); } } } while (0); break; } case CCV_64F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f64 + (0))); } } } while (0); break; } default: { do
{ int _i_; __attribute__((unused)) const size_t _c_ = (((exec_dep
)->type) & 0xFFF); if ((exec_dep)->type & CCV_DENSE_VECTOR
) { for (_i_ = 0; _i_ < (vector)->size; _i_++) { for_block
((_i_), ((vector)->data.u8 + (_i_ * _c_))); } } else { const
size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size
[(((exec_dep)->type) & 0xFF000) >> 12] * (((exec_dep
)->type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_
= (uint8_t*)(vector)->index; for (_i_ = 0; _i_ < (vector
)->size; _i_++) { ccv_sparse_matrix_index_t* const _idx_i_
= (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_ * _i_); if
(_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t _d_
= { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.u8 + (0))); } } } while (0); } } } while (0)
;
576 if (node->outgoings && node->outgoings->rnum > 0)
577 {
578 ccv_array_t* const outgoings = node->outgoings;
579 for (k = 0; k < outgoings->rnum; k++)
580 {
581 const int outgoing_d = *(int*)ccv_array_get(outgoings, k)((void*)(((char*)((outgoings)->data)) + (size_t)(outgoings
)->rsize * (size_t)(k)))
;
582 if (outgoing_d >= exec_rnum)
583 continue;
584 int l;
585 // We cannot avoid the ones that visited, because these may not contain all the deps.
586 ccv_set_sparse_matrix_cell(exec_dep, outgoing_d, idx, &one);
587 for (l = 0; l < buf->rnum; l++)
588 ccv_set_sparse_matrix_cell(exec_dep, outgoing_d, *(int*)ccv_array_get(buf, l)((void*)(((char*)((buf)->data)) + (size_t)(buf)->rsize *
(size_t)(l)))
, &one);
589 }
590 }
591 }
592 } ccv_nnc_graph_visit_endfor} }
593 // Now go from outputs to inputs, unmark visited ones.
594 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
595 if (idx < exec_rnum)
596 maskbit[idx >> 5] &= ~(1u << (idx & 0x1f));
597 } ccv_nnc_graph_visit_endfor} }
598 ccv_nnc_graph_visit_free(visit);
599#undef for_block
600 // Go through visited backward execs, remove the ones that has no dependency on any replaced backward execs.
601 for (j = 0; j < visited_backward_execs->rnum;)
602 {
603 const int idx = *(int*)ccv_array_get(visited_backward_execs, j)((void*)(((char*)((visited_backward_execs)->data)) + (size_t
)(visited_backward_execs)->rsize * (size_t)(j)))
;
604 if (ccv_array_contain_int(replaced_backward_execs, idx))
605 {
606 ++j;
607 continue;
608 }
609 ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep, idx);
610 int flag = 0;
611#define for_block(x, val) \
612 do { \
613 if (((uint8_t*)val)[0] != 0) \
614 if (ccv_array_contain_int(replaced_backward_execs, x)) \
615 flag = 1; \
616 } while (0)
617 if (vector)
618 CCV_SPARSE_VECTOR_FOREACH(exec_dep, vector, for_block)do { switch ((((exec_dep)->type) & 0xFF000)) { case CCV_32S
: { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i32 + (0))); } } } while (0); break; } case CCV_32F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f32 + (0))); } } } while (0); break; } case CCV_64S:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i64 + (0))); } } } while (0); break; } case CCV_64F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f64 + (0))); } } } while (0); break; } default: { do
{ int _i_; __attribute__((unused)) const size_t _c_ = (((exec_dep
)->type) & 0xFFF); if ((exec_dep)->type & CCV_DENSE_VECTOR
) { for (_i_ = 0; _i_ < (vector)->size; _i_++) { for_block
((_i_), ((vector)->data.u8 + (_i_ * _c_))); } } else { const
size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size
[(((exec_dep)->type) & 0xFF000) >> 12] * (((exec_dep
)->type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_
= (uint8_t*)(vector)->index; for (_i_ = 0; _i_ < (vector
)->size; _i_++) { ccv_sparse_matrix_index_t* const _idx_i_
= (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_ * _i_); if
(_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t _d_
= { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.u8 + (0))); } } } while (0); } } } while (0)
;
619#undef for_block
620 if (!flag)
621 {
622 if (j < visited_backward_execs->rnum - 1)
623 *(int*)ccv_array_get(visited_backward_execs, j)((void*)(((char*)((visited_backward_execs)->data)) + (size_t
)(visited_backward_execs)->rsize * (size_t)(j)))
= *(int*)ccv_array_get(visited_backward_execs, visited_backward_execs->rnum - 1)((void*)(((char*)((visited_backward_execs)->data)) + (size_t
)(visited_backward_execs)->rsize * (size_t)(visited_backward_execs
->rnum - 1)))
;
624 --visited_backward_execs->rnum;
625 continue;
626 }
627 ++j;
628 }
629 // Now go through all replaced_backward_execs to find the ones has no dependencies in visited_backward_execs.
630 for (j = 0; j < replaced_backward_execs->rnum; j++)
631 {
632 const int idx = *(int*)ccv_array_get(replaced_backward_execs, j)((void*)(((char*)((replaced_backward_execs)->data)) + (size_t
)(replaced_backward_execs)->rsize * (size_t)(j)))
;
633 ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep, idx);
634 int flag = 0;
635#define for_block(x, val) \
636 do { \
637 if (((uint8_t*)val)[0] != 0) \
638 if (ccv_array_contain_int(visited_backward_execs, x)) \
639 flag = 1; \
640 } while (0)
641 if (vector)
642 CCV_SPARSE_VECTOR_FOREACH(exec_dep, vector, for_block)do { switch ((((exec_dep)->type) & 0xFF000)) { case CCV_32S
: { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i32 + (0))); } } } while (0); break; } case CCV_32F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f32 + (0))); } } } while (0); break; } case CCV_64S:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i64 + (0))); } } } while (0); break; } case CCV_64F:
{ do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
-4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
(_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
* _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
_d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f64 + (0))); } } } while (0); break; } default: { do
{ int _i_; __attribute__((unused)) const size_t _c_ = (((exec_dep
)->type) & 0xFFF); if ((exec_dep)->type & CCV_DENSE_VECTOR
) { for (_i_ = 0; _i_ < (vector)->size; _i_++) { for_block
((_i_), ((vector)->data.u8 + (_i_ * _c_))); } } else { const
size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size
[(((exec_dep)->type) & 0xFF000) >> 12] * (((exec_dep
)->type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_
= (uint8_t*)(vector)->index; for (_i_ = 0; _i_ < (vector
)->size; _i_++) { ccv_sparse_matrix_index_t* const _idx_i_
= (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_ * _i_); if
(_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t _d_
= { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.u8 + (0))); } } } while (0); } } } while (0)
;
643#undef for_block
644 // If this one has no parents that is within the visited_backward_execs, it is a good place for us to add all its parents as dependency for input_execs.
645 if (!flag)
646 {
647 assert(idx < exec_rnum)((void) sizeof ((idx < exec_rnum) ? 1 : 0), __extension__ (
{ if (idx < exec_rnum) ; else __assert_fail ("idx < exec_rnum"
, "ccv_cnnp_model_gradient_checkpointing.c", 647, __extension__
__PRETTY_FUNCTION__); }))
;
648 ccv_array_t* const outgoings = reversed_nodes[idx].outgoings;
649 assert(outgoings)((void) sizeof ((outgoings) ? 1 : 0), __extension__ ({ if (outgoings
) ; else __assert_fail ("outgoings", "ccv_cnnp_model_gradient_checkpointing.c"
, 649, __extension__ __PRETTY_FUNCTION__); }))
;
650 for (k = 0; k < outgoings->rnum; k++)
651 {
652 const int d = *(int*)ccv_array_get(outgoings, k)((void*)(((char*)((outgoings)->data)) + (size_t)(outgoings
)->rsize * (size_t)(k)))
;
653 for (l = 0; l < newly_input_execs->rnum; l++)
654 {
655 ccv_nnc_graph_exec_symbol_concat(graph, (ccv_nnc_graph_exec_symbol_t){
656 .graph = graph,
657 .d = d
658 }, *(ccv_nnc_graph_exec_symbol_t*)ccv_array_get(newly_input_execs, l)((void*)(((char*)((newly_input_execs)->data)) + (size_t)(newly_input_execs
)->rsize * (size_t)(l)))
);
659 }
660 }
661 }
662 }
663 ccv_matrix_free(exec_dep);
664 // Go through all exec, free ones that doesn't have output used.
665 // Reuse this array because it is not useful any more.
666 ccv_array_t* forward_pass_inputs = visited_backward_execs;
667 int any_deleted;
668 do {
669 // Build a map of still active inputs.
670 ccv_array_clear(forward_pass_inputs);
671 for (j = 0; j < build.graph_exec_symbols->rnum; j++)
672 {
673 ccv_nnc_graph_exec_symbol_t* const symbol = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(j)))
;
674 if (symbol->d < 0)
675 continue;
676 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[symbol->d].flags)((exec_info[symbol->d].flags) & CCV_NNC_GRAPH_EXEC_DEAD
)
)
677 continue;
678 int* const inputs = exec_info[symbol->d].inputs;
679 const int input_size = exec_info[symbol->d].input_size;
680 for (k = 0; k < input_size; k++)
681 {
682 int d = inputs[k];
683 if (d < 0)
684 continue;
685 ccv_array_add_unique_int(forward_pass_inputs, d);
686 if (tensor_symbol_info[d].alias_ref > 0)
687 {
688 d = tensor_symbol_info[d].alias_ref - 1;
689 ccv_array_add_unique_int(forward_pass_inputs, d);
690 }
691 }
692 }
693 any_deleted = 0;
694 for (j = 0; j < build.graph_exec_symbols->rnum; j++)
695 {
696 ccv_nnc_graph_exec_symbol_t* const symbol = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(j)))
;
697 if (symbol->d < 0)
698 continue;
699 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[symbol->d].flags)((exec_info[symbol->d].flags) & CCV_NNC_GRAPH_EXEC_DEAD
)
)
700 continue;
701 int* const outputs = exec_info[symbol->d].outputs;
702 const int output_size = exec_info[symbol->d].output_size;
703 int flag = 0;
704 for (k = 0; !flag && k < output_size; k++)
705 {
706 int d = outputs[k];
707 if (d < 0)
708 continue;
709 flag = ccv_array_contain_int(newly_used_outputs, d) || ccv_array_contain_int(forward_pass_inputs, d);
710 if (!flag && tensor_symbol_info[d].alias_ref > 0)
711 {
712 d = tensor_symbol_info[d].alias_ref - 1;
713 flag = ccv_array_contain_int(newly_used_outputs, d) || ccv_array_contain_int(forward_pass_inputs, d);
714 }
715 }
716 if (flag)
717 continue;
718 ccv_nnc_graph_exec_symbol_free(graph, *symbol);
719 symbol->d = -1;
720 symbol->graph = 0;
721 any_deleted = 1;
722 }
723 } while (any_deleted);
724 ccv_array_clear(forward_pass_inputs);
725 for (j = 0; j < build.graph_exec_symbols->rnum; j++)
726 {
727 ccv_nnc_graph_exec_symbol_t* const symbol = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(j)))
;
728 if (symbol->d < 0)
729 continue;
730 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[symbol->d].flags)((exec_info[symbol->d].flags) & CCV_NNC_GRAPH_EXEC_DEAD
)
)
731 continue;
732 int* const inputs = exec_info[symbol->d].inputs;
733 const int input_size = exec_info[symbol->d].input_size;
734 for (k = 0; k < input_size; k++)
735 {
736 if (inputs[k] < 0)
737 continue;
738 ccv_array_add_unique_int(forward_pass_inputs, inputs[k]);
739 if (tensor_symbol_info[inputs[k]].alias_ref > 0)
740 ccv_array_add_unique_int(forward_pass_inputs, tensor_symbol_info[inputs[k]].alias_ref - 1);
741 }
742 int* const outputs = exec_info[symbol->d].outputs;
743 const int output_size = exec_info[symbol->d].output_size;
744 for (k = 0; k < output_size; k++)
745 {
746 if (outputs[k] < 0)
747 continue;
748 ccv_array_add_unique_int(forward_pass_inputs, outputs[k]);
749 if (tensor_symbol_info[outputs[k]].alias_ref > 0)
750 ccv_array_add_unique_int(forward_pass_inputs, tensor_symbol_info[outputs[k]].alias_ref - 1);
751 }
752 }
753 // Free unused tensor symbols.
754 for (j = 0; j < build.all_tensor_symbols->rnum; j++)
755 {
756 const ccv_nnc_tensor_symbol_t* symbol = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(build.all_tensor_symbols, j)((void*)(((char*)((build.all_tensor_symbols)->data)) + (size_t
)(build.all_tensor_symbols)->rsize * (size_t)(j)))
);
757 if (ccv_array_contain_int(newly_used_outputs, symbol->d) || ccv_array_contain_int(forward_pass_inputs, symbol->d))
758 continue;
759 if (tensor_symbol_info[symbol->d].alias_ref > 0)
760 {
761 const int d = tensor_symbol_info[symbol->d].alias_ref - 1;
762 if (ccv_array_contain_int(newly_used_outputs, d) || ccv_array_contain_int(forward_pass_inputs, d))
763 continue;
764 }
765 ccv_nnc_tensor_symbol_free(graph, *symbol);
766 }
767 for (j = 0; j < build.graph_exec_symbols->rnum; j++)
768 {
769 ccv_nnc_graph_exec_symbol_t* const symbol = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(j)))
;
770 if (symbol->d < 0)
771 continue;
772 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[symbol->d].flags)((exec_info[symbol->d].flags) & CCV_NNC_GRAPH_EXEC_DEAD
)
)
773 continue;
774 ccv_nnc_graph_exec_symbol_set_flags(graph, *symbol, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
775 }
776 // Free these newly created execs and tensor symbols.
777 ccv_array_free(build.tensor_context.tensor_symbols);
778 ccv_array_free(build.graph_exec_symbols);
779 ccv_array_free(build.all_tensor_symbols);
780 }
781 kh_destroy(ccv_cnnp_tensor_symbol_map, symbol_map)kh_destroy_ccv_cnnp_tensor_symbol_map(symbol_map);
782 kh_destroy(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols)kh_destroy_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
)
;
783 kh_destroy(ccv_cnnp_tensor_symbol_set, parameters_or_internals)kh_destroy_ccv_cnnp_tensor_symbol_set(parameters_or_internals
)
;
784 ccfreefree(max_outputs);
785 ccv_array_free(buf);
786 ccv_array_free(newly_used_outputs);
787 ccv_array_free(parameters);
788 ccv_array_free(parameter_ids);
789 ccv_array_free(parameter_trainables);
790 ccv_array_free(internals);
791 ccv_array_free(internal_ids);
792 ccfreefree(maskbit);
793 ccv_array_free(input_gradient_execs);
794 ccv_array_free(output_gradient_execs);
795 ccv_array_free(input_execs);
796 ccv_array_free(output_execs);
797 ccv_array_free(replaced_backward_execs);
798 ccv_array_free(visited_backward_execs);
799 for (i = 0; i < exec_rnum; i++)
800 if (reversed_nodes[i].outgoings)
801 ccv_array_free(reversed_nodes[i].outgoings);
802 ccfreefree(reversed_nodes);
803}