ccv_cnnp_model_gradient_checkpointing.c

Bug Summary

File:	nnc/ccv_cnnp_model_gradient_checkpointing.c
Warning:	line 75, column 1 Assigned value is garbage or undefined
Annotated Source Code

Press '?' to see keyboard shortcuts
Show analyzer invocation
clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model_gradient_checkpointing.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/19 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2024-11-15-112641-667043-1 -x c ccv_cnnp_model_gradient_checkpointing.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6// This can be removed once we organized ccv_cnnp_apply_gradient_checkpoints better.
7#include "_ccv_nnc_symbolic_graph.h"
8 
9void ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_symbolic_graph_t* const graph)
10{
11	ccv_array_t* const gradient_checkpoints = compiled_data->gradient_checkpoints;
12	if (!gradient_checkpoints || gradient_checkpoints->rnum == 0) // No saved gradient checkpoints, this is an easy way out.
13		return;
14	int i, j;
15	const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = (const ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, 0)((void*)(((char*)((graph->tensor_symbol_info)->data)) +
 (size_t)(graph->tensor_symbol_info)->rsize * (size_t)(
0)));
16	// Go through to check if any tensors that supposes in this map is removed.
17	for (i = 0; i < gradient_checkpoints->rnum; i++)
18	{
19		ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(gradient_checkpoints, i)((void*)(((char*)((gradient_checkpoints)->data)) + (size_t
)(gradient_checkpoints)->rsize * (size_t)(i)));
20		for (j = 0; j < checkpoint->tensor_symbols->rnum; j++)
21		{
22			ccv_nnc_tensor_symbol_t* const symbol = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(checkpoint->tensor_symbols, j)((void*)(((char*)((checkpoint->tensor_symbols)->data)) +
 (size_t)(checkpoint->tensor_symbols)->rsize * (size_t)
(j))));
23			if (symbol->d >= 0 && symbol->d < graph->tensor_symbol_info->rnum)
24				// If it is dead, we need to remove this symbol.
25				if (CCV_NNC_TENSOR_SYMBOL_IS_DEAD(tensor_symbol_info[symbol->d].flags)((tensor_symbol_info[symbol->d].flags) & CCV_NNC_TENSOR_SYMBOL_DEAD
))
26				{
27					symbol->d = -1;
28					symbol->graph = 0;
29				}
30		}
31	}
32}
33 
34typedef struct {
35	ccv_array_t* outgoings;
36} ccv_nnc_graph_exec_symbol_reverse_t;
37 
38typedef struct {
39	ccv_cnnp_model_gradient_checkpoint_build_context_t tensor_context;
40	ccv_array_t* graph_exec_symbols;
41	ccv_nnc_graph_exec_symbol_new_hook_f old_graph_exec_symbol_new_hook;
42	void* old_graph_exec_symbol_new_hook_context;
43	ccv_array_t* all_tensor_symbols;
44} ccv_cnnp_gradient_checkpoint_build_t;
45 
46static void _ccv_cnnp_gradient_checkpoint_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
47{
48	ccv_cnnp_gradient_checkpoint_build_t* const build_context = (ccv_cnnp_gradient_checkpoint_build_t*)context;
49	if (build_context->tensor_context.record)
50		ccv_array_push(build_context->tensor_context.tensor_symbols, &symbol);
51	ccv_array_push(build_context->all_tensor_symbols, &symbol);
52	if (build_context->tensor_context.old_tensor_symbol_new_hook)
53		build_context->tensor_context.old_tensor_symbol_new_hook(build_context->tensor_context.old_tensor_symbol_new_hook_context, symbol, info, name);
54}
55 
56static void _ccv_cnnp_gradient_checkpoint_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], const int inc[CCV_NNC_MAX_DIM_ALLOC(12)], const ccv_nnc_tensor_param_t info, const char* const name)
57{
58	ccv_cnnp_gradient_checkpoint_build_t* const build_context = (ccv_cnnp_gradient_checkpoint_build_t*)context;
59	if (build_context->tensor_context.record)
60		ccv_array_push(build_context->tensor_context.tensor_symbols, &symbol);
61	ccv_array_push(build_context->all_tensor_symbols, &symbol);
62	if (build_context->tensor_context.old_tensor_symbol_alias_new_hook)
63		build_context->tensor_context.old_tensor_symbol_alias_new_hook(build_context->tensor_context.old_tensor_symbol_alias_new_hook_context, symbol, from_symbol, ofs, inc, info, name);
64}
65 
66static void _ccv_cnnp_model_gradient_checkpoint_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
67{
68	ccv_cnnp_gradient_checkpoint_build_t* const build = (ccv_cnnp_gradient_checkpoint_build_t*)context;
69	ccv_array_push(build->graph_exec_symbols, &symbol);
70	if (build->old_graph_exec_symbol_new_hook)
71		build->old_graph_exec_symbol_new_hook(build->old_graph_exec_symbol_new_hook_context, symbol, cmd, inputs, input_size, outputs, output_size, name);
72}
73 
74KHASH_MAP_INIT_INT(ccv_cnnp_tensor_symbol_map, int)typedef struct kh_ccv_cnnp_tensor_symbol_map_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; khint32_t *
keys; int *vals; } kh_ccv_cnnp_tensor_symbol_map_t; static inline
 __attribute__ ((__unused__)) kh_ccv_cnnp_tensor_symbol_map_t
 *kh_init_ccv_cnnp_tensor_symbol_map(void) { return (kh_ccv_cnnp_tensor_symbol_map_t
*)calloc(1,sizeof(kh_ccv_cnnp_tensor_symbol_map_t)); } static
 inline __attribute__ ((__unused__)) void kh_destroy_ccv_cnnp_tensor_symbol_map
(kh_ccv_cnnp_tensor_symbol_map_t *h) { if (h) { free((void *)
h->keys); free(h->flags); free((void *)h->vals); free
(h); } } static inline __attribute__ ((__unused__)) void kh_clear_ccv_cnnp_tensor_symbol_map
(kh_ccv_cnnp_tensor_symbol_map_t *h) { if (h && h->
flags) { memset(h->flags, 0xaa, ((h->n_buckets) < 16
? 1 : (h->n_buckets)>>4) * sizeof(khint32_t)); h->
size = h->n_occupied = 0; } } static inline __attribute__ (
(__unused__)) khint_t kh_get_ccv_cnnp_tensor_symbol_map(const
 kh_ccv_cnnp_tensor_symbol_map_t *h, khint32_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = (khint32_t)(key); i = k & mask; last =
 i; while (!((h->flags[i>>4]>>((i&0xfU)<<
1))&2) && (((h->flags[i>>4]>>((i&
0xfU)<<1))&1) || !((h->keys[i]) == (key)))) { i =
 (i + (++step)) & mask; if (i == last) return h->n_buckets
; } return ((h->flags[i>>4]>>((i&0xfU)<<
1))&3)? h->n_buckets : i; } else return 0; } static inline
 __attribute__ ((__unused__)) int kh_resize_ccv_cnnp_tensor_symbol_map
(kh_ccv_cnnp_tensor_symbol_map_t *h, khint_t new_n_buckets) {
 khint32_t *new_flags = 0; khint_t j = 1; { (--(new_n_buckets
), (new_n_buckets)|=(new_n_buckets)>>1, (new_n_buckets)
|=(new_n_buckets)>>2, (new_n_buckets)|=(new_n_buckets)>>
4, (new_n_buckets)|=(new_n_buckets)>>8, (new_n_buckets)
|=(new_n_buckets)>>16, ++(new_n_buckets)); if (new_n_buckets
 < 4) new_n_buckets = 4; if (h->size >= (khint_t)(new_n_buckets
 * __ac_HASH_UPPER + 0.5)) j = 0; else { new_flags = (khint32_t
*)malloc(((new_n_buckets) < 16? 1 : (new_n_buckets)>>
4) * sizeof(khint32_t)); if (!new_flags) return -1; memset(new_flags
, 0xaa, ((new_n_buckets) < 16? 1 : (new_n_buckets)>>
4) * sizeof(khint32_t)); if (h->n_buckets < new_n_buckets
) { khint32_t *new_keys = (khint32_t*)realloc((void *)h->keys
,new_n_buckets * sizeof(khint32_t)); if (!new_keys) { free(new_flags
); return -1; } h->keys = new_keys; if (1) { int *new_vals
 = (int*)realloc((void *)h->vals,new_n_buckets * sizeof(int
)); if (!new_vals) { free(new_flags); return -1; } h->vals
 = new_vals; } } } } if (j) { for (j = 0; j != h->n_buckets
; ++j) { if (((h->flags[j>>4]>>((j&0xfU)<<
1))&3) == 0) { khint32_t key = h->keys[j]; int val; khint_t
 new_mask; new_mask = new_n_buckets - 1; if (1) val = h->vals
[j]; (h->flags[j>>4]|=1ul<<((j&0xfU)<<
1)); while (1) { khint_t k, i, step = 0; k = (khint32_t)(key)
; i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { khint32_t
 tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (1
) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
 (h->flags[i>>4]|=1ul<<((i&0xfU)<<1)
); } else { h->keys[i] = key; if (1) h->vals[i] = val; break
; } } } } if (h->n_buckets > new_n_buckets) { h->keys
 = (khint32_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(khint32_t)); if (1) h->vals = (int*)realloc((void *)h->
vals,new_n_buckets * sizeof(int)); } free(h->flags); h->
flags = new_flags; h->n_buckets = new_n_buckets; h->n_occupied
 = h->size; h->upper_bound = (khint_t)(h->n_buckets *
 __ac_HASH_UPPER + 0.5); } return 0; } static inline __attribute__
 ((__unused__)) khint_t kh_put_ccv_cnnp_tensor_symbol_map(kh_ccv_cnnp_tensor_symbol_map_t
 *h, khint32_t key, int *ret) { khint_t x; if (h->n_occupied
 >= h->upper_bound) { if (h->n_buckets > (h->size
<<1)) { if (kh_resize_ccv_cnnp_tensor_symbol_map(h, h->
n_buckets - 1) < 0) { *ret = -1; return h->n_buckets; }
 } else if (kh_resize_ccv_cnnp_tensor_symbol_map(h, h->n_buckets
 + 1) < 0) { *ret = -1; return h->n_buckets; } } { khint_t
 k, i, site, last, mask = h->n_buckets - 1, step = 0; x = site
 = h->n_buckets; k = (khint32_t)(key); i = k & mask; if
 (((h->flags[i>>4]>>((i&0xfU)<<1))&
2)) x = i; else { last = i; while (!((h->flags[i>>4]
>>((i&0xfU)<<1))&2) && (((h->flags
[i>>4]>>((i&0xfU)<<1))&1) || !((h->
keys[i]) == (key)))) { if (((h->flags[i>>4]>>(
(i&0xfU)<<1))&1)) site = i; i = (i + (++step)) &
 mask; if (i == last) { x = site; break; } } if (x == h->n_buckets
) { if (((h->flags[i>>4]>>((i&0xfU)<<
1))&2) && site != h->n_buckets) x = site; else
 x = i; } } } if (((h->flags[x>>4]>>((x&0xfU
)<<1))&2)) { h->keys[x] = key; (h->flags[x>>
4]&=~(3ul<<((x&0xfU)<<1))); ++h->size;
 ++h->n_occupied; *ret = 1; } else if (((h->flags[x>>
4]>>((x&0xfU)<<1))&1)) { h->keys[x] = key
; (h->flags[x>>4]&=~(3ul<<((x&0xfU)<<
1))); ++h->size; *ret = 2; } else *ret = 0; return x; } static
 inline __attribute__ ((__unused__)) void kh_del_ccv_cnnp_tensor_symbol_map
(kh_ccv_cnnp_tensor_symbol_map_t *h, khint_t x) { if (x != h->
n_buckets && !((h->flags[x>>4]>>((x&
0xfU)<<1))&3)) { (h->flags[x>>4]|=1ul<<
((x&0xfU)<<1)); --h->size; } }
75KHASH_SET_INIT_INT(ccv_cnnp_tensor_symbol_set)typedef struct kh_ccv_cnnp_tensor_symbol_set_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; khint32_t *
keys; char *vals; } kh_ccv_cnnp_tensor_symbol_set_t; static inline
 __attribute__ ((__unused__)) kh_ccv_cnnp_tensor_symbol_set_t
 *kh_init_ccv_cnnp_tensor_symbol_set(void) { return (kh_ccv_cnnp_tensor_symbol_set_t
*)calloc(1,sizeof(kh_ccv_cnnp_tensor_symbol_set_t)); } static
 inline __attribute__ ((__unused__)) void kh_destroy_ccv_cnnp_tensor_symbol_set
(kh_ccv_cnnp_tensor_symbol_set_t *h) { if (h) { free((void *)
h->keys); free(h->flags); free((void *)h->vals); free
(h); } } static inline __attribute__ ((__unused__)) void kh_clear_ccv_cnnp_tensor_symbol_set
(kh_ccv_cnnp_tensor_symbol_set_t *h) { if (h && h->
flags) { memset(h->flags, 0xaa, ((h->n_buckets) < 16
? 1 : (h->n_buckets)>>4) * sizeof(khint32_t)); h->
size = h->n_occupied = 0; } } static inline __attribute__ (
(__unused__)) khint_t kh_get_ccv_cnnp_tensor_symbol_set(const
 kh_ccv_cnnp_tensor_symbol_set_t *h, khint32_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = (khint32_t)(key); i = k & mask; last =
 i; while (!((h->flags[i>>4]>>((i&0xfU)<<
1))&2) && (((h->flags[i>>4]>>((i&
0xfU)<<1))&1) || !((h->keys[i]) == (key)))) { i =
 (i + (++step)) & mask; if (i == last) return h->n_buckets
; } return ((h->flags[i>>4]>>((i&0xfU)<<
1))&3)? h->n_buckets : i; } else return 0; } static inline
 __attribute__ ((__unused__)) int kh_resize_ccv_cnnp_tensor_symbol_set
(kh_ccv_cnnp_tensor_symbol_set_t *h, khint_t new_n_buckets) {
 khint32_t *new_flags = 0; khint_t j = 1; { (--(new_n_buckets
), (new_n_buckets)|=(new_n_buckets)>>1, (new_n_buckets)
|=(new_n_buckets)>>2, (new_n_buckets)|=(new_n_buckets)>>
4, (new_n_buckets)|=(new_n_buckets)>>8, (new_n_buckets)
|=(new_n_buckets)>>16, ++(new_n_buckets)); if (new_n_buckets
 < 4) new_n_buckets = 4; if (h->size >= (khint_t)(new_n_buckets
 * __ac_HASH_UPPER + 0.5)) j = 0; else { new_flags = (khint32_t
*)malloc(((new_n_buckets) < 16? 1 : (new_n_buckets)>>
4) * sizeof(khint32_t)); if (!new_flags) return -1; memset(new_flags
, 0xaa, ((new_n_buckets) < 16? 1 : (new_n_buckets)>>
4) * sizeof(khint32_t)); if (h->n_buckets < new_n_buckets
) { khint32_t *new_keys = (khint32_t*)realloc((void *)h->keys
,new_n_buckets * sizeof(khint32_t)); if (!new_keys) { free(new_flags
); return -1; } h->keys = new_keys; if (0) { char *new_vals
 = (char*)realloc((void *)h->vals,new_n_buckets * sizeof(char
)); if (!new_vals) { free(new_flags); return -1; } h->vals
 = new_vals; } } } } if (j) { for (j = 0; j != h->n_buckets
; ++j) { if (((h->flags[j>>4]>>((j&0xfU)<<
1))&3) == 0) { khint32_t key = h->keys[j]; char val; khint_t
 new_mask; new_mask = new_n_buckets - 1; if (0) val = h->vals
[j]; (h->flags[j>>4]|=1ul<<((j&0xfU)<<
1)); while (1) { khint_t k, i, step = 0; k = (khint32_t)(key)
; i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { khint32_t
 tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (0
) { char tmp = h->vals[i]; h->vals[i] = val; val = tmp;
 } (h->flags[i>>4]|=1ul<<((i&0xfU)<<
1)); } else { h->keys[i] = key; if (0) h->vals[i] = val
; break; } } } } if (h->n_buckets > new_n_buckets) { h->
keys = (khint32_t*)realloc((void *)h->keys,new_n_buckets *
 sizeof(khint32_t)); if (0) h->vals = (char*)realloc((void
 *)h->vals,new_n_buckets * sizeof(char)); } free(h->flags
); h->flags = new_flags; h->n_buckets = new_n_buckets; h
->n_occupied = h->size; h->upper_bound = (khint_t)(h
->n_buckets * __ac_HASH_UPPER + 0.5); } return 0; } static
 inline __attribute__ ((__unused__)) khint_t kh_put_ccv_cnnp_tensor_symbol_set
(kh_ccv_cnnp_tensor_symbol_set_t *h, khint32_t key, int *ret)
 { khint_t x; if (h->n_occupied >= h->upper_bound) {
 if (h->n_buckets > (h->size<<1)) { if (kh_resize_ccv_cnnp_tensor_symbol_set
(h, h->n_buckets - 1) < 0) { *ret = -1; return h->n_buckets
; } } else if (kh_resize_ccv_cnnp_tensor_symbol_set(h, h->
n_buckets + 1) < 0) { *ret = -1; return h->n_buckets; }
 } { khint_t k, i, site, last, mask = h->n_buckets - 1, step
 = 0; x = site = h->n_buckets; k = (khint32_t)(key); i = k
 & mask; if (((h->flags[i>>4]>>((i&0xfU
)<<1))&2)) x = i; else { last = i; while (!((h->
flags[i>>4]>>((i&0xfU)<<1))&2) &&
 (((h->flags[i>>4]>>((i&0xfU)<<1))&
1) || !((h->keys[i]) == (key)))) { if (((h->flags[i>>
4]>>((i&0xfU)<<1))&1)) site = i; i = (i +
 (++step)) & mask; if (i == last) { x = site; break; } } if
 (x == h->n_buckets) { if (((h->flags[i>>4]>>
((i&0xfU)<<1))&2) && site != h->n_buckets
) x = site; else x = i; } } } if (((h->flags[x>>4]>>
((x&0xfU)<<1))&2)) { h->keys[x] = key; (h->
flags[x>>4]&=~(3ul<<((x&0xfU)<<1)))
; ++h->size; ++h->n_occupied; *ret = 1; } else if (((h->
flags[x>>4]>>((x&0xfU)<<1))&1)) { h
->keys[x] = key; (h->flags[x>>4]&=~(3ul<<
((x&0xfU)<<1))); ++h->size; *ret = 2; } else *ret
 = 0; return x; } static inline __attribute__ ((__unused__)) void
 kh_del_ccv_cnnp_tensor_symbol_set(kh_ccv_cnnp_tensor_symbol_set_t
 *h, khint_t x) { if (x != h->n_buckets && !((h->
flags[x>>4]>>((x&0xfU)<<1))&3)) { (
h->flags[x>>4]|=1ul<<((x&0xfU)<<1));
 --h->size; } }
11
←
Taking true branch→
12
←
Taking false branch→
13
←
Calling 'kh_resize_ccv_cnnp_tensor_symbol_set'→
14
←
Taking true branch→
15
←
Assuming the condition is false→
16
←
Taking false branch→
17
←
'?' condition is true→
18
←
Assuming 'new_flags' is non-null→
19
←
Taking false branch→
20
←
'?' condition is true→
21
←
Taking true branch→
22
←
Storing uninitialized value→
23
←
Assuming 'new_keys' is non-null→
24
←
Taking false branch→
25
←
Taking false branch→
26
←
Taking true branch→
27
←
Loop condition is false. Execution continues on line 75→
28
←
Taking false branch→
29
←
Returning from 'kh_resize_ccv_cnnp_tensor_symbol_set'→
30
←
Taking false branch→
31
←
Assuming right operand of bit shift is less than 32→
32
←
Assuming the condition is true→
33
←
Taking true branch→
34
←
Assuming the condition is false→
35
←
Taking false branch→
36
←
Assuming the condition is false→
37
←
Taking false branch→
42
←
Assuming field 'n_occupied' is >= field 'upper_bound'→
43
←
Taking true branch→
44
←
Taking true branch→
45
←
Calling 'kh_resize_ccv_cnnp_tensor_symbol_set'→
46
←
Taking false branch→
47
←
Assuming the condition is false→
48
←
Taking false branch→
49
←
'?' condition is true→
50
←
Assuming 'new_flags' is non-null→
51
←
Taking false branch→
52
←
'?' condition is true→
53
←
Taking false branch→
54
←
Taking true branch→
55
←
The value 0 is assigned to 'j'→
56
←
Loop condition is true.  Entering loop body→
57
←
Assuming the condition is true→
58
←
Taking true branch→
59
←
Assigned value is garbage or undefined
76 
77void ccv_cnnp_model_apply_gradient_checkpoints(ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_symbolic_graph_t* const graph)
78{
79	ccv_array_t* const gradient_checkpoints = compiled_data->gradient_checkpoints;
80	if (!gradient_checkpoints || gradient_checkpoints->rnum == 0) // No saved gradient checkpoints, this is an easy way out.
1
Assuming 'gradient_checkpoints' is non-null→
2
←
Assuming field 'rnum' is not equal to 0→
3
←
Taking false branch→
81		return;
82	// Otherwise, for each gradient checkpoint, there are 3 steps:
83	// 1. Find currently, what execs exists from inputs to outputs.
84	// 2. Find execs that generates the outputs, and their corresponding backward execs.
85	// 3. Find all backward execs flow from outputs back to inputs.
86	// 4. Generate new ops by calling build again with old inputs, record all new tensors / execs.
87	// 5. Replace inputs in backward execs with the new tensors.
88	// 6. Hook the execs takes inputs with edge from parents of backward execs in step 2.
89	// 7. Delete newly generated execs that has no use (i.e. its outputs are not used by backward pass).
90	// 8. Mark all new execs with DISABLE_OPT to avoid common sub-expression elimination pass.
91	int i, j, k, l;
92	ccv_array_t* input_execs = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
93	ccv_array_t* output_execs = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
94	ccv_array_t* input_gradient_execs = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
95	ccv_array_t* output_gradient_execs = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
96	ccv_array_t* visited_backward_execs = ccv_array_new(sizeof(int), 0, 0);
97	ccv_array_t* replaced_backward_execs = ccv_array_new(sizeof(int), 0, 0);
98	const int exec_rnum = graph->exec_symbol_info->rnum;
99	ccv_nnc_graph_exec_symbol_reverse_t* const reversed_nodes = cccalloccalloc(exec_rnum, sizeof(ccv_nnc_graph_exec_symbol_reverse_t));
100	for (i = 0; i < exec_rnum; i++)
4
←
Assuming 'i' is >= 'exec_rnum'→
5
←
Loop condition is false. Execution continues on line 116→
101	{
102		const int* tos = 0;
103		int to_size = 0;
104		ccv_nnc_graph_exec_symbol_to(graph, (ccv_nnc_graph_exec_symbol_t){
105			.graph = graph,
106			.d = i
107		}, &tos, &to_size);
108		if (tos)
109			for (j = 0; j < to_size; j++)
110			{
111				if (!reversed_nodes[tos[j]].outgoings)
112					reversed_nodes[tos[j]].outgoings = ccv_array_new(sizeof(int), 1, 0);
113				ccv_array_add_unique_int(reversed_nodes[tos[j]].outgoings, i);
114			}
115	}
116	uint32_t* const maskbit = cccalloccalloc((exec_rnum + 31) >> 5, sizeof(uint32_t));
117	// Temporary for build_data.
118	ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
119	ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
120	ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
121	ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
122	ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
123	ccv_array_t* const buf = ccv_array_new(sizeof(int), 0, 0);
124	int max_output_size = 0;
125	for (i = 0; i < gradient_checkpoints->rnum; i++)
6
←
Assuming 'i' is >= field 'rnum'→
7
←
Loop condition is false. Execution continues on line 130→
126	{
127		ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(gradient_checkpoints, i)((void*)(((char*)((gradient_checkpoints)->data)) + (size_t
)(gradient_checkpoints)->rsize * (size_t)(i)));
128		max_output_size = ccv_max(checkpoint->output_size, max_output_size)({ typeof (checkpoint->output_size) _a = (checkpoint->output_size
); typeof (max_output_size) _b = (max_output_size); (_a > _b
) ? _a : _b; });
129	}
130	ccv_nnc_tensor_symbol_t* max_outputs = ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * max_output_size);
131	ccv_array_t* newly_used_outputs = ccv_array_new(sizeof(int), 0, 0);
132	khash_t(ccv_cnnp_tensor_symbol_set)kh_ccv_cnnp_tensor_symbol_set_t* const parameters_or_internals = kh_init(ccv_cnnp_tensor_symbol_set)kh_init_ccv_cnnp_tensor_symbol_set();
133	for (i = 0; i < compiled_data->parameters->rnum; i++)
8
←
Assuming 'i' is < field 'rnum'→
9
←
Loop condition is true.  Entering loop body→
39
←
Assuming 'i' is < field 'rnum'→
40
←
Loop condition is true.  Entering loop body→
134	{
135		const ccv_nnc_tensor_symbol_t* const symbol = (const ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
 (size_t)(compiled_data->parameters)->rsize * (size_t)(
i)));
136		int ret;
137		kh_put(ccv_cnnp_tensor_symbol_set, parameters_or_internals, symbol->d, &ret)kh_put_ccv_cnnp_tensor_symbol_set(parameters_or_internals, symbol
->d, &ret);
10
←
Calling 'kh_put_ccv_cnnp_tensor_symbol_set'→
38
←
Returning from 'kh_put_ccv_cnnp_tensor_symbol_set'→
41
←
Calling 'kh_put_ccv_cnnp_tensor_symbol_set'→
138	}
139	for (i = 0; i < compiled_data->internals->rnum; i++)
140	{
141		const ccv_nnc_tensor_symbol_t* const symbol = (const ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
 (size_t)(compiled_data->parameters)->rsize * (size_t)(
i)));
142		int ret;
143		kh_put(ccv_cnnp_tensor_symbol_set, parameters_or_internals, symbol->d, &ret)kh_put_ccv_cnnp_tensor_symbol_set(parameters_or_internals, symbol
->d, &ret);
144	}
145	khash_t(ccv_cnnp_tensor_symbol_set)kh_ccv_cnnp_tensor_symbol_set_t* const newly_created_tensor_symbols = kh_init(ccv_cnnp_tensor_symbol_set)kh_init_ccv_cnnp_tensor_symbol_set();
146	khash_t(ccv_cnnp_tensor_symbol_map)kh_ccv_cnnp_tensor_symbol_map_t* symbol_map = kh_init(ccv_cnnp_tensor_symbol_map)kh_init_ccv_cnnp_tensor_symbol_map();
147	for (i = 0; i < gradient_checkpoints->rnum; i++)
148	{
149		ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(gradient_checkpoints, i)((void*)(((char*)((gradient_checkpoints)->data)) + (size_t
)(gradient_checkpoints)->rsize * (size_t)(i)));
150		kh_clear(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols)kh_clear_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
);
151		for (j = 0; j < checkpoint->tensor_symbols->rnum; j++)
152		{
153			const int idx = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(checkpoint->tensor_symbols, j)((void*)(((char*)((checkpoint->tensor_symbols)->data)) +
 (size_t)(checkpoint->tensor_symbols)->rsize * (size_t)
(j))))->d;
154			if (idx < 0)
155				continue;
156			// Skip parameters or internals.
157			if (kh_get(ccv_cnnp_tensor_symbol_set, parameters_or_internals, idx)kh_get_ccv_cnnp_tensor_symbol_set(parameters_or_internals, idx
) != kh_end(parameters_or_internals)((parameters_or_internals)->n_buckets))
158				continue;
159			int ret;
160			kh_put(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols, idx, &ret)kh_put_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
, idx, &ret);
161		}
162		ccv_array_clear(input_execs);
163		ccv_array_clear(output_execs);
164		ccv_nnc_graph_exec_symbol_info_t* exec_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0)((void*)(((char*)((graph->exec_symbol_info)->data)) + (
size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)));
165		for (j = 0; j < exec_rnum; j++)
166		{
167			if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[j].flags)((exec_info[j].flags) & CCV_NNC_GRAPH_EXEC_DEAD))
168				continue;
169			const int* inputs = exec_info[j].inputs;
170			int input_size = exec_info[j].input_size;
171			const int* outputs = exec_info[j].outputs;
172			int output_size = exec_info[j].output_size;
173			if (input_size == 0 && output_size == 0)
174				continue;
175			// Only go through forward pass.
176			if (ccv_nnc_cmd_is_backward(exec_info[j].cmd))
177				continue;
178			const ccv_nnc_graph_exec_symbol_t symbol = {
179				.graph = graph,
180				.d = j
181			};
182			int flag = 0;
183			for (k = 0; inputs && k < input_size && !flag; k++)
184				if (inputs[k] >= 0)
185					for (l = 0; l < checkpoint->input_size && !flag; l++)
186						if (checkpoint->inputs[l].d >= 0 && inputs[k] == checkpoint->inputs[l].d)
187							flag = 1;
188			// Input logic is different from output logic. We need to filter out these exec that contains inputs from within the graph.
189			for (k = 0; inputs && k < input_size && flag; k++)
190				if (inputs[k] >= 0 && kh_get(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols, inputs[k])kh_get_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
, inputs[k]) != kh_end(newly_created_tensor_symbols)((newly_created_tensor_symbols)->n_buckets))
191					flag = 0;
192			if (flag)
193				ccv_array_push(input_execs, &symbol);
194			flag = 0;
195			for (k = 0; outputs && k < output_size && !flag; k++)
196				if (outputs[k] >= 0)
197					for (l = 0; l < checkpoint->output_size && !flag; l++)
198						if (checkpoint->outputs[l].d >= 0 && outputs[k] == checkpoint->outputs[l].d)
199							flag = 1;
200			if (flag)
201				ccv_array_push(output_execs, &symbol);
202		}
203		if (input_execs->rnum <= 0 || output_execs->rnum <= 0)
204			continue;
205		// Fill in blanks (i.e. the backward ops that are not showing in above, but should be included to avoid excluding necessary ones). This is done by flowing gradients from outputs back all the way to inputs.
206		ccv_array_clear(input_gradient_execs);
207		ccv_array_clear(output_gradient_execs);
208		for (j = 0; j < input_execs->rnum; j++)
209		{
210			const int d = ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(input_execs, j)((void*)(((char*)((input_execs)->data)) + (size_t)(input_execs
)->rsize * (size_t)(j))))->d;
211			for (k = 0; k < exec_info[d].input_size; k++)
212				if (exec_info[d].inputs[k] >= 0)
213				{
214					const ccv_nnc_tensor_symbol_t gradient_symbol = ccv_nnc_tensor_symbol_for_backward(graph, (ccv_nnc_tensor_symbol_t){
215						.graph = graph,
216						.d = exec_info[d].inputs[k]
217					});
218					if (gradient_symbol.d < 0)
219						continue;
220					const ccv_nnc_graph_exec_symbol_t backward = ccv_nnc_graph_exec_symbol_for_backward(graph, gradient_symbol);
221					if (backward.d < 0)
222						continue;
223					if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[backward.d].flags)((exec_info[backward.d].flags) & CCV_NNC_GRAPH_EXEC_DEAD))
224						continue;
225					int flag = 0;
226					for (l = 0; !flag && l < output_gradient_execs->rnum; l++)
227						if (((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(output_gradient_execs, l)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(l))))->d == backward.d)
228							flag = 1;
229					if (!flag)
230						ccv_array_push(output_gradient_execs, &backward);
231				}
232			if (exec_info[d].outgoings && exec_info[d].outgoings->rnum > 0)
233				for (k = 0; k < exec_info[d].outgoings->rnum; k++)
234				{
235					const int to_d = *(int*)ccv_array_get(exec_info[d].outgoings, k)((void*)(((char*)((exec_info[d].outgoings)->data)) + (size_t
)(exec_info[d].outgoings)->rsize * (size_t)(k)));
236					if (!ccv_nnc_cmd_is_backward(exec_info[to_d].cmd))
237						continue;
238					int flag = 0;
239					for (l = 0; !flag && l < output_gradient_execs->rnum; l++)
240						if (((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(output_gradient_execs, l)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(l))))->d == to_d)
241							flag = 1;
242					if (!flag)
243					{
244						const ccv_nnc_graph_exec_symbol_t backward = {
245							.graph = graph,
246							.d = to_d
247						};
248						ccv_array_push(output_gradient_execs, &backward);
249					}
250				}
251		}
252		// For output_gradient_execs, we can be opportunistic and use the wrt symbols (if exists) to find relevant bits.
253		// For input_gradient_execs, there is no other way but to loop over all outgoings, find the ones are direct link as backward execs.
254		for (j = 0; j < output_execs->rnum; j++)
255		{
256			const int d = ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(output_execs, j)((void*)(((char*)((output_execs)->data)) + (size_t)(output_execs
)->rsize * (size_t)(j))))->d;
257			if (exec_info[d].outgoings && exec_info[d].outgoings->rnum > 0)
258				for (k = 0; k < exec_info[d].outgoings->rnum; k++)
259				{
260					const int to_d = *(int*)ccv_array_get(exec_info[d].outgoings, k)((void*)(((char*)((exec_info[d].outgoings)->data)) + (size_t
)(exec_info[d].outgoings)->rsize * (size_t)(k)));
261					if (!ccv_nnc_cmd_is_backward(exec_info[to_d].cmd))
262						continue;
263					int flag = 0;
264					for (l = 0; !flag && l < input_gradient_execs->rnum; l++)
265						if (((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(input_gradient_execs, l)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(l))))->d == to_d)
266							flag = 1;
267					if (!flag)
268					{
269						const ccv_nnc_graph_exec_symbol_t backward = {
270							.graph = graph,
271							.d = to_d
272						};
273						ccv_array_push(input_gradient_execs, &backward);
274					}
275				}
276		}
277		// Note that we have to use up-to-date ones because the exec_info might have outgoings that is up-to-date.
278		ccv_nnc_graph_visit_t* const visit = ccv_nnc_graph_visit_new(graph, exec_info, graph->exec_symbol_info->rnum, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(input_gradient_execs, 0), input_gradient_execs->rnum, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(output_gradient_execs, 0), output_gradient_execs->rnum, 1)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
 ((graph->exec_symbol_info->rnum) - 1)); _visit_->size
 = 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
 edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
 = 0; for (_i_ = 0; _i_ < (graph->exec_symbol_info->
rnum); _i_++) _incoming_edges_ += ((exec_info)[_i_].outgoings
) ? (exec_info)[_i_].outgoings->rnum : 0; const int _heap_mem_
 = ((graph->exec_symbol_info->rnum) + _incoming_edges_ >
 1024); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
 = (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
graph->exec_symbol_info->rnum) + sizeof(int32_t) * ((graph
->exec_symbol_info->rnum) * 2 + _incoming_edges_)); else
 _incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (sizeof(
ccv_nnc_incoming_t) * (graph->exec_symbol_info->rnum) +
 sizeof(int32_t) * ((graph->exec_symbol_info->rnum) * 2
 + _incoming_edges_)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t
) * (graph->exec_symbol_info->rnum)); int32_t* _exists_
[2] = { (int32_t*)(_incomings_ + (graph->exec_symbol_info->
rnum)), (int32_t*)(_incomings_ + (graph->exec_symbol_info->
rnum)) + (graph->exec_symbol_info->rnum), }; int32_t* const
 _edges_ = _exists_[1] + (graph->exec_symbol_info->rnum
); for (_i_ = 0; _i_ < (input_gradient_execs->rnum); _i_
++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t*)((void*
)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs
)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__
 ({ if (((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs
)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t
)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
 __PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r =
 1; _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t*)((void*
)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs
)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_[2] = {
 (input_gradient_execs->rnum), 0, }; int _p_ = 0, _q_ = 1;
 while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for
 (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
 _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 1) continue
; _incomings_[_idx_].r = 2; if ((exec_info)[_idx_].outgoings)
 for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info
)[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].
outgoings)->rsize * (size_t)(_j_))); ++_incomings_[d].c; if
 (_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void
) sizeof ((_exist_size_[_q_] < (graph->exec_symbol_info
->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] <
 (graph->exec_symbol_info->rnum)) ; else __assert_fail (
"_exist_size_[_q_] < (graph->exec_symbol_info->rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
 __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] =
 d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (
_q_) = (_i_)); } for (_i_ = 0; _i_ < (input_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
 __PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r =
 3; _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t*)((void*
)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (input_gradient_execs
->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_
 = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
 _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 3) continue
; _incomings_[_idx_].r = 4; if ((exec_info)[_idx_].outgoings)
 for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info
)[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].
outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges
 == 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_
[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges -
 1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; if (_incomings_
[d].r != 2) continue; _incomings_[d].r = 3; ((void) sizeof ((
_exist_size_[_q_] < (graph->exec_symbol_info->rnum))
 ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph
->exec_symbol_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_symbol_info->rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
 __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] =
 d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (
_q_) = (_i_)); } for (_i_ = 0; _i_ < (output_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
 __PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r
 = 5; _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t*)((void
*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (output_gradient_execs
->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
 _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
 4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (graph->exec_symbol_info->rnum)) ? 1 : 0), __extension__
 ({ if (_exist_size_[_q_] < (graph->exec_symbol_info->
rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_symbol_info->rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
 __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] =
 d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (
_q_) = (_i_)); } for (_i_ = 0; _i_ < (output_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
 __PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].d
 = 1; } for (_i_ = 0; _i_ < (input_gradient_execs->rnum
); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t*)(
(void*)(((char*)((input_gradient_execs)->data)) + (size_t)
(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph ==
 graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
 __PRETTY_FUNCTION__); })); _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d; } _p_
 = 0; _q_ = 1; _exist_size_[0] = (input_gradient_execs->rnum
); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[_p_]
 > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; _visit_->
node[_visit_->size].index = ((_idx_)); _visit_->node[_visit_
->size].term = ((_incomings_[_idx_].d)); ++_visit_->size
;; if (_incomings_[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7
; } if ((exec_info)[_idx_].outgoings) { if ((exec_info)[_idx_
].outgoings->rnum == 1) { const int d = *(int*)((void*)(((
char*)(((exec_info)[_idx_].outgoings)->data)) + (size_t)((
exec_info)[_idx_].outgoings)->rsize * (size_t)(0))); --_incomings_
[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r ==
 6 && _d_ < (output_gradient_execs->rnum)) { _exists_
[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info
)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((
void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (
size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_
))); --_incomings_[d].c; if (_incomings_[d].c == 0 &&
 _incomings_[d].r == 6 && _d_ < (output_gradient_execs
->rnum)) { ((void) sizeof ((_exist_size_[_q_] < (graph->
exec_symbol_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (graph->exec_symbol_info->rnum)) ; else __assert_fail
 ("_exist_size_[_q_] < (graph->exec_symbol_info->rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
 __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] =
 d; ++_exist_size_[_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_)
 = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (output_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
 __PRETTY_FUNCTION__); })); if (_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r
 == 7) continue; if (!(1)) { ((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c
 == 0) ? 1 : 0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c
 == 0) ; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c == 0"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
 __PRETTY_FUNCTION__); })); } else if (_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c
 > 0) continue; _visit_->node[_visit_->size].index =
 ((((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs
)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t
)(0))))[_i_].d)); _visit_->node[_visit_->size].term = (
(_incomings_[((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)
((output_gradient_execs)->data)) + (size_t)(output_gradient_execs
)->rsize * (size_t)(0))))[_i_].d].d)); ++_visit_->size;
; } if (_heap_mem_) free(_incomings_); } while (0);; ((void) sizeof
 ((_visit_->size <= (graph->exec_symbol_info->rnum
)) ? 1 : 0), __extension__ ({ if (_visit_->size <= (graph
->exec_symbol_info->rnum)) ; else __assert_fail ("_visit_->size <= (graph->exec_symbol_info->rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 278, __extension__
 __PRETTY_FUNCTION__); })); _visit_; });
279		ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
 int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx; {
280			if (idx < exec_rnum && !CCV_NNC_GRAPH_EXEC_IS_DEAD(node->flags)((node->flags) & CCV_NNC_GRAPH_EXEC_DEAD))
281				maskbit[idx >> 5] |= (1u << (idx & 0x1f));
282		} ccv_nnc_graph_visit_endfor} }
283		ccv_array_clear(visited_backward_execs);
284		// Add more backward pass to the list. Note that we don't add everything, particularly there are new nodes created through gradient checkpointing are ignored.
285#define visitor(node, idx, _) \
286		if (idx < exec_rnum && !CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[idx].flags)((exec_info[idx].flags) & CCV_NNC_GRAPH_EXEC_DEAD) && maskbit[idx >> 5] & (1u << (idx & 0x1f))) \
287			ccv_array_add_unique_int(visited_backward_execs, idx);
288		CCV_NNC_GRAPH_VISIT(graph, reversed_nodes, exec_rnum, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(output_gradient_execs, 0), output_gradient_execs->rnum, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(input_gradient_execs, 0), input_gradient_execs->rnum, 0, visitor)do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
 edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
 = 0; for (_i_ = 0; _i_ < (exec_rnum); _i_++) _incoming_edges_
 += ((reversed_nodes)[_i_].outgoings) ? (reversed_nodes)[_i_]
.outgoings->rnum : 0; const int _heap_mem_ = ((exec_rnum) +
 _incoming_edges_ > 1024); ccv_nnc_incoming_t* _incomings_
; if (_heap_mem_) _incomings_ = (ccv_nnc_incoming_t*)malloc(sizeof
(ccv_nnc_incoming_t) * (exec_rnum) + sizeof(int32_t) * ((exec_rnum
) * 2 + _incoming_edges_)); else _incomings_ = (ccv_nnc_incoming_t
*)__builtin_alloca (sizeof(ccv_nnc_incoming_t) * (exec_rnum) +
 sizeof(int32_t) * ((exec_rnum) * 2 + _incoming_edges_)); memset
(_incomings_, 0, sizeof(ccv_nnc_incoming_t) * (exec_rnum)); int32_t
* _exists_[2] = { (int32_t*)(_incomings_ + (exec_rnum)), (int32_t
*)(_incomings_ + (exec_rnum)) + (exec_rnum), }; int32_t* const
 _edges_ = _exists_[1] + (exec_rnum); for (_i_ = 0; _i_ < (
output_gradient_execs->rnum); _i_++) { ((void) sizeof ((((
ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs
)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t
)(0))))[_i_].graph == graph) ? 1 : 0), __extension__ ({ if ((
(ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs
)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t
)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
 __PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r
 = 1; _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t*)((void
*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs
)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_[2] = {
 (output_gradient_execs->rnum), 0, }; int _p_ = 0, _q_ = 1
; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for
 (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
 _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 1) continue
; _incomings_[_idx_].r = 2; if ((reversed_nodes)[_idx_].outgoings
) for (_j_ = 0; _j_ < (reversed_nodes)[_idx_].outgoings->
rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((reversed_nodes
)[_idx_].outgoings)->data)) + (size_t)((reversed_nodes)[_idx_
].outgoings)->rsize * (size_t)(_j_))); ++_incomings_[d].c;
 if (_incomings_[d].r != 0) continue; _incomings_[d].r = 1; (
(void) sizeof ((_exist_size_[_q_] < (exec_rnum)) ? 1 : 0),
 __extension__ ({ if (_exist_size_[_q_] < (exec_rnum)) ; else
 __assert_fail ("_exist_size_[_q_] < (exec_rnum)", "ccv_cnnp_model_gradient_checkpointing.c"
, 288, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_]
[_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_
), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (output_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
 __PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r
 = 3; _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t*)((void
*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (output_gradient_execs
->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_
 = 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
 _idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 3) continue
; _incomings_[_idx_].r = 4; if ((reversed_nodes)[_idx_].outgoings
) for (_j_ = 0; _j_ < (reversed_nodes)[_idx_].outgoings->
rnum; _j_++) { const int d = *(int*)((void*)(((char*)(((reversed_nodes
)[_idx_].outgoings)->data)) + (size_t)((reversed_nodes)[_idx_
].outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].
edges == 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_
[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges -
 1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; if (_incomings_
[d].r != 2) continue; _incomings_[d].r = 3; ((void) sizeof ((
_exist_size_[_q_] < (exec_rnum)) ? 1 : 0), __extension__ (
{ if (_exist_size_[_q_] < (exec_rnum)) ; else __assert_fail
 ("_exist_size_[_q_] < (exec_rnum)", "ccv_cnnp_model_gradient_checkpointing.c"
, 288, __extension__ __PRETTY_FUNCTION__); })); _exists_[_q_]
[_exist_size_[_q_]] = d; ++_exist_size_[_q_]; } } ((_i_) = (_p_
), (_p_) = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (input_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
 __PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r =
 5; _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t*)((void*
)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (input_gradient_execs
->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
 _exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
 0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
 4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (exec_rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_rnum)) ; else __assert_fail ("_exist_size_[_q_] < (exec_rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
 __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] =
 d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (
_q_) = (_i_)); } for (_i_ = 0; _i_ < (input_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
 __PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].d =
 1; } for (_i_ = 0; _i_ < (output_gradient_execs->rnum)
; _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t*)((
void*)(((char*)((output_gradient_execs)->data)) + (size_t)
(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
 __PRETTY_FUNCTION__); })); _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d; }
 _p_ = 0; _q_ = 1; _exist_size_[0] = (output_gradient_execs->
rnum); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[
_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; visitor((
(reversed_nodes) + _idx_), (_idx_), (_incomings_[_idx_].d)); if
 (_incomings_[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7; } if
 ((reversed_nodes)[_idx_].outgoings) { if ((reversed_nodes)[_idx_
].outgoings->rnum == 1) { const int d = *(int*)((void*)(((
char*)(((reversed_nodes)[_idx_].outgoings)->data)) + (size_t
)((reversed_nodes)[_idx_].outgoings)->rsize * (size_t)(0))
); --_incomings_[d].c; if (_incomings_[d].c == 0 && _incomings_
[d].r == 6 && _d_ < (input_gradient_execs->rnum
)) { _exists_[_p_][_i_] = d; continue; } } else for (_j_ = 0;
 _j_ < (reversed_nodes)[_idx_].outgoings->rnum; _j_++) {
 const int d = *(int*)((void*)(((char*)(((reversed_nodes)[_idx_
].outgoings)->data)) + (size_t)((reversed_nodes)[_idx_].outgoings
)->rsize * (size_t)(_j_))); --_incomings_[d].c; if (_incomings_
[d].c == 0 && _incomings_[d].r == 6 && _d_ <
 (input_gradient_execs->rnum)) { ((void) sizeof ((_exist_size_
[_q_] < (exec_rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (exec_rnum)) ; else __assert_fail ("_exist_size_[_q_] < (exec_rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
 __PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] =
 d; ++_exist_size_[_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_)
 = (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (input_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
 == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
 __PRETTY_FUNCTION__); })); if (_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r ==
 7) continue; if (!(0)) { ((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c ==
 0) ? 1 : 0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c ==
 0) ; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c == 0"
, "ccv_cnnp_model_gradient_checkpointing.c", 288, __extension__
 __PRETTY_FUNCTION__); })); } else if (_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c >
 0) continue; visitor(((reversed_nodes) + ((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d), (
((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs
)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t
)(0))))[_i_].d), (_incomings_[((ccv_nnc_graph_exec_symbol_t*)
((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].d)
); } if (_heap_mem_) free(_incomings_); } while (0);;
289		for (j = 0; j < input_gradient_execs->rnum; j++)
290			ccv_array_add_unique_int(visited_backward_execs, ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(input_gradient_execs, j)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(j))))->d);
291#undef visitor
292		ccv_cnnp_gradient_checkpoint_build_t build = {
293			.tensor_context = {
294				.record = 1,
295				.tensor_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0),
296			},
297			.graph_exec_symbols = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0),
298			.all_tensor_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0),
299		};
300		build.tensor_context.old_tensor_symbol_new_hook_context = ccv_nnc_tensor_symbol_new_hook(graph, _ccv_cnnp_gradient_checkpoint_tensor_symbol_new_hook, &build, &build.tensor_context.old_tensor_symbol_new_hook);
301		build.tensor_context.old_tensor_symbol_alias_new_hook_context = ccv_nnc_tensor_symbol_alias_new_hook(graph, _ccv_cnnp_gradient_checkpoint_tensor_symbol_alias_new_hook, &build, &build.tensor_context.old_tensor_symbol_alias_new_hook);
302		build.old_graph_exec_symbol_new_hook_context = ccv_nnc_graph_exec_symbol_new_hook(graph, _ccv_cnnp_model_gradient_checkpoint_graph_exec_symbol_new_hook, &build, &build.old_graph_exec_symbol_new_hook);
303		ccv_array_clear(parameters);
304		ccv_array_clear(parameter_ids);
305		ccv_array_clear(parameter_trainables);
306		ccv_array_clear(internals);
307		ccv_array_clear(internal_ids);
308		ccv_cnnp_model_sequence_t model_sequence = {
309			.bank = kh_init(ccv_cnnp_model_name_bank)kh_init_ccv_cnnp_model_name_bank()
310		};
311		ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
312			.add_parameter_indices = 0,
313			.prefix = 't',
314			.sequence = &model_sequence,
315			.symbols = parameters,
316			.ids = parameter_ids,
317			.trainables = parameter_trainables,
318		};
319		ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
320			.add_parameter_indices = 0,
321			.prefix = 'r',
322			.sequence = &model_sequence,
323			.symbols = internals,
324			.ids = internal_ids,
325			.trainables = 0,
326		};
327		ccv_cnnp_model_build_data_t build_data = {
328			.is_trainable = checkpoint->is_trainable,
329			.model_sequence = &model_sequence,
330			.add_to_array = ccv_cnnp_model_add_to_array,
331			.parameters = parameters,
332			.context = {
333				.add_to_parameter = &add_to_parameter_context,
334				.add_to_output = &add_to_output_context,
335			},
336			.is_gradient_checkpointing = 1, // Mark this as true so we don't allocate gradient_checkpoints array or override the hooks.
337			.gradient_checkpoints = 0,
338		};
339		checkpoint->model->data = &build_data;
340		checkpoint->build(checkpoint->model, graph, checkpoint->inputs, checkpoint->input_size, max_outputs, checkpoint->output_size);
341		checkpoint->model->data = 0;
342		kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank)kh_destroy_ccv_cnnp_model_name_bank(model_sequence.bank);
343		if (model_sequence.sequences)
344			ccv_array_free(model_sequence.sequences);
345		ccv_nnc_tensor_symbol_new_hook(graph, build.tensor_context.old_tensor_symbol_new_hook, build.tensor_context.old_tensor_symbol_new_hook_context, 0);
346		ccv_nnc_tensor_symbol_alias_new_hook(graph, build.tensor_context.old_tensor_symbol_alias_new_hook, build.tensor_context.old_tensor_symbol_alias_new_hook_context, 0);
347		ccv_nnc_graph_exec_symbol_autogen(graph, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, 0)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(0))), build.graph_exec_symbols->rnum, 0);
348		for (j = 0; j < parameter_ids->rnum; j++)
349			ccfreefree(*(char**)ccv_array_get(parameter_ids, j)((void*)(((char*)((parameter_ids)->data)) + (size_t)(parameter_ids
)->rsize * (size_t)(j))));
350		for (j = 0; j < internal_ids->rnum; j++)
351			ccfreefree(*(char**)ccv_array_get(internal_ids, j)((void*)(((char*)((internal_ids)->data)) + (size_t)(internal_ids
)->rsize * (size_t)(j))));
352		// Note that there is no graph optimization applied here.
353		exec_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0)((void*)(((char*)((graph->exec_symbol_info)->data)) + (
size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)));
354		// Reuse existing one.
355		kh_clear(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols)kh_clear_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
);
356		for (j = 0; j < build.tensor_context.tensor_symbols->rnum; j++)
357		{
358			const int idx = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(build.tensor_context.tensor_symbols, j)((void*)(((char*)((build.tensor_context.tensor_symbols)->data
)) + (size_t)(build.tensor_context.tensor_symbols)->rsize *
 (size_t)(j))))->d;
359			if (idx < 0)
360				continue;
361			if (kh_get(ccv_cnnp_tensor_symbol_set, parameters_or_internals, idx)kh_get_ccv_cnnp_tensor_symbol_set(parameters_or_internals, idx
) != kh_end(parameters_or_internals)((parameters_or_internals)->n_buckets))
362				continue;
363			int ret;
364			kh_put(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols, idx, &ret)kh_put_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
, idx, &ret);
365		}
366		ccv_array_t* const newly_input_execs = input_execs;
367		ccv_array_t* const newly_output_execs = output_execs;
368		ccv_array_clear(newly_input_execs);
369		ccv_array_clear(newly_output_execs);
370		for (j = 0; j < build.graph_exec_symbols->rnum; j++)
371		{
372			const int idx = ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(j))))->d;
373			if (idx < 0)
374				continue;
375			if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[idx].flags)((exec_info[idx].flags) & CCV_NNC_GRAPH_EXEC_DEAD))
376				continue;
377			const ccv_nnc_graph_exec_symbol_t symbol = {
378				.graph = graph,
379				.d = idx
380			};
381			const int* inputs = exec_info[idx].inputs;
382			int input_size = exec_info[idx].input_size;
383			// Only go through forward pass.
384			assert(!ccv_nnc_cmd_is_backward(exec_info[idx].cmd))((void) sizeof ((!ccv_nnc_cmd_is_backward(exec_info[idx].cmd)
) ? 1 : 0), __extension__ ({ if (!ccv_nnc_cmd_is_backward(exec_info
[idx].cmd)) ; else __assert_fail ("!ccv_nnc_cmd_is_backward(exec_info[idx].cmd)"
, "ccv_cnnp_model_gradient_checkpointing.c", 384, __extension__
 __PRETTY_FUNCTION__); }));
385			int flag = 0;
386			for (k = 0; inputs && k < input_size && !flag; k++)
387				if (inputs[k] >= 0)
388					for (l = 0; l < checkpoint->input_size && !flag; l++)
389						if (checkpoint->inputs[l].d >= 0 && inputs[k] == checkpoint->inputs[l].d)
390							flag = 1;
391			// Input logic is different from output logic. We need to filter out these exec that contains inputs from within the graph.
392			for (k = 0; inputs && k < input_size && flag; k++)
393				if (inputs[k] >= 0 && kh_get(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols, inputs[k])kh_get_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
, inputs[k]) != kh_end(newly_created_tensor_symbols)((newly_created_tensor_symbols)->n_buckets))
394					flag = 0;
395			if (flag)
396				ccv_array_push(newly_input_execs, &symbol);
397			flag = 0;
398			const int* outputs = exec_info[idx].outputs;
399			int output_size = exec_info[idx].output_size;
400			for (k = 0; inputs && k < output_size && !flag; k++)
401				if (outputs[k] >= 0)
402					for (l = 0; l < checkpoint->output_size && !flag; l++)
403						if (max_outputs[l].d >= 0 && outputs[k] == max_outputs[l].d)
404							flag = 1;
405			if (flag)
406				ccv_array_push(newly_output_execs, &symbol);
407		}
408		for (j = 0; j < checkpoint->input_size; j++)
409			if (checkpoint->inputs[j].d >= 0)
410				ccv_array_push(parameters, checkpoint->inputs + j);
411		ccv_nnc_symbolic_graph_simplify(graph,
412			SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
}, (1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 -1)
413				CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
}, (1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 -1)
414				CCV_NNC_SIMPLIFY_OPS_FUSION)(const int []){CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION
, CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT, CCV_NNC_SIMPLIFY_OPS_FUSION
}, (1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0
 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +
0 +0 +0 +0 -1),
415			ccv_array_get(parameters, 0)((void*)(((char*)((parameters)->data)) + (size_t)(parameters
)->rsize * (size_t)(0))), parameters->rnum,
416			max_outputs, checkpoint->output_size,
417			ccv_array_get(newly_input_execs, 0)((void*)(((char*)((newly_input_execs)->data)) + (size_t)(newly_input_execs
)->rsize * (size_t)(0))), newly_input_execs->rnum, ccv_array_get(newly_output_execs, 0)((void*)(((char*)((newly_output_execs)->data)) + (size_t)(
newly_output_execs)->rsize * (size_t)(0))), newly_output_execs->rnum);
418		ccv_nnc_graph_exec_symbol_new_hook(graph, build.old_graph_exec_symbol_new_hook, build.old_graph_exec_symbol_new_hook_context, 0);
419		// Need to autogen and redo source / destination.
420		ccv_nnc_graph_exec_symbol_autogen(graph, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, 0)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(0))), build.graph_exec_symbols->rnum, 0);
421		ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, 0)((void*)(((char*)((graph->tensor_symbol_info)->data)) +
 (size_t)(graph->tensor_symbol_info)->rsize * (size_t)(
0)));
422		exec_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0)((void*)(((char*)((graph->exec_symbol_info)->data)) + (
size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)));
423		ccv_array_clear(newly_input_execs);
424		for (j = 0; j < build.graph_exec_symbols->rnum; j++)
425		{
426			const int idx = ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(j))))->d;
427			if (idx < 0)
428				continue;
429			if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[idx].flags)((exec_info[idx].flags) & CCV_NNC_GRAPH_EXEC_DEAD))
430				continue;
431			const ccv_nnc_graph_exec_symbol_t symbol = {
432				.graph = graph,
433				.d = idx
434			};
435			const int* inputs = exec_info[idx].inputs;
436			int input_size = exec_info[idx].input_size;
437			// Only go through forward pass.
438			assert(!ccv_nnc_cmd_is_backward(exec_info[idx].cmd))((void) sizeof ((!ccv_nnc_cmd_is_backward(exec_info[idx].cmd)
) ? 1 : 0), __extension__ ({ if (!ccv_nnc_cmd_is_backward(exec_info
[idx].cmd)) ; else __assert_fail ("!ccv_nnc_cmd_is_backward(exec_info[idx].cmd)"
, "ccv_cnnp_model_gradient_checkpointing.c", 438, __extension__
 __PRETTY_FUNCTION__); }));
439			int flag = 0;
440			for (k = 0; inputs && k < input_size && !flag; k++)
441				if (inputs[k] >= 0)
442					for (l = 0; l < checkpoint->input_size && !flag; l++)
443						if (checkpoint->inputs[l].d >= 0 && inputs[k] == checkpoint->inputs[l].d)
444							flag = 1;
445			for (k = 0; inputs && k < input_size && flag; k++)
446				if (inputs[k] >= 0 && kh_get(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols, inputs[k])kh_get_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
, inputs[k]) != kh_end(newly_created_tensor_symbols)((newly_created_tensor_symbols)->n_buckets))
447					flag = 0;
448			if (flag)
449				ccv_array_push(newly_input_execs, &symbol);
450		}
451		// Build a map between old tensor symbols and new tensor symbols.
452		assert(build.tensor_context.tensor_symbols->rnum <= checkpoint->tensor_symbols->rnum)((void) sizeof ((build.tensor_context.tensor_symbols->rnum
 <= checkpoint->tensor_symbols->rnum) ? 1 : 0), __extension__
 ({ if (build.tensor_context.tensor_symbols->rnum <= checkpoint
->tensor_symbols->rnum) ; else __assert_fail ("build.tensor_context.tensor_symbols->rnum <= checkpoint->tensor_symbols->rnum"
, "ccv_cnnp_model_gradient_checkpointing.c", 452, __extension__
 __PRETTY_FUNCTION__); }));
453		// Build a map to potentially map from old input to new input. 
454		kh_clear(ccv_cnnp_tensor_symbol_map, symbol_map)kh_clear_ccv_cnnp_tensor_symbol_map(symbol_map);
455		for (j = 0, k = 0; j < build.tensor_context.tensor_symbols->rnum && k < checkpoint->tensor_symbols->rnum;)
456		{
457			const int from_d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(checkpoint->tensor_symbols, k)((void*)(((char*)((checkpoint->tensor_symbols)->data)) +
 (size_t)(checkpoint->tensor_symbols)->rsize * (size_t)
(k))))->d;
458			if (from_d < 0) // This is removed, move to the next one.
459			{
460				++j;
461				++k;
462				continue;
463			}
464			const int to_d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(build.tensor_context.tensor_symbols, j)((void*)(((char*)((build.tensor_context.tensor_symbols)->data
)) + (size_t)(build.tensor_context.tensor_symbols)->rsize *
 (size_t)(j))))->d;
465			assert(to_d >= 0)((void) sizeof ((to_d >= 0) ? 1 : 0), __extension__ ({ if (
to_d >= 0) ; else __assert_fail ("to_d >= 0", "ccv_cnnp_model_gradient_checkpointing.c"
, 465, __extension__ __PRETTY_FUNCTION__); }));
466			int from_flag = kh_get(ccv_cnnp_tensor_symbol_set, parameters_or_internals, from_d)kh_get_ccv_cnnp_tensor_symbol_set(parameters_or_internals, from_d
) != kh_end(parameters_or_internals)((parameters_or_internals)->n_buckets);
467			int to_flag = kh_get(ccv_cnnp_tensor_symbol_set, parameters_or_internals, to_d)kh_get_ccv_cnnp_tensor_symbol_set(parameters_or_internals, to_d
) != kh_end(parameters_or_internals)((parameters_or_internals)->n_buckets);
468			if (from_flag)
469				++k;
470			if (to_flag)
471				++j;
472			if (from_flag || to_flag)
473				continue;
474			++k;
475			++j;
476			// Skip if from_d is outputs.
477			for (l = 0; l < !from_flag && checkpoint->output_size; l++)
478				if (checkpoint->outputs[l].d == from_d)
479					from_flag = 1;
480			if (from_flag)
481				continue;
482			// Skip if to_d is outputs.
483			for (l = 0; l < !to_flag && checkpoint->output_size; l++)
484				if (checkpoint->outputs[l].d == to_d)
485					to_flag = 1;
486			if (to_flag)
487				continue;
488			int ret = 0;
489			khiter_t h = kh_put(ccv_cnnp_tensor_symbol_map, symbol_map, from_d, &ret)kh_put_ccv_cnnp_tensor_symbol_map(symbol_map, from_d, &ret
);
490			kh_val(symbol_map, h)((symbol_map)->vals[h]) = to_d;
491		}
492		// Now go over all backward passes to replace inputs with the ones from symbol map. Record these that are used.
493		ccv_array_clear(newly_used_outputs);
494		ccv_array_clear(replaced_backward_execs);
495		for (j = 0; j < visited_backward_execs->rnum; j++)
496		{
497			const int idx = *(int*)ccv_array_get(visited_backward_execs, j)((void*)(((char*)((visited_backward_execs)->data)) + (size_t
)(visited_backward_execs)->rsize * (size_t)(j)));
498			if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[idx].flags)((exec_info[idx].flags) & CCV_NNC_GRAPH_EXEC_DEAD))
499				continue;
500			assert(idx >= 0)((void) sizeof ((idx >= 0) ? 1 : 0), __extension__ ({ if (
idx >= 0) ; else __assert_fail ("idx >= 0", "ccv_cnnp_model_gradient_checkpointing.c"
, 500, __extension__ __PRETTY_FUNCTION__); }));
501			assert(idx < exec_rnum)((void) sizeof ((idx < exec_rnum) ? 1 : 0), __extension__ (
{ if (idx < exec_rnum) ; else __assert_fail ("idx < exec_rnum"
, "ccv_cnnp_model_gradient_checkpointing.c", 501, __extension__
 __PRETTY_FUNCTION__); }));
502			if (!ccv_nnc_cmd_is_backward(exec_info[idx].cmd))
503				continue;
504			for (k = 0; k < exec_info[idx].input_size; k++)
505				if (exec_info[idx].inputs[k] >= 0)
506				{
507					const khiter_t h = kh_get(ccv_cnnp_tensor_symbol_map, symbol_map, exec_info[idx].inputs[k])kh_get_ccv_cnnp_tensor_symbol_map(symbol_map, exec_info[idx].
inputs[k]);
508					if (h != kh_end(symbol_map)((symbol_map)->n_buckets)) // Replacing it.
509					{
510						int newly_created_output = kh_val(symbol_map, h)((symbol_map)->vals[h]);
511						exec_info[idx].inputs[k] = newly_created_output;
512						ccv_array_add_unique_int(newly_used_outputs, newly_created_output);
513						if (tensor_symbol_info[newly_created_output].alias_ref > 0)
514						{
515							newly_created_output = tensor_symbol_info[newly_created_output].alias_ref - 1;
516							ccv_array_add_unique_int(newly_used_outputs, newly_created_output);
517						}
518						ccv_array_add_unique_int(replaced_backward_execs, idx);
519					}
520				}
521		}
522		for (j = 0; j < build.graph_exec_symbols->rnum; j++)
523		{
524			ccv_nnc_graph_exec_symbol_t* const symbol = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(j)));
525			if (symbol->d < 0)
526				continue;
527			if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[symbol->d].flags)((exec_info[symbol->d].flags) & CCV_NNC_GRAPH_EXEC_DEAD
))
528				continue;
529			int x, y;
530			for (k = 0; k < replaced_backward_execs->rnum; k++)
531			{
532				const int idx = *(int*)ccv_array_get(replaced_backward_execs, k)((void*)(((char*)((replaced_backward_execs)->data)) + (size_t
)(replaced_backward_execs)->rsize * (size_t)(k)));
533				assert(idx >= 0)((void) sizeof ((idx >= 0) ? 1 : 0), __extension__ ({ if (
idx >= 0) ; else __assert_fail ("idx >= 0", "ccv_cnnp_model_gradient_checkpointing.c"
, 533, __extension__ __PRETTY_FUNCTION__); }));
534				assert(idx < exec_rnum)((void) sizeof ((idx < exec_rnum) ? 1 : 0), __extension__ (
{ if (idx < exec_rnum) ; else __assert_fail ("idx < exec_rnum"
, "ccv_cnnp_model_gradient_checkpointing.c", 534, __extension__
 __PRETTY_FUNCTION__); }));
535				assert(ccv_nnc_cmd_is_backward(exec_info[idx].cmd))((void) sizeof ((ccv_nnc_cmd_is_backward(exec_info[idx].cmd))
 ? 1 : 0), __extension__ ({ if (ccv_nnc_cmd_is_backward(exec_info
[idx].cmd)) ; else __assert_fail ("ccv_nnc_cmd_is_backward(exec_info[idx].cmd)"
, "ccv_cnnp_model_gradient_checkpointing.c", 535, __extension__
 __PRETTY_FUNCTION__); }));
536				int flag = 0;
537				for (x = 0; !flag && x < exec_info[idx].input_size; x++)
538				{
539					int x_d = exec_info[idx].inputs[x];
540					if (x_d < 0)
541						continue;
542					if (tensor_symbol_info[x_d].alias_ref > 0)
543						x_d = tensor_symbol_info[x_d].alias_ref - 1;
544					for (y = 0; !flag && y < exec_info[symbol->d].output_size; y++)
545					{
546						int y_d = exec_info[symbol->d].outputs[y];
547						if (y_d < 0)
548							continue;
549						if (tensor_symbol_info[y_d].alias_ref > 0)
550							y_d = tensor_symbol_info[y_d].alias_ref - 1;
551						if (x_d == y_d)
552							flag = 1;
553					}
554				}
555				if (flag)
556					ccv_nnc_graph_exec_symbol_concat(graph, *symbol, (ccv_nnc_graph_exec_symbol_t){
557						.graph = graph,
558						.d = idx
559					});
560			}
561		}
562		// Find parents to visited_backward_execs, and use that as the starting point of all newly added graph_exec_symbols. Use the visited backward execs as the source, use all its parents as destination, go through with graph visit.
563		ccv_sparse_matrix_t* const exec_dep = ccv_sparse_matrix_new(graph->exec_symbol_info->rnum, graph->exec_symbol_info->rnum, CCV_8U | CCV_C1, CCV_SPARSE_ROW_MAJOR, 0);
564#define for_block(x, val) \
565		do { \
566			if (((uint8_t*)val)[0] != 0) \
567				ccv_array_push(buf, &x); \
568		} while (0)
569		const uint8_t one = 1;
570		// Now go from outputs to inputs, unmark visited ones.
571		ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
 int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx; {
572			if (idx < exec_rnum && !CCV_NNC_GRAPH_EXEC_IS_DEAD(node->flags)((node->flags) & CCV_NNC_GRAPH_EXEC_DEAD) && maskbit[idx >> 5] & (1u << (idx & 0x1f)))
573			{
574				ccv_array_clear(buf);
575				ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep, idx);
576				if (vector)
577					CCV_SPARSE_VECTOR_FOREACH(exec_dep, vector, for_block)do { switch ((((exec_dep)->type) & 0xFF000)) { case CCV_32S
: { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
 CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
 -4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
 (_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
 * _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
 _d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i32 + (0))); } } } while (0); break; } case CCV_32F:
 { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
 CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
 -4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
 (_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
 * _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
 _d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f32 + (0))); } } } while (0); break; } case CCV_64S:
 { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
 CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
 -4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
 (_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
 * _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
 _d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i64 + (0))); } } } while (0); break; } case CCV_64F:
 { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
 CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
 -4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
 (_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
 * _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
 _d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f64 + (0))); } } } while (0); break; } default: { do
 { int _i_; __attribute__((unused)) const size_t _c_ = (((exec_dep
)->type) & 0xFFF); if ((exec_dep)->type & CCV_DENSE_VECTOR
) { for (_i_ = 0; _i_ < (vector)->size; _i_++) { for_block
((_i_), ((vector)->data.u8 + (_i_ * _c_))); } } else { const
 size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size
[(((exec_dep)->type) & 0xFF000) >> 12] * (((exec_dep
)->type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_
 = (uint8_t*)(vector)->index; for (_i_ = 0; _i_ < (vector
)->size; _i_++) { ccv_sparse_matrix_index_t* const _idx_i_
 = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_ * _i_); if
 (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t _d_
 = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.u8 + (0))); } } } while (0); } } } while (0);
578				if (node->outgoings && node->outgoings->rnum > 0)
579				{
580					ccv_array_t* const outgoings = node->outgoings;
581					for (k = 0; k < outgoings->rnum; k++)
582					{
583						const int outgoing_d = *(int*)ccv_array_get(outgoings, k)((void*)(((char*)((outgoings)->data)) + (size_t)(outgoings
)->rsize * (size_t)(k)));
584						if (outgoing_d >= exec_rnum)
585							continue;
586						int l;
587						// We cannot avoid the ones that visited, because these may not contain all the deps.
588						ccv_set_sparse_matrix_cell(exec_dep, outgoing_d, idx, &one);
589						for (l = 0; l < buf->rnum; l++)
590							ccv_set_sparse_matrix_cell(exec_dep, outgoing_d, *(int*)ccv_array_get(buf, l)((void*)(((char*)((buf)->data)) + (size_t)(buf)->rsize *
 (size_t)(l))), &one);
591					}
592				}
593			}
594		} ccv_nnc_graph_visit_endfor} }
595		// Now go from outputs to inputs, unmark visited ones.
596		ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
 int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx; {
597			if (idx < exec_rnum)
598				maskbit[idx >> 5] &= ~(1u << (idx & 0x1f));
599		} ccv_nnc_graph_visit_endfor} }
600		ccv_nnc_graph_visit_free(visit);
601#undef for_block
602		// Go through visited backward execs, remove the ones that has no dependency on any replaced backward execs.
603		for (j = 0; j < visited_backward_execs->rnum;)
604		{
605			const int idx = *(int*)ccv_array_get(visited_backward_execs, j)((void*)(((char*)((visited_backward_execs)->data)) + (size_t
)(visited_backward_execs)->rsize * (size_t)(j)));
606			if (ccv_array_contain_int(replaced_backward_execs, idx))
607			{
608				++j;
609				continue;
610			}
611			ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep, idx);
612			int flag = 0;
613#define for_block(x, val) \
614			do { \
615				if (((uint8_t*)val)[0] != 0) \
616					if (ccv_array_contain_int(replaced_backward_execs, x)) \
617						flag = 1; \
618			} while (0)
619			if (vector)
620				CCV_SPARSE_VECTOR_FOREACH(exec_dep, vector, for_block)do { switch ((((exec_dep)->type) & 0xFF000)) { case CCV_32S
: { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
 CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
 -4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
 (_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
 * _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
 _d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i32 + (0))); } } } while (0); break; } case CCV_32F:
 { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
 CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
 -4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
 (_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
 * _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
 _d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f32 + (0))); } } } while (0); break; } case CCV_64S:
 { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
 CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
 -4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
 (_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
 * _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
 _d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i64 + (0))); } } } while (0); break; } case CCV_64F:
 { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
 CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
 -4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
 (_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
 * _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
 _d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f64 + (0))); } } } while (0); break; } default: { do
 { int _i_; __attribute__((unused)) const size_t _c_ = (((exec_dep
)->type) & 0xFFF); if ((exec_dep)->type & CCV_DENSE_VECTOR
) { for (_i_ = 0; _i_ < (vector)->size; _i_++) { for_block
((_i_), ((vector)->data.u8 + (_i_ * _c_))); } } else { const
 size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size
[(((exec_dep)->type) & 0xFF000) >> 12] * (((exec_dep
)->type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_
 = (uint8_t*)(vector)->index; for (_i_ = 0; _i_ < (vector
)->size; _i_++) { ccv_sparse_matrix_index_t* const _idx_i_
 = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_ * _i_); if
 (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t _d_
 = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.u8 + (0))); } } } while (0); } } } while (0);
621#undef for_block
622			if (!flag)
623			{
624				if (j < visited_backward_execs->rnum - 1)
625					*(int*)ccv_array_get(visited_backward_execs, j)((void*)(((char*)((visited_backward_execs)->data)) + (size_t
)(visited_backward_execs)->rsize * (size_t)(j))) = *(int*)ccv_array_get(visited_backward_execs, visited_backward_execs->rnum - 1)((void*)(((char*)((visited_backward_execs)->data)) + (size_t
)(visited_backward_execs)->rsize * (size_t)(visited_backward_execs
->rnum - 1)));
626				--visited_backward_execs->rnum;
627				continue;
628			}
629			++j;
630		}
631		// Now go through all replaced_backward_execs to find the ones has no dependencies in visited_backward_execs.
632		for (j = 0; j < replaced_backward_execs->rnum; j++)
633		{
634			const int idx = *(int*)ccv_array_get(replaced_backward_execs, j)((void*)(((char*)((replaced_backward_execs)->data)) + (size_t
)(replaced_backward_execs)->rsize * (size_t)(j)));
635			ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep, idx);
636			int flag = 0;
637#define for_block(x, val) \
638			do { \
639				if (((uint8_t*)val)[0] != 0) \
640					if (ccv_array_contain_int(visited_backward_execs, x)) \
641						flag = 1; \
642			} while (0)
643			if (vector)
644				CCV_SPARSE_VECTOR_FOREACH(exec_dep, vector, for_block)do { switch ((((exec_dep)->type) & 0xFF000)) { case CCV_32S
: { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
 CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
 -4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
 (_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
 * _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
 _d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i32 + (0))); } } } while (0); break; } case CCV_32F:
 { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
 CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f32 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
 -4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
 (_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
 * _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
 _d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f32 + (0))); } } } while (0); break; } case CCV_64S:
 { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
 CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.i64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
 -4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
 (_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
 * _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
 _d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.i64 + (0))); } } } while (0); break; } case CCV_64F:
 { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((exec_dep)->type) & 0xFFF); if ((exec_dep)->type &
 CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (vector)->size
; _i_++) { for_block((_i_), ((vector)->data.f64 + (_i_ * _c_
))); } } else { const size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t
) + ((_ccv_get_data_type_size[(((exec_dep)->type) & 0xFF000
) >> 12] * (((exec_dep)->type) & 0xFFF) + 3) &
 -4); uint8_t* const _vidx_ = (uint8_t*)(vector)->index; for
 (_i_ = 0; _i_ < (vector)->size; _i_++) { ccv_sparse_matrix_index_t
* const _idx_i_ = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_
 * _i_); if (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t
 _d_ = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.f64 + (0))); } } } while (0); break; } default: { do
 { int _i_; __attribute__((unused)) const size_t _c_ = (((exec_dep
)->type) & 0xFFF); if ((exec_dep)->type & CCV_DENSE_VECTOR
) { for (_i_ = 0; _i_ < (vector)->size; _i_++) { for_block
((_i_), ((vector)->data.u8 + (_i_ * _c_))); } } else { const
 size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size
[(((exec_dep)->type) & 0xFF000) >> 12] * (((exec_dep
)->type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_
 = (uint8_t*)(vector)->index; for (_i_ = 0; _i_ < (vector
)->size; _i_++) { ccv_sparse_matrix_index_t* const _idx_i_
 = (ccv_sparse_matrix_index_t*)(_vidx_ + _idx_size_ * _i_); if
 (_idx_i_->ifbit <= 1) continue; ccv_numeric_data_t _d_
 = { .u8 = (uint8_t*)(_idx_i_ + 1) }; for_block((_idx_i_->
i), (_d_.u8 + (0))); } } } while (0); } } } while (0);
645#undef for_block
646			// If this one has no parents that is within the visited_backward_execs, it is a good place for us to add all its parents as dependency for input_execs.
647			if (!flag)
648			{
649				assert(idx < exec_rnum)((void) sizeof ((idx < exec_rnum) ? 1 : 0), __extension__ (
{ if (idx < exec_rnum) ; else __assert_fail ("idx < exec_rnum"
, "ccv_cnnp_model_gradient_checkpointing.c", 649, __extension__
 __PRETTY_FUNCTION__); }));
650				ccv_array_t* const outgoings = reversed_nodes[idx].outgoings;
651				assert(outgoings)((void) sizeof ((outgoings) ? 1 : 0), __extension__ ({ if (outgoings
) ; else __assert_fail ("outgoings", "ccv_cnnp_model_gradient_checkpointing.c"
, 651, __extension__ __PRETTY_FUNCTION__); }));
652				for (k = 0; k < outgoings->rnum; k++)
653				{
654					const int d = *(int*)ccv_array_get(outgoings, k)((void*)(((char*)((outgoings)->data)) + (size_t)(outgoings
)->rsize * (size_t)(k)));
655					for (l = 0; l < newly_input_execs->rnum; l++)
656					{
657						ccv_nnc_graph_exec_symbol_concat(graph, (ccv_nnc_graph_exec_symbol_t){
658							.graph = graph,
659							.d = d
660						}, *(ccv_nnc_graph_exec_symbol_t*)ccv_array_get(newly_input_execs, l)((void*)(((char*)((newly_input_execs)->data)) + (size_t)(newly_input_execs
)->rsize * (size_t)(l))));
661					}
662				}
663			}
664		}
665		ccv_matrix_free(exec_dep);
666		// Go through all exec, free ones that doesn't have output used.
667		// Reuse this array because it is not useful any more.
668		ccv_array_t* forward_pass_inputs = visited_backward_execs;
669		int any_deleted;
670		do {
671			// Build a map of still active inputs.
672			ccv_array_clear(forward_pass_inputs);
673			for (j = 0; j < build.graph_exec_symbols->rnum; j++)
674			{
675				ccv_nnc_graph_exec_symbol_t* const symbol = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(j)));
676				if (symbol->d < 0)
677					continue;
678				if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[symbol->d].flags)((exec_info[symbol->d].flags) & CCV_NNC_GRAPH_EXEC_DEAD
))
679					continue;
680				int* const inputs = exec_info[symbol->d].inputs;
681				const int input_size = exec_info[symbol->d].input_size;
682				for (k = 0; k < input_size; k++)
683				{
684					int d = inputs[k];
685					if (d < 0)
686						continue;
687					ccv_array_add_unique_int(forward_pass_inputs, d);
688					if (tensor_symbol_info[d].alias_ref > 0)
689					{
690						d = tensor_symbol_info[d].alias_ref - 1;
691						ccv_array_add_unique_int(forward_pass_inputs, d);
692					}
693				}
694			}
695			any_deleted = 0;
696			for (j = 0; j < build.graph_exec_symbols->rnum; j++)
697			{
698				ccv_nnc_graph_exec_symbol_t* const symbol = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(j)));
699				if (symbol->d < 0)
700					continue;
701				if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[symbol->d].flags)((exec_info[symbol->d].flags) & CCV_NNC_GRAPH_EXEC_DEAD
))
702					continue;
703				int* const outputs = exec_info[symbol->d].outputs;
704				const int output_size = exec_info[symbol->d].output_size;
705				int flag = 0;
706				for (k = 0; !flag && k < output_size; k++)
707				{
708					int d = outputs[k];
709					if (d < 0)
710						continue;
711					flag = ccv_array_contain_int(newly_used_outputs, d) || ccv_array_contain_int(forward_pass_inputs, d);
712					if (!flag && tensor_symbol_info[d].alias_ref > 0)
713					{
714						d = tensor_symbol_info[d].alias_ref - 1;
715						flag = ccv_array_contain_int(newly_used_outputs, d) || ccv_array_contain_int(forward_pass_inputs, d);
716					}
717				}
718				if (flag)
719					continue;
720				ccv_nnc_graph_exec_symbol_free(graph, *symbol);
721				symbol->d = -1;
722				symbol->graph = 0;
723				any_deleted = 1;
724			}
725		} while (any_deleted);
726		ccv_array_clear(forward_pass_inputs);
727		for (j = 0; j < build.graph_exec_symbols->rnum; j++)
728		{
729			ccv_nnc_graph_exec_symbol_t* const symbol = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(j)));
730			if (symbol->d < 0)
731				continue;
732			if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[symbol->d].flags)((exec_info[symbol->d].flags) & CCV_NNC_GRAPH_EXEC_DEAD
))
733				continue;
734			int* const inputs = exec_info[symbol->d].inputs;
735			const int input_size = exec_info[symbol->d].input_size;
736			for (k = 0; k < input_size; k++)
737			{
738				if (inputs[k] < 0)
739					continue;
740				ccv_array_add_unique_int(forward_pass_inputs, inputs[k]);
741				if (tensor_symbol_info[inputs[k]].alias_ref > 0)
742					ccv_array_add_unique_int(forward_pass_inputs, tensor_symbol_info[inputs[k]].alias_ref - 1);
743			}
744			int* const outputs = exec_info[symbol->d].outputs;
745			const int output_size = exec_info[symbol->d].output_size;
746			for (k = 0; k < output_size; k++)
747			{
748				if (outputs[k] < 0)
749					continue;
750				ccv_array_add_unique_int(forward_pass_inputs, outputs[k]);
751				if (tensor_symbol_info[outputs[k]].alias_ref > 0)
752					ccv_array_add_unique_int(forward_pass_inputs, tensor_symbol_info[outputs[k]].alias_ref - 1);
753			}
754		}
755		// Free unused tensor symbols.
756		for (j = 0; j < build.all_tensor_symbols->rnum; j++)
757		{
758			const ccv_nnc_tensor_symbol_t* symbol = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(build.all_tensor_symbols, j)((void*)(((char*)((build.all_tensor_symbols)->data)) + (size_t
)(build.all_tensor_symbols)->rsize * (size_t)(j))));
759			if (ccv_array_contain_int(newly_used_outputs, symbol->d) || ccv_array_contain_int(forward_pass_inputs, symbol->d))
760				continue;
761			if (tensor_symbol_info[symbol->d].alias_ref > 0)
762			{
763				const int d = tensor_symbol_info[symbol->d].alias_ref - 1;
764				if (ccv_array_contain_int(newly_used_outputs, d) || ccv_array_contain_int(forward_pass_inputs, d))
765					continue;
766			}
767			ccv_nnc_tensor_symbol_free(graph, *symbol);
768		}
769		for (j = 0; j < build.graph_exec_symbols->rnum; j++)
770		{
771			ccv_nnc_graph_exec_symbol_t* const symbol = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j)((void*)(((char*)((build.graph_exec_symbols)->data)) + (size_t
)(build.graph_exec_symbols)->rsize * (size_t)(j)));
772			if (symbol->d < 0)
773				continue;
774			if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[symbol->d].flags)((exec_info[symbol->d].flags) & CCV_NNC_GRAPH_EXEC_DEAD
))
775				continue;
776			ccv_nnc_graph_exec_symbol_set_flags(graph, *symbol, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
777		}
778		// Free these newly created execs and tensor symbols.
779		ccv_array_free(build.tensor_context.tensor_symbols);
780		ccv_array_free(build.graph_exec_symbols);
781		ccv_array_free(build.all_tensor_symbols);
782	}
783	kh_destroy(ccv_cnnp_tensor_symbol_map, symbol_map)kh_destroy_ccv_cnnp_tensor_symbol_map(symbol_map);
784	kh_destroy(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols)kh_destroy_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
);
785	kh_destroy(ccv_cnnp_tensor_symbol_set, parameters_or_internals)kh_destroy_ccv_cnnp_tensor_symbol_set(parameters_or_internals
);
786	ccfreefree(max_outputs);
787	ccv_array_free(buf);
788	ccv_array_free(newly_used_outputs);
789	ccv_array_free(parameters);
790	ccv_array_free(parameter_ids);
791	ccv_array_free(parameter_trainables);
792	ccv_array_free(internals);
793	ccv_array_free(internal_ids);
794	ccfreefree(maskbit);
795	ccv_array_free(input_gradient_execs);
796	ccv_array_free(output_gradient_execs);
797	ccv_array_free(input_execs);
798	ccv_array_free(output_execs);
799	ccv_array_free(replaced_backward_execs);
800	ccv_array_free(visited_backward_execs);
801	for (i = 0; i < exec_rnum; i++)
802		if (reversed_nodes[i].outgoings)
803			ccv_array_free(reversed_nodes[i].outgoings);
804	ccfreefree(reversed_nodes);
805}