Bug Summary

File: nnc/ccv_cnnp_model_gradient_checkpointing.c
Warning: line 75, column 1
Array access (via field 'flags') results in a null pointer dereference
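
The dereference is reported inside the kh_put helper that this file instantiates with KHASH_SET_INIT_INT(ccv_cnnp_tensor_symbol_set). Below is a minimal, hedged sketch of the call pattern the analyzer traces through that helper; it assumes klib's khash.h is available on the include path (the real file pulls the macros in through ccv's internal headers), and the step numbers refer to the analyzer path annotations in the listing:

#include "khash.h" /* klib khash; assumed to be on the include path for this sketch */

KHASH_SET_INIT_INT(ccv_cnnp_tensor_symbol_set)

int main(void)
{
    /* kh_init calloc's the table, so its 'flags' member starts out as a null
     * pointer (step 9: "Null pointer value stored to field 'flags'"). */
    khash_t(ccv_cnnp_tensor_symbol_set)* const set = kh_init(ccv_cnnp_tensor_symbol_set);
    if (!set)
        return 1;
    int ret;
    /* The first insertion goes through kh_resize; on the path the analyzer assumes,
     * kh_resize returns without allocating 'flags', and kh_put then indexes it
     * (step 24, the dereference reported at line 75). */
    kh_put(ccv_cnnp_tensor_symbol_set, set, 42, &ret);
    kh_destroy(ccv_cnnp_tensor_symbol_set, set);
    return 0;
}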

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-unknown-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name ccv_cnnp_model_gradient_checkpointing.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -target-feature +sse2 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -fcoverage-compilation-dir=/home/liu/actions-runner/_work/ccv/ccv/lib/nnc -resource-dir /usr/local/lib/clang/19 -I ../ -I /usr/local/cuda/include -D HAVE_CBLAS -D HAVE_LIBPNG -D HAVE_LIBJPEG -D HAVE_FFTW3 -D HAVE_PTHREAD -D HAVE_LIBLINEAR -D HAVE_TESSERACT -D HAVE_AVCODEC -D HAVE_AVFORMAT -D HAVE_AVUTIL -D HAVE_SWSCALE -D HAVE_SSE2 -D HAVE_GSL -D HAVE_CUDA -D HAVE_CUDNN -D HAVE_NCCL -D USE_SYSTEM_CUB -I /usr/local/include -internal-isystem /usr/local/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/liu/actions-runner/_work/ccv/ccv/_analyze/2024-12-03-134311-3468-1 -x c ccv_cnnp_model_gradient_checkpointing.c
1#include "ccv_nnc.h"
2#include "ccv_nnc_easy.h"
3#include "ccv_nnc_internal.h"
4#include "ccv_internal.h"
5#include "_ccv_cnnp_model.h"
6// This can be removed once we organize ccv_cnnp_apply_gradient_checkpoints better.
7#include "_ccv_nnc_symbolic_graph.h"
8
9void ccv_cnnp_model_gradient_checkpoints_cleanup_after_build(ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_symbolic_graph_t* const graph)
10{
11 ccv_array_t* const gradient_checkpoints = compiled_data->gradient_checkpoints;
12 if (!gradient_checkpoints || gradient_checkpoints->rnum == 0) // No saved gradient checkpoints, this is an easy way out.
13 return;
14 int i, j;
15 const ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = (const ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, 0)((void*)(((char*)((graph->tensor_symbol_info)->data)) +
(size_t)(graph->tensor_symbol_info)->rsize * (size_t)(
0)))
;
16 // Go through to check if any tensors that are supposed to be in this map have been removed.
17 for (i = 0; i < gradient_checkpoints->rnum; i++)
18 {
19 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(gradient_checkpoints, i)((void*)(((char*)((gradient_checkpoints)->data)) + (size_t
)(gradient_checkpoints)->rsize * (size_t)(i)))
;
20 for (j = 0; j < checkpoint->tensor_symbols->rnum; j++)
21 {
22 ccv_nnc_tensor_symbol_t* const symbol = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(checkpoint->tensor_symbols, j)((void*)(((char*)((checkpoint->tensor_symbols)->data)) +
(size_t)(checkpoint->tensor_symbols)->rsize * (size_t)
(j)))
);
23 if (symbol->d >= 0 && symbol->d < graph->tensor_symbol_info->rnum)
24 // If it is dead, we need to remove this symbol.
25 if (CCV_NNC_TENSOR_SYMBOL_IS_DEAD(tensor_symbol_info[symbol->d].flags)((tensor_symbol_info[symbol->d].flags) & CCV_NNC_TENSOR_SYMBOL_DEAD
)
)
26 {
27 symbol->d = -1;
28 symbol->graph = 0;
29 }
30 }
31 }
32}
33
34typedef struct {
35 ccv_array_t* outgoings;
36} ccv_nnc_graph_exec_symbol_reverse_t;
37
38typedef struct {
39 ccv_cnnp_model_gradient_checkpoint_build_context_t tensor_context;
40 ccv_array_t* graph_exec_symbols;
41 ccv_nnc_graph_exec_symbol_new_hook_f old_graph_exec_symbol_new_hook;
42 void* old_graph_exec_symbol_new_hook_context;
43 ccv_array_t* all_tensor_symbols;
44} ccv_cnnp_gradient_checkpoint_build_t;
45
46static void _ccv_cnnp_gradient_checkpoint_tensor_symbol_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
47{
48 ccv_cnnp_gradient_checkpoint_build_t* const build_context = (ccv_cnnp_gradient_checkpoint_build_t*)context;
49 if (build_context->tensor_context.record)
50 ccv_array_push(build_context->tensor_context.tensor_symbols, &symbol);
51 ccv_array_push(build_context->all_tensor_symbols, &symbol);
52 if (build_context->tensor_context.old_tensor_symbol_new_hook)
53 build_context->tensor_context.old_tensor_symbol_new_hook(build_context->tensor_context.old_tensor_symbol_new_hook_context, symbol, info, name);
54}
55
56static void _ccv_cnnp_gradient_checkpoint_tensor_symbol_alias_new_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC(12)], const int inc[CCV_NNC_MAX_DIM_ALLOC(12)], const ccv_nnc_tensor_param_t info, const char* const name)
57{
58 ccv_cnnp_gradient_checkpoint_build_t* const build_context = (ccv_cnnp_gradient_checkpoint_build_t*)context;
59 if (build_context->tensor_context.record)
60 ccv_array_push(build_context->tensor_context.tensor_symbols, &symbol);
61 ccv_array_push(build_context->all_tensor_symbols, &symbol);
62 if (build_context->tensor_context.old_tensor_symbol_alias_new_hook)
63 build_context->tensor_context.old_tensor_symbol_alias_new_hook(build_context->tensor_context.old_tensor_symbol_alias_new_hook_context, symbol, from_symbol, ofs, inc, info, name);
64}
65
66static void _ccv_cnnp_model_gradient_checkpoint_graph_exec_symbol_new_hook(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name)
67{
68 ccv_cnnp_gradient_checkpoint_build_t* const build = (ccv_cnnp_gradient_checkpoint_build_t*)context;
69 ccv_array_push(build->graph_exec_symbols, &symbol);
70 if (build->old_graph_exec_symbol_new_hook)
71 build->old_graph_exec_symbol_new_hook(build->old_graph_exec_symbol_new_hook_context, symbol, cmd, inputs, input_size, outputs, output_size, name);
72}
73
74KHASH_MAP_INIT_INT(ccv_cnnp_tensor_symbol_map, int)typedef struct kh_ccv_cnnp_tensor_symbol_map_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; khint32_t *
keys; int *vals; } kh_ccv_cnnp_tensor_symbol_map_t; static inline
__attribute__ ((__unused__)) kh_ccv_cnnp_tensor_symbol_map_t
*kh_init_ccv_cnnp_tensor_symbol_map(void) { return (kh_ccv_cnnp_tensor_symbol_map_t
*)calloc(1,sizeof(kh_ccv_cnnp_tensor_symbol_map_t)); } static
inline __attribute__ ((__unused__)) void kh_destroy_ccv_cnnp_tensor_symbol_map
(kh_ccv_cnnp_tensor_symbol_map_t *h) { if (h) { free((void *)
h->keys); free(h->flags); free((void *)h->vals); free
(h); } } static inline __attribute__ ((__unused__)) void kh_clear_ccv_cnnp_tensor_symbol_map
(kh_ccv_cnnp_tensor_symbol_map_t *h) { if (h && h->
flags) { memset(h->flags, 0xaa, ((h->n_buckets) < 16
? 1 : (h->n_buckets)>>4) * sizeof(khint32_t)); h->
size = h->n_occupied = 0; } } static inline __attribute__ (
(__unused__)) khint_t kh_get_ccv_cnnp_tensor_symbol_map(const
kh_ccv_cnnp_tensor_symbol_map_t *h, khint32_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = (khint32_t)(key); i = k & mask; last =
i; while (!((h->flags[i>>4]>>((i&0xfU)<<
1))&2) && (((h->flags[i>>4]>>((i&
0xfU)<<1))&1) || !((h->keys[i]) == (key)))) { i =
(i + (++step)) & mask; if (i == last) return h->n_buckets
; } return ((h->flags[i>>4]>>((i&0xfU)<<
1))&3)? h->n_buckets : i; } else return 0; } static inline
__attribute__ ((__unused__)) int kh_resize_ccv_cnnp_tensor_symbol_map
(kh_ccv_cnnp_tensor_symbol_map_t *h, khint_t new_n_buckets) {
khint32_t *new_flags = 0; khint_t j = 1; { (--(new_n_buckets
), (new_n_buckets)|=(new_n_buckets)>>1, (new_n_buckets)
|=(new_n_buckets)>>2, (new_n_buckets)|=(new_n_buckets)>>
4, (new_n_buckets)|=(new_n_buckets)>>8, (new_n_buckets)
|=(new_n_buckets)>>16, ++(new_n_buckets)); if (new_n_buckets
< 4) new_n_buckets = 4; if (h->size >= (khint_t)(new_n_buckets
* __ac_HASH_UPPER + 0.5)) j = 0; else { new_flags = (khint32_t
*)malloc(((new_n_buckets) < 16? 1 : (new_n_buckets)>>
4) * sizeof(khint32_t)); if (!new_flags) return -1; memset(new_flags
, 0xaa, ((new_n_buckets) < 16? 1 : (new_n_buckets)>>
4) * sizeof(khint32_t)); if (h->n_buckets < new_n_buckets
) { khint32_t *new_keys = (khint32_t*)realloc((void *)h->keys
,new_n_buckets * sizeof(khint32_t)); if (!new_keys) { free(new_flags
); return -1; } h->keys = new_keys; if (1) { int *new_vals
= (int*)realloc((void *)h->vals,new_n_buckets * sizeof(int
)); if (!new_vals) { free(new_flags); return -1; } h->vals
= new_vals; } } } } if (j) { for (j = 0; j != h->n_buckets
; ++j) { if (((h->flags[j>>4]>>((j&0xfU)<<
1))&3) == 0) { khint32_t key = h->keys[j]; int val; khint_t
new_mask; new_mask = new_n_buckets - 1; if (1) val = h->vals
[j]; (h->flags[j>>4]|=1ul<<((j&0xfU)<<
1)); while (1) { khint_t k, i, step = 0; k = (khint32_t)(key)
; i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { khint32_t
tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (1
) { int tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
(h->flags[i>>4]|=1ul<<((i&0xfU)<<1)
); } else { h->keys[i] = key; if (1) h->vals[i] = val; break
; } } } } if (h->n_buckets > new_n_buckets) { h->keys
= (khint32_t*)realloc((void *)h->keys,new_n_buckets * sizeof
(khint32_t)); if (1) h->vals = (int*)realloc((void *)h->
vals,new_n_buckets * sizeof(int)); } free(h->flags); h->
flags = new_flags; h->n_buckets = new_n_buckets; h->n_occupied
= h->size; h->upper_bound = (khint_t)(h->n_buckets *
__ac_HASH_UPPER + 0.5); } return 0; } static inline __attribute__
((__unused__)) khint_t kh_put_ccv_cnnp_tensor_symbol_map(kh_ccv_cnnp_tensor_symbol_map_t
*h, khint32_t key, int *ret) { khint_t x; if (h->n_occupied
>= h->upper_bound) { if (h->n_buckets > (h->size
<<1)) { if (kh_resize_ccv_cnnp_tensor_symbol_map(h, h->
n_buckets - 1) < 0) { *ret = -1; return h->n_buckets; }
} else if (kh_resize_ccv_cnnp_tensor_symbol_map(h, h->n_buckets
+ 1) < 0) { *ret = -1; return h->n_buckets; } } { khint_t
k, i, site, last, mask = h->n_buckets - 1, step = 0; x = site
= h->n_buckets; k = (khint32_t)(key); i = k & mask; if
(((h->flags[i>>4]>>((i&0xfU)<<1))&
2)) x = i; else { last = i; while (!((h->flags[i>>4]
>>((i&0xfU)<<1))&2) && (((h->flags
[i>>4]>>((i&0xfU)<<1))&1) || !((h->
keys[i]) == (key)))) { if (((h->flags[i>>4]>>(
(i&0xfU)<<1))&1)) site = i; i = (i + (++step)) &
mask; if (i == last) { x = site; break; } } if (x == h->n_buckets
) { if (((h->flags[i>>4]>>((i&0xfU)<<
1))&2) && site != h->n_buckets) x = site; else
x = i; } } } if (((h->flags[x>>4]>>((x&0xfU
)<<1))&2)) { h->keys[x] = key; (h->flags[x>>
4]&=~(3ul<<((x&0xfU)<<1))); ++h->size;
++h->n_occupied; *ret = 1; } else if (((h->flags[x>>
4]>>((x&0xfU)<<1))&1)) { h->keys[x] = key
; (h->flags[x>>4]&=~(3ul<<((x&0xfU)<<
1))); ++h->size; *ret = 2; } else *ret = 0; return x; } static
inline __attribute__ ((__unused__)) void kh_del_ccv_cnnp_tensor_symbol_map
(kh_ccv_cnnp_tensor_symbol_map_t *h, khint_t x) { if (x != h->
n_buckets && !((h->flags[x>>4]>>((x&
0xfU)<<1))&3)) { (h->flags[x>>4]|=1ul<<
((x&0xfU)<<1)); --h->size; } }
75KHASH_SET_INIT_INT(ccv_cnnp_tensor_symbol_set)typedef struct kh_ccv_cnnp_tensor_symbol_set_s { khint_t n_buckets
, size, n_occupied, upper_bound; khint32_t *flags; khint32_t *
keys; char *vals; } kh_ccv_cnnp_tensor_symbol_set_t; static inline
__attribute__ ((__unused__)) kh_ccv_cnnp_tensor_symbol_set_t
*kh_init_ccv_cnnp_tensor_symbol_set(void) { return (kh_ccv_cnnp_tensor_symbol_set_t
*)calloc(1,sizeof(kh_ccv_cnnp_tensor_symbol_set_t)); } static
inline __attribute__ ((__unused__)) void kh_destroy_ccv_cnnp_tensor_symbol_set
(kh_ccv_cnnp_tensor_symbol_set_t *h) { if (h) { free((void *)
h->keys); free(h->flags); free((void *)h->vals); free
(h); } } static inline __attribute__ ((__unused__)) void kh_clear_ccv_cnnp_tensor_symbol_set
(kh_ccv_cnnp_tensor_symbol_set_t *h) { if (h && h->
flags) { memset(h->flags, 0xaa, ((h->n_buckets) < 16
? 1 : (h->n_buckets)>>4) * sizeof(khint32_t)); h->
size = h->n_occupied = 0; } } static inline __attribute__ (
(__unused__)) khint_t kh_get_ccv_cnnp_tensor_symbol_set(const
kh_ccv_cnnp_tensor_symbol_set_t *h, khint32_t key) { if (h->
n_buckets) { khint_t k, i, last, mask, step = 0; mask = h->
n_buckets - 1; k = (khint32_t)(key); i = k & mask; last =
i; while (!((h->flags[i>>4]>>((i&0xfU)<<
1))&2) && (((h->flags[i>>4]>>((i&
0xfU)<<1))&1) || !((h->keys[i]) == (key)))) { i =
(i + (++step)) & mask; if (i == last) return h->n_buckets
; } return ((h->flags[i>>4]>>((i&0xfU)<<
1))&3)? h->n_buckets : i; } else return 0; } static inline
__attribute__ ((__unused__)) int kh_resize_ccv_cnnp_tensor_symbol_set
(kh_ccv_cnnp_tensor_symbol_set_t *h, khint_t new_n_buckets) {
khint32_t *new_flags = 0; khint_t j = 1; { (--(new_n_buckets
), (new_n_buckets)|=(new_n_buckets)>>1, (new_n_buckets)
|=(new_n_buckets)>>2, (new_n_buckets)|=(new_n_buckets)>>
4, (new_n_buckets)|=(new_n_buckets)>>8, (new_n_buckets)
|=(new_n_buckets)>>16, ++(new_n_buckets)); if (new_n_buckets
< 4) new_n_buckets = 4; if (h->size >= (khint_t)(new_n_buckets
* __ac_HASH_UPPER + 0.5)) j = 0; else { new_flags = (khint32_t
*)malloc(((new_n_buckets) < 16? 1 : (new_n_buckets)>>
4) * sizeof(khint32_t)); if (!new_flags) return -1; memset(new_flags
, 0xaa, ((new_n_buckets) < 16? 1 : (new_n_buckets)>>
4) * sizeof(khint32_t)); if (h->n_buckets < new_n_buckets
) { khint32_t *new_keys = (khint32_t*)realloc((void *)h->keys
,new_n_buckets * sizeof(khint32_t)); if (!new_keys) { free(new_flags
); return -1; } h->keys = new_keys; if (0) { char *new_vals
= (char*)realloc((void *)h->vals,new_n_buckets * sizeof(char
)); if (!new_vals) { free(new_flags); return -1; } h->vals
= new_vals; } } } } if (j) { for (j = 0; j != h->n_buckets
; ++j) { if (((h->flags[j>>4]>>((j&0xfU)<<
1))&3) == 0) { khint32_t key = h->keys[j]; char val; khint_t
new_mask; new_mask = new_n_buckets - 1; if (0) val = h->vals
[j]; (h->flags[j>>4]|=1ul<<((j&0xfU)<<
1)); while (1) { khint_t k, i, step = 0; k = (khint32_t)(key)
; i = k & new_mask; while (!((new_flags[i>>4]>>
((i&0xfU)<<1))&2)) i = (i + (++step)) & new_mask
; (new_flags[i>>4]&=~(2ul<<((i&0xfU)<<
1))); if (i < h->n_buckets && ((h->flags[i>>
4]>>((i&0xfU)<<1))&3) == 0) { { khint32_t
tmp = h->keys[i]; h->keys[i] = key; key = tmp; } if (0
) { char tmp = h->vals[i]; h->vals[i] = val; val = tmp;
} (h->flags[i>>4]|=1ul<<((i&0xfU)<<
1)); } else { h->keys[i] = key; if (0) h->vals[i] = val
; break; } } } } if (h->n_buckets > new_n_buckets) { h->
keys = (khint32_t*)realloc((void *)h->keys,new_n_buckets *
sizeof(khint32_t)); if (0) h->vals = (char*)realloc((void
*)h->vals,new_n_buckets * sizeof(char)); } free(h->flags
); h->flags = new_flags; h->n_buckets = new_n_buckets; h
->n_occupied = h->size; h->upper_bound = (khint_t)(h
->n_buckets * __ac_HASH_UPPER + 0.5); } return 0; } static
inline __attribute__ ((__unused__)) khint_t kh_put_ccv_cnnp_tensor_symbol_set
(kh_ccv_cnnp_tensor_symbol_set_t *h, khint32_t key, int *ret)
{ khint_t x; if (h->n_occupied >= h->upper_bound) {
if (h->n_buckets > (h->size<<1)) { if (kh_resize_ccv_cnnp_tensor_symbol_set
(h, h->n_buckets - 1) < 0) { *ret = -1; return h->n_buckets
; } } else if (kh_resize_ccv_cnnp_tensor_symbol_set(h, h->
n_buckets + 1) < 0) { *ret = -1; return h->n_buckets; }
} { khint_t k, i, site, last, mask = h->n_buckets - 1, step
= 0; x = site = h->n_buckets; k = (khint32_t)(key); i = k
& mask; if (((h->flags[i>>4]>>((i&0xfU
)<<1))&2)) x = i; else { last = i; while (!((h->
flags[i>>4]>>((i&0xfU)<<1))&2) &&
(((h->flags[i>>4]>>((i&0xfU)<<1))&
1) || !((h->keys[i]) == (key)))) { if (((h->flags[i>>
4]>>((i&0xfU)<<1))&1)) site = i; i = (i +
(++step)) & mask; if (i == last) { x = site; break; } } if
(x == h->n_buckets) { if (((h->flags[i>>4]>>
((i&0xfU)<<1))&2) && site != h->n_buckets
) x = site; else x = i; } } } if (((h->flags[x>>4]>>
((x&0xfU)<<1))&2)) { h->keys[x] = key; (h->
flags[x>>4]&=~(3ul<<((x&0xfU)<<1)))
; ++h->size; ++h->n_occupied; *ret = 1; } else if (((h->
flags[x>>4]>>((x&0xfU)<<1))&1)) { h
->keys[x] = key; (h->flags[x>>4]&=~(3ul<<
((x&0xfU)<<1))); ++h->size; *ret = 2; } else *ret
= 0; return x; } static inline __attribute__ ((__unused__)) void
kh_del_ccv_cnnp_tensor_symbol_set(kh_ccv_cnnp_tensor_symbol_set_t
*h, khint_t x) { if (x != h->n_buckets && !((h->
flags[x>>4]>>((x&0xfU)<<1))&3)) { (
h->flags[x>>4]|=1ul<<((x&0xfU)<<1));
--h->size; } }
9. Null pointer value stored to field 'flags'
14. Taking true branch
15. Taking false branch
16. Calling 'kh_resize_ccv_cnnp_tensor_symbol_set'
17. Taking true branch
18. Assuming the condition is true
19. Taking true branch
20. Taking false branch
21. Returning without writing to 'h->flags'
22. Returning from 'kh_resize_ccv_cnnp_tensor_symbol_set'
23. Taking false branch
24. Array access (via field 'flags') results in a null pointer dereference
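
Read together, the reported path reduces to the shape sketched below. This is a hand-written reduction for illustration only (toy_set_t and the toy_* functions are invented names, not the khash macros): the init function calloc's the table so flags is NULL, the resize function has a branch that returns success without ever writing flags, and the put function then indexes flags. Note that in the real kh_resize that branch is only taken when h->size already meets the load-factor threshold, which cannot hold for a freshly initialized table, so for this call site the path appears infeasible and the report is likely a false positive.

#include <stdlib.h>
#include <string.h>

/* Invented reduction of the khash state the analyzer reasons about; not the real macros. */
typedef struct {
    unsigned n_buckets, size, n_occupied, upper_bound;
    unsigned* flags; /* calloc'ed to NULL by toy_init, mirroring kh_init (step 9) */
    unsigned* keys;
} toy_set_t;

static toy_set_t* toy_init(void)
{
    return (toy_set_t*)calloc(1, sizeof(toy_set_t));
}

static int toy_resize(toy_set_t* h, unsigned new_n_buckets)
{
    if (new_n_buckets < 4)
        new_n_buckets = 4;
    /* Steps 18/21: the analyzer assumes this condition holds, so the function
     * returns without ever writing h->flags. For a freshly init'ed table
     * h->size is 0, so this branch is not actually reachable here. */
    if (h->size >= (unsigned)(new_n_buckets * 0.77 + 0.5))
        return 0;
    const size_t nflags = (new_n_buckets < 16) ? 1 : new_n_buckets >> 4;
    unsigned* const new_flags = (unsigned*)malloc(nflags * sizeof(unsigned));
    if (!new_flags)
        return -1;
    memset(new_flags, 0xaa, nflags * sizeof(unsigned));
    free(h->flags);
    h->flags = new_flags;
    h->n_buckets = new_n_buckets;
    h->upper_bound = (unsigned)(new_n_buckets * 0.77 + 0.5);
    return 0;
}

static int toy_put(toy_set_t* h, unsigned key)
{
    if (h->n_occupied >= h->upper_bound) /* 0 >= 0 on the first insertion */
        if (toy_resize(h, h->n_buckets + 1) < 0)
            return -1;
    const unsigned mask = h->n_buckets ? h->n_buckets - 1 : 0;
    const unsigned i = key & mask;
    /* Step 24: if toy_resize returned on the no-allocation branch, h->flags is
     * still NULL here and this read is the reported null pointer dereference. */
    return (int)((h->flags[i >> 4] >> ((i & 0xfU) << 1)) & 3);
}

int main(void)
{
    toy_set_t* const h = toy_init();
    if (!h)
        return 1;
    (void)toy_put(h, 42);
    free(h->flags);
    free(h->keys);
    free(h);
    return 0;
}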
76
77ccv_nnc_exec_dep_t _ccv_nnc_exec_dep_new(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_visit_t* const visit, const int exec_rnum, uint32_t* const maskbit)
78{
79 int* chain_ids = ccmallocmalloc(sizeof(int) * exec_rnum * 2);
80 int* chain_pos = chain_ids + exec_rnum;
81 int* buf = (int*)ccmallocmalloc(sizeof(int) * exec_rnum);
82 int* reversed_depth = buf;
83 const ccv_nnc_graph_exec_symbol_info_t* const exec_symbol_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0)((void*)(((char*)((graph->exec_symbol_info)->data)) + (
size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)))
;
84 int i, j;
85 // Go reverse order to generate the distance from sink.
86 ccv_nnc_graph_visit_for_reversed(visit, exec_symbol_info, node, idx, term){ int _i_; for (_i_ = (visit)->size - 1; _i_ >= 0; _i_--
) { const int idx __attribute__((unused)) = (visit)->node[
_i_].index; const int term __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_symbol_info)) const node __attribute__
((unused)) = (exec_symbol_info) + idx;
{
87 if (idx >= exec_rnum || CCV_NNC_GRAPH_EXEC_IS_DEAD(node->flags)((node->flags) & CCV_NNC_GRAPH_EXEC_DEAD) || !(maskbit[idx >> 5] & (1u << (idx & 0x1f))))
88 continue;
89 chain_ids[idx] = -1;
90 if (!node->outgoings || node->outgoings->rnum == 0)
91 {
92 reversed_depth[idx] = 0;
93 continue;
94 }
95 int depth = -1;
96 for (i = 0; i < node->outgoings->rnum; i++)
97 {
98 const int outgoing = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
99 if (outgoing >= exec_rnum)
100 continue;
101 depth = ccv_max(depth, reversed_depth[outgoing])({ typeof (depth) _a = (depth); typeof (reversed_depth[outgoing
]) _b = (reversed_depth[outgoing]); (_a > _b) ? _a : _b; }
)
;
102 }
103 reversed_depth[idx] = depth + 1;
104 } ccv_nnc_graph_visit_endfor} }
105 // Go in order to generate chain ids (if there are multiple exits, we use the reverse depth to break the tie).
106 // Note that we cannot use depth so-far because then multiple exit nodes are equally good to "inherit" the chain selection.
107 int chain_count = 0;
108 ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx, term){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int term __attribute__((unused)) = (visit)->node[_i_
].term; typeof ((exec_symbol_info)) const node __attribute__(
(unused)) = (exec_symbol_info) + idx;
{
109 if (idx >= exec_rnum || CCV_NNC_GRAPH_EXEC_IS_DEAD(node->flags)((node->flags) & CCV_NNC_GRAPH_EXEC_DEAD) || !(maskbit[idx >> 5] & (1u << (idx & 0x1f))))
110 continue;
111 int chain_id = chain_ids[idx];
112 if (chain_ids[idx] < 0)
113 {
114 chain_id = chain_count;
115 chain_ids[idx] = chain_id;
116 chain_pos[idx] = 1; // The first one in this chain. 1-based index because in sparse matrix, 0 is the default value.
117 chain_count += 1;
118 }
119 if (!node->outgoings || node->outgoings->rnum == 0)
120 continue;
121 int depth = -1;
122 int next_idx = -1;
123 for (i = 0; i < node->outgoings->rnum; i++)
124 {
125 const int outgoing = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
126 if (outgoing >= exec_rnum)
127 continue;
128 if (chain_ids[outgoing] < 0 && reversed_depth[outgoing] > depth)
129 depth = reversed_depth[outgoing], next_idx = outgoing;
130 }
131 if (next_idx >= 0)
132 {
133 chain_ids[next_idx] = chain_id;
134 assert(reversed_depth[idx] - depth >= 1)((void) sizeof ((reversed_depth[idx] - depth >= 1) ? 1 : 0
), __extension__ ({ if (reversed_depth[idx] - depth >= 1) ;
else __assert_fail ("reversed_depth[idx] - depth >= 1", "ccv_cnnp_model_gradient_checkpointing.c"
, 134, __extension__ __PRETTY_FUNCTION__); }))
;
135 chain_pos[next_idx] = chain_pos[idx] + (reversed_depth[idx] - depth);
136 }
137 } ccv_nnc_graph_visit_endfor} }
138 if (exec_rnum < chain_count * 2) // Be more conservative on RAM usage.
139 buf = ccreallocrealloc(buf, sizeof(int) * chain_count * 2);
140 ccv_sparse_matrix_t* deps = ccv_sparse_matrix_new(graph->exec_symbol_info->rnum, chain_count, CCV_32S | CCV_C1, CCV_SPARSE_ROW_MAJOR, 0);
141 // It logs which pos on that chain we depend on. We can simply compare that with the chain_pos for a node to know if they are ancestors.
142#define for_block(x, val) \
143 do { \
144 if (((int32_t*)val)[0] > 0) \
145 { \
146 buf[buf_size * 2] = x; \
147 buf[buf_size * 2 + 1] = ((int32_t*)val)[0]; \
148 ++buf_size; \
149 } \
150 } while (0)
151 int buf_size;
152 ccv_nnc_graph_visit_for(visit, exec_symbol_info, node, idx, term){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int term __attribute__((unused)) = (visit)->node[_i_
].term; typeof ((exec_symbol_info)) const node __attribute__(
(unused)) = (exec_symbol_info) + idx;
{
153 if (idx >= exec_rnum || CCV_NNC_GRAPH_EXEC_IS_DEAD(node->flags)((node->flags) & CCV_NNC_GRAPH_EXEC_DEAD) || !(maskbit[idx >> 5] & (1u << (idx & 0x1f))))
154 continue;
155 buf_size = 0; /* save all its parent deps to this buffer */
156 ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(deps, idx);
157 if (vector)
158 CCV_SPARSE_VECTOR_FOREACH(deps, vector, for_block)do { switch ((((deps)->type) & 0xFF000)) { case CCV_32S
: { do { int _i_; __attribute__((unused)) const size_t _c_ = (
((deps)->type) & 0xFFF); if ((deps)->type & CCV_DENSE_VECTOR
) { for (_i_ = 0; _i_ < (vector)->size; _i_++) { for_block
((_i_), ((vector)->data.i32 + (_i_ * _c_))); } } else { const
size_t _idx_size_ = sizeof(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size
[(((deps)->type) & 0xFF000) >> 12] * (((deps)->
type) & 0xFFF) + 3) & -4); uint8_t* const _vidx_ = (uint8_t
*)(vector)->index; for (_i_ = 0; _i_ < (vector)->size
; _i_++) { ccv_sparse_matrix_index_t* const _idx_i_ = (ccv_sparse_matrix_index_t
*)(_vidx_ + _idx_size_ * _i_); if (_idx_i_->ifbit <= 1)
continue; ccv_numeric_data_t _d_ = { .u8 = (uint8_t*)(_idx_i_
+ 1) }; for_block((_idx_i_->i), (_d_.i32 + (0))); } } } while
(0); break; } case CCV_32F: { do { int _i_; __attribute__((unused
)) const size_t _c_ = (((deps)->type) & 0xFFF); if ((deps
)->type & CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (
vector)->size; _i_++) { for_block((_i_), ((vector)->data
.f32 + (_i_ * _c_))); } } else { const size_t _idx_size_ = sizeof
(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size[(((deps
)->type) & 0xFF000) >> 12] * (((deps)->type) &
0xFFF) + 3) & -4); uint8_t* const _vidx_ = (uint8_t*)(vector
)->index; for (_i_ = 0; _i_ < (vector)->size; _i_++)
{ ccv_sparse_matrix_index_t* const _idx_i_ = (ccv_sparse_matrix_index_t
*)(_vidx_ + _idx_size_ * _i_); if (_idx_i_->ifbit <= 1)
continue; ccv_numeric_data_t _d_ = { .u8 = (uint8_t*)(_idx_i_
+ 1) }; for_block((_idx_i_->i), (_d_.f32 + (0))); } } } while
(0); break; } case CCV_64S: { do { int _i_; __attribute__((unused
)) const size_t _c_ = (((deps)->type) & 0xFFF); if ((deps
)->type & CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (
vector)->size; _i_++) { for_block((_i_), ((vector)->data
.i64 + (_i_ * _c_))); } } else { const size_t _idx_size_ = sizeof
(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size[(((deps
)->type) & 0xFF000) >> 12] * (((deps)->type) &
0xFFF) + 3) & -4); uint8_t* const _vidx_ = (uint8_t*)(vector
)->index; for (_i_ = 0; _i_ < (vector)->size; _i_++)
{ ccv_sparse_matrix_index_t* const _idx_i_ = (ccv_sparse_matrix_index_t
*)(_vidx_ + _idx_size_ * _i_); if (_idx_i_->ifbit <= 1)
continue; ccv_numeric_data_t _d_ = { .u8 = (uint8_t*)(_idx_i_
+ 1) }; for_block((_idx_i_->i), (_d_.i64 + (0))); } } } while
(0); break; } case CCV_64F: { do { int _i_; __attribute__((unused
)) const size_t _c_ = (((deps)->type) & 0xFFF); if ((deps
)->type & CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (
vector)->size; _i_++) { for_block((_i_), ((vector)->data
.f64 + (_i_ * _c_))); } } else { const size_t _idx_size_ = sizeof
(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size[(((deps
)->type) & 0xFF000) >> 12] * (((deps)->type) &
0xFFF) + 3) & -4); uint8_t* const _vidx_ = (uint8_t*)(vector
)->index; for (_i_ = 0; _i_ < (vector)->size; _i_++)
{ ccv_sparse_matrix_index_t* const _idx_i_ = (ccv_sparse_matrix_index_t
*)(_vidx_ + _idx_size_ * _i_); if (_idx_i_->ifbit <= 1)
continue; ccv_numeric_data_t _d_ = { .u8 = (uint8_t*)(_idx_i_
+ 1) }; for_block((_idx_i_->i), (_d_.f64 + (0))); } } } while
(0); break; } default: { do { int _i_; __attribute__((unused
)) const size_t _c_ = (((deps)->type) & 0xFFF); if ((deps
)->type & CCV_DENSE_VECTOR) { for (_i_ = 0; _i_ < (
vector)->size; _i_++) { for_block((_i_), ((vector)->data
.u8 + (_i_ * _c_))); } } else { const size_t _idx_size_ = sizeof
(ccv_sparse_matrix_index_t) + ((_ccv_get_data_type_size[(((deps
)->type) & 0xFF000) >> 12] * (((deps)->type) &
0xFFF) + 3) & -4); uint8_t* const _vidx_ = (uint8_t*)(vector
)->index; for (_i_ = 0; _i_ < (vector)->size; _i_++)
{ ccv_sparse_matrix_index_t* const _idx_i_ = (ccv_sparse_matrix_index_t
*)(_vidx_ + _idx_size_ * _i_); if (_idx_i_->ifbit <= 1)
continue; ccv_numeric_data_t _d_ = { .u8 = (uint8_t*)(_idx_i_
+ 1) }; for_block((_idx_i_->i), (_d_.u8 + (0))); } } } while
(0); } } } while (0)
;
159 if (!node->outgoings)
160 continue;
161 const int chain_id = chain_ids[idx];
162 const int pos = chain_pos[idx];
163 for (i = 0; i < node->outgoings->rnum; i++)
164 {
165 const int outgoing = *(int*)ccv_array_get(node->outgoings, i)((void*)(((char*)((node->outgoings)->data)) + (size_t)(
node->outgoings)->rsize * (size_t)(i)))
;
166 if (outgoing >= exec_rnum)
167 continue;
168 const int outgoing_chain_id = chain_ids[outgoing];
169 if (outgoing_chain_id != chain_id)
170 {
171 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell(deps, outgoing, chain_id);
172 /* If not found, set, if the current node is the destination node, no need
173 * set itself as parent of subsequent nodes because its terminal nature. */
174 if (!cell.i32 || cell.i32[0] == 0 || cell.i32[0] < pos)
175 ccv_set_sparse_matrix_cell(deps, outgoing, chain_id, &pos);
176 }
177 if (buf_size > 0)
178 {
179 ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(deps, outgoing);
180 for (j = 0; j < buf_size; j++) /* set with all idx's dependencies as well */
181 {
182 if (outgoing_chain_id == buf[j * 2]) // We don't need to add as dependency for the same chain.
183 continue;
184 if (!vector)
185 {
186 ccv_set_sparse_matrix_cell(deps, outgoing, buf[j * 2], &buf[j * 2 + 1]);
187 vector = ccv_get_sparse_matrix_vector(deps, outgoing);
188 continue;
189 }
190 ccv_numeric_data_t cell = ccv_get_sparse_matrix_cell_from_vector(deps, vector, buf[j * 2]);
191 /* If not found, set. Otherwise, set to the latest one only if it is later. */
192 if (!cell.i32 || cell.i32[0] == 0 || cell.i32[0] <= buf[j * 2 + 1])
193 ccv_set_sparse_matrix_cell_from_vector(deps, vector, buf[j * 2], &buf[j * 2 + 1]);
194 }
195 }
196 }
197 } ccv_nnc_graph_visit_endfor} }
198#undef for_block
199 ccfreefree(buf);
200 ccv_nnc_exec_dep_t exec_dep = {
201 .chain_ids = chain_ids,
202 .chain_pos = chain_pos,
203 .deps = deps
204 };
205 return exec_dep;
206}
207
208void ccv_cnnp_model_apply_gradient_checkpoints(ccv_cnnp_compiled_data_t* const compiled_data, ccv_nnc_symbolic_graph_t* const graph)
209{
210 ccv_array_t* const gradient_checkpoints = compiled_data->gradient_checkpoints;
211 if (!gradient_checkpoints || gradient_checkpoints->rnum == 0) // No saved gradient checkpoints, this is an easy way out.
1. Assuming 'gradient_checkpoints' is non-null
2. Assuming field 'rnum' is not equal to 0
3. Taking false branch
212 return;
213 // Otherwise, for each gradient checkpoint, there are several steps:
214 // 1. Find which execs currently exist from inputs to outputs.
215 // 2. Find execs that generates the outputs, and their corresponding backward execs.
216 // 3. Find all backward execs flow from outputs back to inputs.
217 // 4. Generate new ops by calling build again with old inputs, record all new tensors / execs.
218 // 5. Replace inputs in backward execs with the new tensors.
219 // 6. Hook the execs takes inputs with edge from parents of backward execs in step 2.
220 // 7. Delete newly generated execs that has no use (i.e. its outputs are not used by backward pass).
221 // 8. Mark all new execs with DISABLE_OPT to avoid common sub-expression elimination pass.
222 int i, j, k, l;
223 ccv_array_t* input_execs = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
224 ccv_array_t* output_execs = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
225 ccv_array_t* input_gradient_execs = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
226 ccv_array_t* output_gradient_execs = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0);
227 ccv_array_t* visited_backward_execs = ccv_array_new(sizeof(int), 0, 0);
228 ccv_array_t* replaced_backward_execs = ccv_array_new(sizeof(int), 0, 0);
229 const int exec_rnum = graph->exec_symbol_info->rnum;
230 ccv_nnc_graph_exec_symbol_reverse_t* const reversed_nodes = cccalloccalloc(exec_rnum, sizeof(ccv_nnc_graph_exec_symbol_reverse_t));
231 for (i = 0; i < exec_rnum; i++)
4. Assuming 'i' is >= 'exec_rnum'
5. Loop condition is false. Execution continues on line 247
232 {
233 const int* tos = 0;
234 int to_size = 0;
235 ccv_nnc_graph_exec_symbol_to(graph, (ccv_nnc_graph_exec_symbol_t){
236 .graph = graph,
237 .d = i
238 }, &tos, &to_size);
239 if (tos)
240 for (j = 0; j < to_size; j++)
241 {
242 if (!reversed_nodes[tos[j]].outgoings)
243 reversed_nodes[tos[j]].outgoings = ccv_array_new(sizeof(int), 1, 0);
244 ccv_array_add_unique_int(reversed_nodes[tos[j]].outgoings, i);
245 }
246 }
247 uint32_t* const maskbit = cccalloccalloc((exec_rnum + 31) >> 5, sizeof(uint32_t));
248 // Temporary for build_data.
249 ccv_array_t* const parameters = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
250 ccv_array_t* const parameter_ids = ccv_array_new(sizeof(char*), 0, 0);
251 ccv_array_t* const parameter_trainables = ccv_array_new(sizeof(int), 0, 0);
252 ccv_array_t* const internals = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
253 ccv_array_t* const internal_ids = ccv_array_new(sizeof(char*), 0, 0);
254 int max_output_size = 0;
255 for (i = 0; i < gradient_checkpoints->rnum; i++)
6. Assuming 'i' is >= field 'rnum'
7. Loop condition is false. Execution continues on line 260
256 {
257 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(gradient_checkpoints, i)((void*)(((char*)((gradient_checkpoints)->data)) + (size_t
)(gradient_checkpoints)->rsize * (size_t)(i)))
;
258 max_output_size = ccv_max(checkpoint->output_size, max_output_size)({ typeof (checkpoint->output_size) _a = (checkpoint->output_size
); typeof (max_output_size) _b = (max_output_size); (_a > _b
) ? _a : _b; })
;
259 }
260 ccv_nnc_tensor_symbol_t* max_outputs = ccmallocmalloc(sizeof(ccv_nnc_tensor_symbol_t) * max_output_size);
261 ccv_array_t* newly_used_outputs = ccv_array_new(sizeof(int), 0, 0);
262 khash_t(ccv_cnnp_tensor_symbol_set)kh_ccv_cnnp_tensor_symbol_set_t* const parameters_or_internals = kh_init(ccv_cnnp_tensor_symbol_set)kh_init_ccv_cnnp_tensor_symbol_set();
8. Calling 'kh_init_ccv_cnnp_tensor_symbol_set'
10. Returning from 'kh_init_ccv_cnnp_tensor_symbol_set'
263 for (i = 0; i < compiled_data->parameters->rnum; i++)
11. Assuming 'i' is < field 'rnum'
12. Loop condition is true. Entering loop body
264 {
265 const ccv_nnc_tensor_symbol_t* const symbol = (const ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
266 int ret;
267 kh_put(ccv_cnnp_tensor_symbol_set, parameters_or_internals, symbol->d, &ret)kh_put_ccv_cnnp_tensor_symbol_set(parameters_or_internals, symbol
->d, &ret)
;
13. Calling 'kh_put_ccv_cnnp_tensor_symbol_set'
268 }
269 for (i = 0; i < compiled_data->internals->rnum; i++)
270 {
271 const ccv_nnc_tensor_symbol_t* const symbol = (const ccv_nnc_tensor_symbol_t*)ccv_array_get(compiled_data->parameters, i)((void*)(((char*)((compiled_data->parameters)->data)) +
(size_t)(compiled_data->parameters)->rsize * (size_t)(
i)))
;
272 int ret;
273 kh_put(ccv_cnnp_tensor_symbol_set, parameters_or_internals, symbol->d, &ret)kh_put_ccv_cnnp_tensor_symbol_set(parameters_or_internals, symbol
->d, &ret)
;
274 }
275 khash_t(ccv_cnnp_tensor_symbol_set)kh_ccv_cnnp_tensor_symbol_set_t* const newly_created_tensor_symbols = kh_init(ccv_cnnp_tensor_symbol_set)kh_init_ccv_cnnp_tensor_symbol_set();
276 khash_t(ccv_cnnp_tensor_symbol_map)kh_ccv_cnnp_tensor_symbol_map_t* symbol_map = kh_init(ccv_cnnp_tensor_symbol_map)kh_init_ccv_cnnp_tensor_symbol_map();
277 for (i = 0; i < gradient_checkpoints->rnum; i++)
278 {
279 ccv_cnnp_model_gradient_checkpoint_t* const checkpoint = (ccv_cnnp_model_gradient_checkpoint_t*)ccv_array_get(gradient_checkpoints, i)((void*)(((char*)((gradient_checkpoints)->data)) + (size_t
)(gradient_checkpoints)->rsize * (size_t)(i)))
;
280 kh_clear(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols)kh_clear_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
)
;
281 for (j = 0; j < checkpoint->tensor_symbols->rnum; j++)
282 {
283 const int idx = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(checkpoint->tensor_symbols, j)((void*)(((char*)((checkpoint->tensor_symbols)->data)) +
(size_t)(checkpoint->tensor_symbols)->rsize * (size_t)
(j)))
)->d;
284 if (idx < 0)
285 continue;
286 // Skip parameters or internals.
287 if (kh_get(ccv_cnnp_tensor_symbol_set, parameters_or_internals, idx)kh_get_ccv_cnnp_tensor_symbol_set(parameters_or_internals, idx
)
!= kh_end(parameters_or_internals)((parameters_or_internals)->n_buckets))
288 continue;
289 int ret;
290 kh_put(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols, idx, &ret)kh_put_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
, idx, &ret)
;
291 }
292 ccv_array_clear(input_execs);
293 ccv_array_clear(output_execs);
294 ccv_nnc_graph_exec_symbol_info_t* exec_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0)((void*)(((char*)((graph->exec_symbol_info)->data)) + (
size_t)(graph->exec_symbol_info)->rsize * (size_t)(0)))
;
295 for (j = 0; j < exec_rnum; j++)
296 {
297 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[j].flags)((exec_info[j].flags) & CCV_NNC_GRAPH_EXEC_DEAD))
298 continue;
299 const int* inputs = exec_info[j].inputs;
300 int input_size = exec_info[j].input_size;
301 const int* outputs = exec_info[j].outputs;
302 int output_size = exec_info[j].output_size;
303 if (input_size == 0 && output_size == 0)
304 continue;
305 // Only go through forward pass.
306 if (ccv_nnc_cmd_is_backward(exec_info[j].cmd))
307 continue;
308 const ccv_nnc_graph_exec_symbol_t symbol = {
309 .graph = graph,
310 .d = j
311 };
312 int flag = 0;
313 for (k = 0; inputs && k < input_size && !flag; k++)
314 if (inputs[k] >= 0)
315 for (l = 0; l < checkpoint->input_size && !flag; l++)
316 if (checkpoint->inputs[l].d >= 0 && inputs[k] == checkpoint->inputs[l].d)
317 flag = 1;
318 // Input logic is different from output logic. We need to filter out those execs that contain inputs from within the graph.
319 for (k = 0; inputs && k < input_size && flag; k++)
320 if (inputs[k] >= 0 && kh_get(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols, inputs[k])kh_get_ccv_cnnp_tensor_symbol_set(newly_created_tensor_symbols
, inputs[k])
!= kh_end(newly_created_tensor_symbols)((newly_created_tensor_symbols)->n_buckets))
321 flag = 0;
322 if (flag)
323 ccv_array_push(input_execs, &symbol);
324 flag = 0;
325 for (k = 0; outputs && k < output_size && !flag; k++)
326 if (outputs[k] >= 0)
327 for (l = 0; l < checkpoint->output_size && !flag; l++)
328 if (checkpoint->outputs[l].d >= 0 && outputs[k] == checkpoint->outputs[l].d)
329 flag = 1;
330 if (flag)
331 ccv_array_push(output_execs, &symbol);
332 }
333 if (input_execs->rnum <= 0 || output_execs->rnum <= 0)
334 continue;
335 // Fill in blanks (i.e. the backward ops that are not shown above, but should be included to avoid excluding necessary ones). This is done by flowing gradients from outputs back all the way to inputs.
336 ccv_array_clear(input_gradient_execs);
337 ccv_array_clear(output_gradient_execs);
338 for (j = 0; j < input_execs->rnum; j++)
339 {
340 const int d = ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(input_execs, j)((void*)(((char*)((input_execs)->data)) + (size_t)(input_execs
)->rsize * (size_t)(j)))
)->d;
341 for (k = 0; k < exec_info[d].input_size; k++)
342 if (exec_info[d].inputs[k] >= 0)
343 {
344 const ccv_nnc_tensor_symbol_t gradient_symbol = ccv_nnc_tensor_symbol_for_backward(graph, (ccv_nnc_tensor_symbol_t){
345 .graph = graph,
346 .d = exec_info[d].inputs[k]
347 });
348 if (gradient_symbol.d < 0)
349 continue;
350 const ccv_nnc_graph_exec_symbol_t backward = ccv_nnc_graph_exec_symbol_for_backward(graph, gradient_symbol);
351 if (backward.d < 0)
352 continue;
353 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[backward.d].flags)((exec_info[backward.d].flags) & CCV_NNC_GRAPH_EXEC_DEAD))
354 continue;
355 int flag = 0;
356 for (l = 0; !flag && l < output_gradient_execs->rnum; l++)
357 if (((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(output_gradient_execs, l)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(l)))
)->d == backward.d)
358 flag = 1;
359 if (!flag)
360 ccv_array_push(output_gradient_execs, &backward);
361 }
362 if (exec_info[d].outgoings && exec_info[d].outgoings->rnum > 0)
363 for (k = 0; k < exec_info[d].outgoings->rnum; k++)
364 {
365 const int to_d = *(int*)ccv_array_get(exec_info[d].outgoings, k)((void*)(((char*)((exec_info[d].outgoings)->data)) + (size_t
)(exec_info[d].outgoings)->rsize * (size_t)(k)))
;
366 if (!ccv_nnc_cmd_is_backward(exec_info[to_d].cmd))
367 continue;
368 int flag = 0;
369 for (l = 0; !flag && l < output_gradient_execs->rnum; l++)
370 if (((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(output_gradient_execs, l)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(l)))
)->d == to_d)
371 flag = 1;
372 if (!flag)
373 {
374 const ccv_nnc_graph_exec_symbol_t backward = {
375 .graph = graph,
376 .d = to_d
377 };
378 ccv_array_push(output_gradient_execs, &backward);
379 }
380 }
381 }
382 // For output_gradient_execs, we can be opportunistic and use the wrt symbols (if exists) to find relevant bits.
383 // For input_gradient_execs, there is no other way but to loop over all outgoings, find the ones are direct link as backward execs.
384 for (j = 0; j < output_execs->rnum; j++)
385 {
386 const int d = ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(output_execs, j)((void*)(((char*)((output_execs)->data)) + (size_t)(output_execs
)->rsize * (size_t)(j)))
)->d;
387 if (exec_info[d].outgoings && exec_info[d].outgoings->rnum > 0)
388 for (k = 0; k < exec_info[d].outgoings->rnum; k++)
389 {
390 const int to_d = *(int*)ccv_array_get(exec_info[d].outgoings, k)((void*)(((char*)((exec_info[d].outgoings)->data)) + (size_t
)(exec_info[d].outgoings)->rsize * (size_t)(k)))
;
391 if (!ccv_nnc_cmd_is_backward(exec_info[to_d].cmd))
392 continue;
393 int flag = 0;
394 for (l = 0; !flag && l < input_gradient_execs->rnum; l++)
395 if (((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(input_gradient_execs, l)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(l)))
)->d == to_d)
396 flag = 1;
397 if (!flag)
398 {
399 const ccv_nnc_graph_exec_symbol_t backward = {
400 .graph = graph,
401 .d = to_d
402 };
403 ccv_array_push(input_gradient_execs, &backward);
404 }
405 }
406 }
407 // Note that we have to use up-to-date ones because the exec_info might have outgoings that are up-to-date.
408 ccv_nnc_graph_visit_t* const visit = ccv_nnc_graph_visit_new(graph, exec_info, graph->exec_symbol_info->rnum, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(input_gradient_execs, 0), input_gradient_execs->rnum, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(output_gradient_execs, 0), output_gradient_execs->rnum, 1)({ ccv_nnc_graph_visit_t* _visit_ = (ccv_nnc_graph_visit_t*)malloc
(sizeof(ccv_nnc_graph_visit_t) + sizeof(_visit_->node[0]) *
((graph->exec_symbol_info->rnum) - 1)); _visit_->size
= 0; do { typedef struct { int8_t d; int8_t r; uint16_t c; int32_t
edges; } ccv_nnc_incoming_t; int _i_, _j_; int _incoming_edges_
= 0; for (_i_ = 0; _i_ < (graph->exec_symbol_info->
rnum); _i_++) _incoming_edges_ += ((exec_info)[_i_].outgoings
) ? (exec_info)[_i_].outgoings->rnum : 0; const int _heap_mem_
= ((graph->exec_symbol_info->rnum) + _incoming_edges_ >
1024); ccv_nnc_incoming_t* _incomings_; if (_heap_mem_) _incomings_
= (ccv_nnc_incoming_t*)malloc(sizeof(ccv_nnc_incoming_t) * (
graph->exec_symbol_info->rnum) + sizeof(int32_t) * ((graph
->exec_symbol_info->rnum) * 2 + _incoming_edges_)); else
_incomings_ = (ccv_nnc_incoming_t*)__builtin_alloca (sizeof(
ccv_nnc_incoming_t) * (graph->exec_symbol_info->rnum) +
sizeof(int32_t) * ((graph->exec_symbol_info->rnum) * 2
+ _incoming_edges_)); memset(_incomings_, 0, sizeof(ccv_nnc_incoming_t
) * (graph->exec_symbol_info->rnum)); int32_t* _exists_
[2] = { (int32_t*)(_incomings_ + (graph->exec_symbol_info->
rnum)), (int32_t*)(_incomings_ + (graph->exec_symbol_info->
rnum)) + (graph->exec_symbol_info->rnum), }; int32_t* const
_edges_ = _exists_[1] + (graph->exec_symbol_info->rnum
); for (_i_ = 0; _i_ < (input_gradient_execs->rnum); _i_
++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t*)((void*
)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs
)->rsize * (size_t)(0))))[_i_].graph == graph) ? 1 : 0), __extension__
({ if (((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs
)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t
)(0))))[_i_].graph == graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 408, __extension__
__PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r =
1; _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t*)((void*
)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs
)->rsize * (size_t)(0))))[_i_].d; } int _exist_size_[2] = {
(input_gradient_execs->rnum), 0, }; int _p_ = 0, _q_ = 1;
while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0; for
(_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 1) continue
; _incomings_[_idx_].r = 2; if ((exec_info)[_idx_].outgoings)
for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info
)[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].
outgoings)->rsize * (size_t)(_j_))); ++_incomings_[d].c; if
(_incomings_[d].r != 0) continue; _incomings_[d].r = 1; ((void
) sizeof ((_exist_size_[_q_] < (graph->exec_symbol_info
->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_[_q_] <
(graph->exec_symbol_info->rnum)) ; else __assert_fail (
"_exist_size_[_q_] < (graph->exec_symbol_info->rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 408, __extension__
__PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] =
d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (
_q_) = (_i_)); } for (_i_ = 0; _i_ < (input_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 408, __extension__
__PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r =
3; _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t*)((void*
)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (input_gradient_execs
->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; int _bump_
= 1; while (_exist_size_[_p_] > 0) { _exist_size_[_q_] = 0
; for (_i_ = 0; _i_ < _exist_size_[_p_]; _i_++) { const int32_t
_idx_ = _exists_[_p_][_i_]; if (_incomings_[_idx_].r != 3) continue
; _incomings_[_idx_].r = 4; if ((exec_info)[_idx_].outgoings)
for (_j_ = 0; _j_ < (exec_info)[_idx_].outgoings->rnum
; _j_++) { const int d = *(int*)((void*)(((char*)(((exec_info
)[_idx_].outgoings)->data)) + (size_t)((exec_info)[_idx_].
outgoings)->rsize * (size_t)(_j_))); if (_incomings_[d].edges
== 0) { _incomings_[d].edges = _bump_; _bump_ += _incomings_
[d].c; _incomings_[d].c = 0; } _edges_[_incomings_[d].edges -
1 + _incomings_[d].c] = _idx_; ++_incomings_[d].c; if (_incomings_
[d].r != 2) continue; _incomings_[d].r = 3; ((void) sizeof ((
_exist_size_[_q_] < (graph->exec_symbol_info->rnum))
? 1 : 0), __extension__ ({ if (_exist_size_[_q_] < (graph
->exec_symbol_info->rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_symbol_info->rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 408, __extension__
__PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] =
d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (
_q_) = (_i_)); } for (_i_ = 0; _i_ < (output_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 408, __extension__
__PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r
= 5; _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t*)((void
*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs
)->rsize * (size_t)(0))))[_i_].d; } _exist_size_[0] = (output_gradient_execs
->rnum); _exist_size_[1] = 0; _p_ = 0, _q_ = 1; while (_exist_size_
[_p_] > 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ <
_exist_size_[_p_]; _i_++) { const int32_t _idx_ = _exists_[_p_
][_i_]; if (_incomings_[_idx_].r != 5) continue; _incomings_[
_idx_].r = 6; if (_incomings_[_idx_].edges > 0) for (_j_ =
0; _j_ < _incomings_[_idx_].c; _j_++) { const int d = _edges_
[_incomings_[_idx_].edges - 1 + _j_]; if (_incomings_[d].r !=
4) continue; _incomings_[d].r = 5; ((void) sizeof ((_exist_size_
[_q_] < (graph->exec_symbol_info->rnum)) ? 1 : 0), __extension__
({ if (_exist_size_[_q_] < (graph->exec_symbol_info->
rnum)) ; else __assert_fail ("_exist_size_[_q_] < (graph->exec_symbol_info->rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 408, __extension__
__PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] =
d; ++_exist_size_[_q_]; } } ((_i_) = (_p_), (_p_) = (_q_), (
_q_) = (_i_)); } for (_i_ = 0; _i_ < (output_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 408, __extension__
__PRETTY_FUNCTION__); })); _incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].d
= 1; } for (_i_ = 0; _i_ < (input_gradient_execs->rnum
); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t*)(
(void*)(((char*)((input_gradient_execs)->data)) + (size_t)
(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph ==
graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 408, __extension__
__PRETTY_FUNCTION__); })); _exists_[0][_i_] = ((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((input_gradient_execs)->data)) + (size_t
)(input_gradient_execs)->rsize * (size_t)(0))))[_i_].d; } _p_
= 0; _q_ = 1; _exist_size_[0] = (input_gradient_execs->rnum
); _exist_size_[1] = 0; int _d_ = 0; while (_exist_size_[_p_]
> 0) { _exist_size_[_q_] = 0; for (_i_ = 0; _i_ < _exist_size_
[_p_];) { const int32_t _idx_ = _exists_[_p_][_i_]; _visit_->
node[_visit_->size].index = ((_idx_)); _visit_->node[_visit_
->size].term = ((_incomings_[_idx_].d)); ++_visit_->size
;; if (_incomings_[_idx_].d) { ++_d_; _incomings_[_idx_].r = 7
; } if ((exec_info)[_idx_].outgoings) { if ((exec_info)[_idx_
].outgoings->rnum == 1) { const int d = *(int*)((void*)(((
char*)(((exec_info)[_idx_].outgoings)->data)) + (size_t)((
exec_info)[_idx_].outgoings)->rsize * (size_t)(0))); --_incomings_
[d].c; if (_incomings_[d].c == 0 && _incomings_[d].r ==
6 && _d_ < (output_gradient_execs->rnum)) { _exists_
[_p_][_i_] = d; continue; } } else for (_j_ = 0; _j_ < (exec_info
)[_idx_].outgoings->rnum; _j_++) { const int d = *(int*)((
void*)(((char*)(((exec_info)[_idx_].outgoings)->data)) + (
size_t)((exec_info)[_idx_].outgoings)->rsize * (size_t)(_j_
))); --_incomings_[d].c; if (_incomings_[d].c == 0 &&
_incomings_[d].r == 6 && _d_ < (output_gradient_execs
->rnum)) { ((void) sizeof ((_exist_size_[_q_] < (graph->
exec_symbol_info->rnum)) ? 1 : 0), __extension__ ({ if (_exist_size_
[_q_] < (graph->exec_symbol_info->rnum)) ; else __assert_fail
("_exist_size_[_q_] < (graph->exec_symbol_info->rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 408, __extension__
__PRETTY_FUNCTION__); })); _exists_[_q_][_exist_size_[_q_]] =
d; ++_exist_size_[_q_]; } } } ++_i_; } ((_i_) = (_p_), (_p_)
= (_q_), (_q_) = (_i_)); } for (_i_ = 0; _i_ < (output_gradient_execs
->rnum); _i_++) { ((void) sizeof ((((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ? 1 : 0), __extension__ ({ if (((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph
== graph) ; else __assert_fail ("((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].graph == graph"
, "ccv_cnnp_model_gradient_checkpointing.c", 408, __extension__
__PRETTY_FUNCTION__); })); if (_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].r
== 7) continue; if (!(1)) { ((void) sizeof ((_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c
== 0) ? 1 : 0), __extension__ ({ if (_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c
== 0) ; else __assert_fail ("_incomings_[((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c == 0"
, "ccv_cnnp_model_gradient_checkpointing.c", 408, __extension__
__PRETTY_FUNCTION__); })); } else if (_incomings_[((ccv_nnc_graph_exec_symbol_t
*)((void*)(((char*)((output_gradient_execs)->data)) + (size_t
)(output_gradient_execs)->rsize * (size_t)(0))))[_i_].d].c
> 0) continue; _visit_->node[_visit_->size].index =
((((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)((output_gradient_execs
)->data)) + (size_t)(output_gradient_execs)->rsize * (size_t
)(0))))[_i_].d)); _visit_->node[_visit_->size].term = (
(_incomings_[((ccv_nnc_graph_exec_symbol_t*)((void*)(((char*)
((output_gradient_execs)->data)) + (size_t)(output_gradient_execs
)->rsize * (size_t)(0))))[_i_].d].d)); ++_visit_->size;
; } if (_heap_mem_) free(_incomings_); } while (0);; ((void) sizeof
((_visit_->size <= (graph->exec_symbol_info->rnum
)) ? 1 : 0), __extension__ ({ if (_visit_->size <= (graph
->exec_symbol_info->rnum)) ; else __assert_fail ("_visit_->size <= (graph->exec_symbol_info->rnum)"
, "ccv_cnnp_model_gradient_checkpointing.c", 408, __extension__
__PRETTY_FUNCTION__); })); _visit_; })
;
409 ccv_nnc_graph_visit_for(visit, exec_info, node, idx){ int _i_; for (_i_ = 0; _i_ < (visit)->size; _i_++) { const
int idx __attribute__((unused)) = (visit)->node[_i_].index
; const int _node_unused_ __attribute__((unused)) = (visit)->
node[_i_].term; typeof ((exec_info)) const node __attribute__
((unused)) = (exec_info) + idx;
{
410 if (idx < exec_rnum && !CCV_NNC_GRAPH_EXEC_IS_DEAD(node->flags)((node->flags) & CCV_NNC_GRAPH_EXEC_DEAD))
411 maskbit[idx >> 5] |= (1u << (idx & 0x1f));
412 } ccv_nnc_graph_visit_endfor} }
413 ccv_array_clear(visited_backward_execs);
414 // Add more backward passes to the list. Note that we don't add everything; in particular, new nodes created through gradient checkpointing are ignored.
415#define visitor(node, idx, _) \
416 if (idx < exec_rnum && !CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[idx].flags)((exec_info[idx].flags) & CCV_NNC_GRAPH_EXEC_DEAD) && maskbit[idx >> 5] & (1u << (idx & 0x1f))) \
417 ccv_array_add_unique_int(visited_backward_execs, idx);
418 CCV_NNC_GRAPH_VISIT(graph, reversed_nodes, exec_rnum, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(output_gradient_execs, 0), output_gradient_execs->rnum, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(input_gradient_execs, 0), input_gradient_execs->rnum, 0, visitor);
419 for (j = 0; j < input_gradient_execs->rnum; j++)
420 ccv_array_add_unique_int(visited_backward_execs, ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(input_gradient_execs, j))->d);
421#undef visitor
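The visitor defined above follows the callback-as-macro pattern: the traversal macro expands its loop in place and invokes whatever visitor was #define'd immediately before it, so the body is inlined with no function-pointer indirection. A stripped-down sketch of the pattern over a plain array (hypothetical names, not the library's CCV_NNC_GRAPH_VISIT):

#include <stdio.h>

/* Traversal macro: walks an array and hands each element to `visitor`. */
#define ARRAY_VISIT(values, count, visitor) \
	do { \
		int _i_; \
		for (_i_ = 0; _i_ < (count); _i_++) \
			visitor((values)[_i_], _i_); \
	} while (0)

int main(void)
{
	const int values[4] = { 3, 1, 4, 1 };
	int sum = 0;
#define visitor(value, idx) \
	sum += (value);
	ARRAY_VISIT(values, 4, visitor);
#undef visitor
	printf("sum = %d\n", sum); /* prints 9 */
	return 0;
}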
422 ccv_cnnp_gradient_checkpoint_build_t build = {
423 .tensor_context = {
424 .record = 1,
425 .tensor_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0),
426 },
427 .graph_exec_symbols = ccv_array_new(sizeof(ccv_nnc_graph_exec_symbol_t), 0, 0),
428 .all_tensor_symbols = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0),
429 };
430 build.tensor_context.old_tensor_symbol_new_hook_context = ccv_nnc_tensor_symbol_new_hook(graph, _ccv_cnnp_gradient_checkpoint_tensor_symbol_new_hook, &build, &build.tensor_context.old_tensor_symbol_new_hook);
431 build.tensor_context.old_tensor_symbol_alias_new_hook_context = ccv_nnc_tensor_symbol_alias_new_hook(graph, _ccv_cnnp_gradient_checkpoint_tensor_symbol_alias_new_hook, &build, &build.tensor_context.old_tensor_symbol_alias_new_hook);
432 build.old_graph_exec_symbol_new_hook_context = ccv_nnc_graph_exec_symbol_new_hook(graph, _ccv_cnnp_model_gradient_checkpoint_graph_exec_symbol_new_hook, &build, &build.old_graph_exec_symbol_new_hook);
433 ccv_array_clear(parameters);
434 ccv_array_clear(parameter_ids);
435 ccv_array_clear(parameter_trainables);
436 ccv_array_clear(internals);
437 ccv_array_clear(internal_ids);
438 ccv_cnnp_model_sequence_t model_sequence = {
439 .bank = kh_init(ccv_cnnp_model_name_bank)
440 };
441 ccv_cnnp_model_add_to_array_context_t add_to_parameter_context = {
442 .add_parameter_indices = 0,
443 .prefix = 't',
444 .sequence = &model_sequence,
445 .symbols = parameters,
446 .ids = parameter_ids,
447 .trainables = parameter_trainables,
448 };
449 ccv_cnnp_model_add_to_array_context_t add_to_output_context = {
450 .add_parameter_indices = 0,
451 .prefix = 'r',
452 .sequence = &model_sequence,
453 .symbols = internals,
454 .ids = internal_ids,
455 .trainables = 0,
456 };
457 ccv_cnnp_model_build_data_t build_data = {
458 .is_trainable = checkpoint->is_trainable,
459 .model_sequence = &model_sequence,
460 .add_to_array = ccv_cnnp_model_add_to_array,
461 .parameters = parameters,
462 .context = {
463 .add_to_parameter = &add_to_parameter_context,
464 .add_to_output = &add_to_output_context,
465 },
466 .is_gradient_checkpointing = 1, // Mark this as true so we don't allocate gradient_checkpoints array or override the hooks.
467 .gradient_checkpoints = 0,
468 };
469 checkpoint->model->data = &build_data;
470 checkpoint->build(checkpoint->model, graph, checkpoint->inputs, checkpoint->input_size, max_outputs, checkpoint->output_size);
471 checkpoint->model->data = 0;
472 kh_destroy(ccv_cnnp_model_name_bank, model_sequence.bank);
473 if (model_sequence.sequences)
474 ccv_array_free(model_sequence.sequences);
475 ccv_nnc_tensor_symbol_new_hook(graph, build.tensor_context.old_tensor_symbol_new_hook, build.tensor_context.old_tensor_symbol_new_hook_context, 0);
476 ccv_nnc_tensor_symbol_alias_new_hook(graph, build.tensor_context.old_tensor_symbol_alias_new_hook, build.tensor_context.old_tensor_symbol_alias_new_hook_context, 0);
477 ccv_nnc_graph_exec_symbol_autogen(graph, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, 0), build.graph_exec_symbols->rnum, 0);
478 for (j = 0; j < parameter_ids->rnum; j++)
479 ccfree(*(char**)ccv_array_get(parameter_ids, j));
480 for (j = 0; j < internal_ids->rnum; j++)
481 ccfree(*(char**)ccv_array_get(internal_ids, j));
482 // Note that there is no graph optimization applied here.
483 exec_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0);
484 // Reuse existing one.
485 kh_clear(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols);
486 for (j = 0; j < build.tensor_context.tensor_symbols->rnum; j++)
487 {
488 const int idx = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(build.tensor_context.tensor_symbols, j))->d;
489 if (idx < 0)
490 continue;
491 if (kh_get(ccv_cnnp_tensor_symbol_set, parameters_or_internals, idx) != kh_end(parameters_or_internals))
492 continue;
493 int ret;
494 kh_put(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols, idx, &ret);
495 }
496 ccv_array_t* const newly_input_execs = input_execs;
497 ccv_array_t* const newly_output_execs = output_execs;
498 ccv_array_clear(newly_input_execs);
499 ccv_array_clear(newly_output_execs);
500 for (j = 0; j < build.graph_exec_symbols->rnum; j++)
501 {
502 const int idx = ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j))->d;
503 if (idx < 0)
504 continue;
505 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[idx].flags))
506 continue;
507 const ccv_nnc_graph_exec_symbol_t symbol = {
508 .graph = graph,
509 .d = idx
510 };
511 const int* inputs = exec_info[idx].inputs;
512 int input_size = exec_info[idx].input_size;
513 // Only go through forward pass.
514 assert(!ccv_nnc_cmd_is_backward(exec_info[idx].cmd));
515 int flag = 0;
516 for (k = 0; inputs && k < input_size && !flag; k++)
517 if (inputs[k] >= 0)
518 for (l = 0; l < checkpoint->input_size && !flag; l++)
519 if (checkpoint->inputs[l].d >= 0 && inputs[k] == checkpoint->inputs[l].d)
520 flag = 1;
521 // Input logic is different from output logic. We need to filter out the execs whose inputs come from within the graph.
522 for (k = 0; inputs && k < input_size && flag; k++)
523 if (inputs[k] >= 0 && kh_get(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols, inputs[k]) != kh_end(newly_created_tensor_symbols))
524 flag = 0;
525 if (flag)
526 ccv_array_push(newly_input_execs, &symbol);
527 flag = 0;
528 const int* outputs = exec_info[idx].outputs;
529 int output_size = exec_info[idx].output_size;
530 for (k = 0; outputs && k < output_size && !flag; k++)
531 if (outputs[k] >= 0)
532 for (l = 0; l < checkpoint->output_size && !flag; l++)
533 if (max_outputs[l].d >= 0 && outputs[k] == max_outputs[l].d)
534 flag = 1;
535 if (flag)
536 ccv_array_push(newly_output_execs, &symbol);
537 }
538 for (j = 0; j < checkpoint->input_size; j++)
539 if (checkpoint->inputs[j].d >= 0)
540 ccv_array_push(parameters, checkpoint->inputs + j);
541 ccv_nnc_symbolic_graph_simplify(graph,
542 SYMBOLIC_GRAPH_PASSES(CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
543 CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,
544 CCV_NNC_SIMPLIFY_OPS_FUSION),
545 ccv_array_get(parameters, 0), parameters->rnum,
546 max_outputs, checkpoint->output_size,
547 ccv_array_get(newly_input_execs, 0), newly_input_execs->rnum, ccv_array_get(newly_output_execs, 0), newly_output_execs->rnum);
548 ccv_nnc_graph_exec_symbol_new_hook(graph, build.old_graph_exec_symbol_new_hook, build.old_graph_exec_symbol_new_hook_context, 0);
549 // Need to autogen and redo source / destination.
550 ccv_nnc_graph_exec_symbol_autogen(graph, (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, 0), build.graph_exec_symbols->rnum, 0);
551 ccv_nnc_tensor_symbol_info_t* const tensor_symbol_info = (ccv_nnc_tensor_symbol_info_t*)ccv_array_get(graph->tensor_symbol_info, 0);
552 exec_info = (ccv_nnc_graph_exec_symbol_info_t*)ccv_array_get(graph->exec_symbol_info, 0);
553 ccv_array_clear(newly_input_execs);
554 for (j = 0; j < build.graph_exec_symbols->rnum; j++)
555 {
556 const int idx = ((ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j))->d;
557 if (idx < 0)
558 continue;
559 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[idx].flags))
560 continue;
561 const ccv_nnc_graph_exec_symbol_t symbol = {
562 .graph = graph,
563 .d = idx
564 };
565 const int* inputs = exec_info[idx].inputs;
566 int input_size = exec_info[idx].input_size;
567 // Only go through forward pass.
568 assert(!ccv_nnc_cmd_is_backward(exec_info[idx].cmd));
569 int flag = 0;
570 for (k = 0; inputs && k < input_size && !flag; k++)
571 if (inputs[k] >= 0)
572 for (l = 0; l < checkpoint->input_size && !flag; l++)
573 if (checkpoint->inputs[l].d >= 0 && inputs[k] == checkpoint->inputs[l].d)
574 flag = 1;
575 for (k = 0; inputs && k < input_size && flag; k++)
576 if (inputs[k] >= 0 && kh_get(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols, inputs[k]) != kh_end(newly_created_tensor_symbols))
577 flag = 0;
578 if (flag)
579 ccv_array_push(newly_input_execs, &symbol);
580 }
581 // Build a map between old tensor symbols and new tensor symbols.
582 assert(build.tensor_context.tensor_symbols->rnum <= checkpoint->tensor_symbols->rnum);
583 // Build a map to potentially map from old input to new input.
584 kh_clear(ccv_cnnp_tensor_symbol_map, symbol_map);
585 for (j = 0, k = 0; j < build.tensor_context.tensor_symbols->rnum && k < checkpoint->tensor_symbols->rnum;)
586 {
587 const int from_d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(checkpoint->tensor_symbols, k))->d;
588 if (from_d < 0) // This is removed, move to the next one.
589 {
590 ++j;
591 ++k;
592 continue;
593 }
594 const int to_d = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(build.tensor_context.tensor_symbols, j))->d;
595 assert(to_d >= 0);
596 int from_flag = kh_get(ccv_cnnp_tensor_symbol_set, parameters_or_internals, from_d) != kh_end(parameters_or_internals);
597 int to_flag = kh_get(ccv_cnnp_tensor_symbol_set, parameters_or_internals, to_d) != kh_end(parameters_or_internals);
598 if (from_flag)
599 ++k;
600 if (to_flag)
601 ++j;
602 if (from_flag || to_flag)
603 continue;
604 ++k;
605 ++j;
606 // Skip if from_d is one of the outputs.
607 for (l = 0; !from_flag && l < checkpoint->output_size; l++)
608 if (checkpoint->outputs[l].d == from_d)
609 from_flag = 1;
610 if (from_flag)
611 continue;
612 // Skip if to_d is one of the outputs.
613 for (l = 0; !to_flag && l < checkpoint->output_size; l++)
614 if (checkpoint->outputs[l].d == to_d)
615 to_flag = 1;
616 if (to_flag)
617 continue;
618 int ret = 0;
619 khiter_t h = kh_put(ccv_cnnp_tensor_symbol_map, symbol_map, from_d, &ret);
620 kh_val(symbol_map, h) = to_d;
621 }
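The symbol_map above is a khash hash table from klib, keyed by the old tensor symbol index with the new index as the value. A minimal sketch of an int-to-int map with that header (assumes klib's khash.h is on the include path; the type name i2i is made up for the example):

#include <stdio.h>
#include "khash.h"

KHASH_MAP_INIT_INT(i2i, int) /* int keys, int values */

int main(void)
{
	khash_t(i2i)* const map = kh_init(i2i);
	int ret;
	/* Insert from_d -> to_d, mirroring how symbol_map is populated above. */
	khiter_t it = kh_put(i2i, map, 17, &ret);
	kh_val(map, it) = 42;
	/* Lookup: kh_get returns kh_end(map) when the key is absent. */
	it = kh_get(i2i, map, 17);
	if (it != kh_end(map))
		printf("17 -> %d\n", kh_val(map, it));
	kh_destroy(i2i, map);
	return 0;
}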
622 // Now go over all backward passes to replace inputs with the ones from the symbol map. Record the ones that are used.
623 ccv_array_clear(newly_used_outputs);
624 ccv_array_clear(replaced_backward_execs);
625 for (j = 0; j < visited_backward_execs->rnum; j++)
626 {
627 const int idx = *(int*)ccv_array_get(visited_backward_execs, j);
628 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[idx].flags))
629 continue;
630 assert(idx >= 0);
631 assert(idx < exec_rnum);
632 if (!ccv_nnc_cmd_is_backward(exec_info[idx].cmd))
633 continue;
634 for (k = 0; k < exec_info[idx].input_size; k++)
635 if (exec_info[idx].inputs[k] >= 0)
636 {
637 const khiter_t h = kh_get(ccv_cnnp_tensor_symbol_map, symbol_map, exec_info[idx].inputs[k]);
638 if (h != kh_end(symbol_map)) // Replacing it.
639 {
640 int newly_created_output = kh_val(symbol_map, h);
641 exec_info[idx].inputs[k] = newly_created_output;
642 ccv_array_add_unique_int(newly_used_outputs, newly_created_output);
643 if (tensor_symbol_info[newly_created_output].alias_ref > 0)
644 {
645 newly_created_output = tensor_symbol_info[newly_created_output].alias_ref - 1;
646 ccv_array_add_unique_int(newly_used_outputs, newly_created_output);
647 }
648 ccv_array_add_unique_int(replaced_backward_execs, idx);
649 }
650 }
651 }
652 for (j = 0; j < build.graph_exec_symbols->rnum; j++)
653 {
654 ccv_nnc_graph_exec_symbol_t* const symbol = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j);
655 if (symbol->d < 0)
656 continue;
657 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[symbol->d].flags))
658 continue;
659 int x, y;
660 for (k = 0; k < replaced_backward_execs->rnum; k++)
661 {
662 const int idx = *(int*)ccv_array_get(replaced_backward_execs, k);
663 assert(idx >= 0);
664 assert(idx < exec_rnum);
665 assert(ccv_nnc_cmd_is_backward(exec_info[idx].cmd));
666 int flag = 0;
667 for (x = 0; !flag && x < exec_info[idx].input_size; x++)
668 {
669 int x_d = exec_info[idx].inputs[x];
670 if (x_d < 0)
671 continue;
672 if (tensor_symbol_info[x_d].alias_ref > 0)
673 x_d = tensor_symbol_info[x_d].alias_ref - 1;
674 for (y = 0; !flag && y < exec_info[symbol->d].output_size; y++)
675 {
676 int y_d = exec_info[symbol->d].outputs[y];
677 if (y_d < 0)
678 continue;
679 if (tensor_symbol_info[y_d].alias_ref > 0)
680 y_d = tensor_symbol_info[y_d].alias_ref - 1;
681 if (x_d == y_d)
682 flag = 1;
683 }
684 }
685 if (flag)
686 ccv_nnc_graph_exec_symbol_concat(graph, *symbol, (ccv_nnc_graph_exec_symbol_t){
687 .graph = graph,
688 .d = idx
689 });
690 }
691 }
692 // Find the parents of visited_backward_execs and use them as the starting points for all newly added graph_exec_symbols. Use the visited backward execs as the sources and all their parents as the destinations, then go through with a graph visit.
693 ccv_nnc_exec_dep_t exec_dep = _ccv_nnc_exec_dep_new(graph, visit, exec_rnum, maskbit);
694 // Now go from outputs to inputs, unmark visited ones.
695 ccv_nnc_graph_visit_for(visit, exec_info, node, idx) {
696 if (idx < exec_rnum)
697 maskbit[idx >> 5] &= ~(1u << (idx & 0x1f));
698 } ccv_nnc_graph_visit_endfor
699 ccv_nnc_graph_visit_free(visit);
700 // Go through the visited backward execs and remove the ones that have no dependency on any replaced backward execs.
701 for (j = 0; j < visited_backward_execs->rnum;)
702 {
703 const int idx = *(int*)ccv_array_get(visited_backward_execs, j);
704 if (ccv_array_contain_int(replaced_backward_execs, idx))
705 {
706 ++j;
707 continue;
708 }
709 ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep.deps, idx);
710 if (!vector)
711 vector = (ccv_sparse_matrix_vector_t*)1; // Mark that we tried but could not find it.
712 int flag = 0;
713 for (k = 0; !flag && k < replaced_backward_execs->rnum; k++)
714 {
715 const int d = *(int*)ccv_array_get(replaced_backward_execs, k);
716 flag = ccv_nnc_exec_dep_check(exec_dep, idx, vector, d);
717 }
718 if (!flag)
719 {
720 if (j < visited_backward_execs->rnum - 1)
721 *(int*)ccv_array_get(visited_backward_execs, j) = *(int*)ccv_array_get(visited_backward_execs, visited_backward_execs->rnum - 1);
722 --visited_backward_execs->rnum;
723 continue;
724 }
725 ++j;
726 }
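The pruning loop above removes entries with the unordered swap-with-last idiom: overwrite the doomed slot with the last element, shrink rnum, and re-test the same index, which keeps the scan linear at the cost of element order. A small sketch of the same idiom on a plain int array (hypothetical helper, not the library's ccv_array_t):

#include <stdio.h>

/* Remove every odd element; order is not preserved. */
static void remove_odd(int* const items, int* const count)
{
	int j = 0;
	while (j < *count)
	{
		if (items[j] % 2 != 0)
		{
			items[j] = items[*count - 1]; /* overwrite with the last element */
			--*count;                     /* shrink, then re-check index j   */
			continue;
		}
		++j;
	}
}

int main(void)
{
	int items[] = { 1, 2, 3, 4, 5, 6 };
	int count = 6;
	remove_odd(items, &count);
	for (int j = 0; j < count; j++)
		printf("%d ", items[j]); /* prints the even elements, reordered */
	printf("\n");
	return 0;
}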
727 // Now go through all replaced_backward_execs to find the ones that have no dependencies in visited_backward_execs.
728 for (j = 0; j < replaced_backward_execs->rnum; j++)
729 {
730 const int idx = *(int*)ccv_array_get(replaced_backward_execs, j);
731 ccv_sparse_matrix_vector_t* vector = ccv_get_sparse_matrix_vector(exec_dep.deps, idx);
732 if (!vector)
733 vector = (ccv_sparse_matrix_vector_t*)1; // Mark that we tried but could not find it.
734 int flag = 0;
735 for (k = 0; !flag && k < visited_backward_execs->rnum; k++)
736 {
737 const int d = *(int*)ccv_array_get(visited_backward_execs, k);
738 flag = ccv_nnc_exec_dep_check(exec_dep, idx, vector, d);
739 }
740 // If this one has no parents within the visited_backward_execs, it is a good place for us to add all its parents as dependencies for input_execs.
741 if (!flag)
742 {
743 assert(idx < exec_rnum);
744 ccv_array_t* const outgoings = reversed_nodes[idx].outgoings;
745 assert(outgoings);
746 for (k = 0; k < outgoings->rnum; k++)
747 {
748 const int d = *(int*)ccv_array_get(outgoings, k);
749 for (l = 0; l < newly_input_execs->rnum; l++)
750 {
751 ccv_nnc_graph_exec_symbol_concat(graph, (ccv_nnc_graph_exec_symbol_t){
752 .graph = graph,
753 .d = d
754 }, *(ccv_nnc_graph_exec_symbol_t*)ccv_array_get(newly_input_execs, l));
755 }
756 }
757 }
758 }
759 ccv_nnc_exec_dep_free(exec_dep);
760 // Go through all execs and free the ones whose outputs are not used.
761 // Reuse this array since its previous contents are no longer needed.
762 ccv_array_t* forward_pass_inputs = visited_backward_execs;
763 int any_deleted;
764 do {
765 // Build a map of still active inputs.
766 ccv_array_clear(forward_pass_inputs);
767 for (j = 0; j < build.graph_exec_symbols->rnum; j++)
768 {
769 ccv_nnc_graph_exec_symbol_t* const symbol = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j);
770 if (symbol->d < 0)
771 continue;
772 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[symbol->d].flags))
773 continue;
774 int* const inputs = exec_info[symbol->d].inputs;
775 const int input_size = exec_info[symbol->d].input_size;
776 for (k = 0; k < input_size; k++)
777 {
778 int d = inputs[k];
779 if (d < 0)
780 continue;
781 ccv_array_add_unique_int(forward_pass_inputs, d);
782 if (tensor_symbol_info[d].alias_ref > 0)
783 {
784 d = tensor_symbol_info[d].alias_ref - 1;
785 ccv_array_add_unique_int(forward_pass_inputs, d);
786 }
787 }
788 }
789 any_deleted = 0;
790 for (j = 0; j < build.graph_exec_symbols->rnum; j++)
791 {
792 ccv_nnc_graph_exec_symbol_t* const symbol = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j);
793 if (symbol->d < 0)
794 continue;
795 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[symbol->d].flags))
796 continue;
797 int* const outputs = exec_info[symbol->d].outputs;
798 const int output_size = exec_info[symbol->d].output_size;
799 int flag = 0;
800 for (k = 0; !flag && k < output_size; k++)
801 {
802 int d = outputs[k];
803 if (d < 0)
804 continue;
805 flag = ccv_array_contain_int(newly_used_outputs, d) || ccv_array_contain_int(forward_pass_inputs, d);
806 if (!flag && tensor_symbol_info[d].alias_ref > 0)
807 {
808 d = tensor_symbol_info[d].alias_ref - 1;
809 flag = ccv_array_contain_int(newly_used_outputs, d) || ccv_array_contain_int(forward_pass_inputs, d);
810 }
811 }
812 if (flag)
813 continue;
814 ccv_nnc_graph_exec_symbol_free(graph, *symbol);
815 symbol->d = -1;
816 symbol->graph = 0;
817 any_deleted = 1;
818 }
819 } while (any_deleted);
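The do/while above is a fixed-point sweep: delete any newly added exec none of whose outputs is consumed, and repeat, because deleting one node can orphan the outputs of another. A compact sketch of the same idea on a toy producer/consumer table (every name and the data are made up):

#include <stdio.h>

#define N 4

int main(void)
{
	/* Node j produces tensor j; consumes[j] is the tensor it reads (-1 for none). */
	int consumes[N] = { -1, 0, 1, -1 };
	int live[N] = { 1, 1, 1, 1 };
	const int externally_used[N] = { 0, 0, 0, 1 }; /* only tensor 3 is needed downstream */
	int any_deleted;
	do {
		any_deleted = 0;
		for (int j = 0; j < N; j++)
		{
			if (!live[j])
				continue;
			int used = externally_used[j];
			for (int k = 0; !used && k < N; k++)
				if (live[k] && consumes[k] == j)
					used = 1;
			if (!used)
			{
				live[j] = 0; /* deleting this may orphan its inputs on the next pass */
				any_deleted = 1;
			}
		}
	} while (any_deleted);
	for (int j = 0; j < N; j++)
		printf("node %d: %s\n", j, live[j] ? "kept" : "deleted");
	return 0;
}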
820 ccv_array_clear(forward_pass_inputs);
821 for (j = 0; j < build.graph_exec_symbols->rnum; j++)
822 {
823 ccv_nnc_graph_exec_symbol_t* const symbol = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j);
824 if (symbol->d < 0)
825 continue;
826 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[symbol->d].flags))
827 continue;
828 int* const inputs = exec_info[symbol->d].inputs;
829 const int input_size = exec_info[symbol->d].input_size;
830 for (k = 0; k < input_size; k++)
831 {
832 if (inputs[k] < 0)
833 continue;
834 ccv_array_add_unique_int(forward_pass_inputs, inputs[k]);
835 if (tensor_symbol_info[inputs[k]].alias_ref > 0)
836 ccv_array_add_unique_int(forward_pass_inputs, tensor_symbol_info[inputs[k]].alias_ref - 1);
837 }
838 int* const outputs = exec_info[symbol->d].outputs;
839 const int output_size = exec_info[symbol->d].output_size;
840 for (k = 0; k < output_size; k++)
841 {
842 if (outputs[k] < 0)
843 continue;
844 ccv_array_add_unique_int(forward_pass_inputs, outputs[k]);
845 if (tensor_symbol_info[outputs[k]].alias_ref > 0)
846 ccv_array_add_unique_int(forward_pass_inputs, tensor_symbol_info[outputs[k]].alias_ref - 1);
847 }
848 }
849 // Free unused tensor symbols.
850 for (j = 0; j < build.all_tensor_symbols->rnum; j++)
851 {
852 const ccv_nnc_tensor_symbol_t* symbol = ((ccv_nnc_tensor_symbol_t*)ccv_array_get(build.all_tensor_symbols, j));
853 if (ccv_array_contain_int(newly_used_outputs, symbol->d) || ccv_array_contain_int(forward_pass_inputs, symbol->d))
854 continue;
855 if (tensor_symbol_info[symbol->d].alias_ref > 0)
856 {
857 const int d = tensor_symbol_info[symbol->d].alias_ref - 1;
858 if (ccv_array_contain_int(newly_used_outputs, d) || ccv_array_contain_int(forward_pass_inputs, d))
859 continue;
860 }
861 ccv_nnc_tensor_symbol_free(graph, *symbol);
862 }
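Both the liveness checks and the free above first chase alias_ref back to the base tensor before testing membership, since an alias shares storage with its base and must not be dropped while the base is still referenced (or vice versa). A tiny sketch of that resolution step (the field name mirrors the listing, but the data is made up):

#include <stdio.h>

typedef struct {
	int alias_ref; /* 0: not an alias; otherwise index of the base tensor + 1 */
} tensor_info_t;

/* Resolve an alias to the index of its underlying tensor. */
static int base_index(const tensor_info_t* const info, const int d)
{
	return info[d].alias_ref > 0 ? info[d].alias_ref - 1 : d;
}

int main(void)
{
	/* Tensor 2 is an alias of tensor 0; the others are plain tensors. */
	const tensor_info_t info[3] = { { 0 }, { 0 }, { 1 } };
	for (int d = 0; d < 3; d++)
		printf("tensor %d resolves to %d\n", d, base_index(info, d));
	return 0;
}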
863 for (j = 0; j < build.graph_exec_symbols->rnum; j++)
864 {
865 ccv_nnc_graph_exec_symbol_t* const symbol = (ccv_nnc_graph_exec_symbol_t*)ccv_array_get(build.graph_exec_symbols, j);
866 if (symbol->d < 0)
867 continue;
868 if (CCV_NNC_GRAPH_EXEC_IS_DEAD(exec_info[symbol->d].flags))
869 continue;
870 ccv_nnc_graph_exec_symbol_set_flags(graph, *symbol, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
871 }
872 // Free these newly created execs and tensor symbols.
873 ccv_array_free(build.tensor_context.tensor_symbols);
874 ccv_array_free(build.graph_exec_symbols);
875 ccv_array_free(build.all_tensor_symbols);
876 }
877 kh_destroy(ccv_cnnp_tensor_symbol_map, symbol_map);
878 kh_destroy(ccv_cnnp_tensor_symbol_set, newly_created_tensor_symbols);
879 kh_destroy(ccv_cnnp_tensor_symbol_set, parameters_or_internals);
880 ccfree(max_outputs);
881 ccv_array_free(newly_used_outputs);
882 ccv_array_free(parameters);
883 ccv_array_free(parameter_ids);
884 ccv_array_free(parameter_trainables);
885 ccv_array_free(internals);
886 ccv_array_free(internal_ids);
887 ccfree(maskbit);
888 ccv_array_free(input_gradient_execs);
889 ccv_array_free(output_gradient_execs);
890 ccv_array_free(input_execs);
891 ccv_array_free(output_execs);
892 ccv_array_free(replaced_backward_execs);
893 ccv_array_free(visited_backward_execs);
894 for (i = 0; i < exec_rnum; i++)
895 if (reversed_nodes[i].outgoings)
896 ccv_array_free(reversed_nodes[i].outgoings);
897 ccfree(reversed_nodes);
898}