Coverage Report

Created: 2017-11-12 13:27

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/ccv_nnc.h

/**********************************************************
 * C-based/Cached/Core Computer Vision Library
 * Liu Liu, 2010-02-01
 **********************************************************/

/**********************************************************
 * CCV - Neural Network Collection
 **********************************************************/

#ifndef GUARD_ccv_nnc_h
#define GUARD_ccv_nnc_h

#include <ccv.h>

// These are generated by cmd/build-cmd.rb
#include "cmd/ccv_nnc_cmd.h"
#include "cmd/ccv_nnc_backend.h"

enum {
  // Attributes that enable tensor allocation optimization
  CCV_NNC_CMD_ATTR_INPLACE      = 0x01, // Is it an in-place operation (thus, an input tensor can be the same as an output tensor)? This is actually a stronger assumption than it seems: it says that any input tensor can be the same as any of the output tensors. Thus, input tensors of [a, b] and output tensors of [b, a], [a, a] or [b, b] are all supported if your compute node carries this flag.
  // Attributes that enable symbolic graph simplification
  CCV_NNC_CMD_ATTR_PASSTHROUGH  = 0x02, // This doesn't compute anything, but passes the first n tensors through to the output (useful for a backprop that is an identity pass-through).
  CCV_NNC_CMD_ATTR_OUTPUT_ONES  = 0x04, // All the output tensors are 1s (unit).
  CCV_NNC_CMD_ATTR_NULL_IS_ONES = 0x08, // Accept null inputs as if they were tensors filled with 1s (unit).
};

enum {
  CCV_NNC_ACCUMULATE_OUTPUT = 0x01, // Enable accumulate outputs.
  CCV_NNC_ZERO_MEMORY_ALLOC = 0x02, // Don't allocate any extra memory for this operation.
};

enum {
  CCV_NNC_EXEC_SUCCESS   = 0,
  CCV_NNC_EXEC_INVALID   = -1, // Invalid input.
  CCV_NNC_EXEC_NO_KERNEL = -2,
  CCV_NNC_EXEC_OOM       = -3,
};

typedef struct {
  struct {
    int dim[CCV_NNC_MAX_DIM_ALLOC];
  } size; /**< [size] The window size for the layer. For a fully connected layer, it is 1 because it is a 1x1 convolutional layer with a count of filters. */
  union {
    struct {
      int count; /**< [convolution.count] The number of filters for the convolutional layer. */
    } convolution;
    struct {
      int reserved;
    } pool;
    struct {
      float kappa; /**< [rnorm.kappa] As in b[i] = a[i] / (rnorm.kappa + rnorm.alpha * sum(a, i - rnorm.size / 2, i + rnorm.size / 2)) ^ rnorm.beta */
      float alpha; /**< [rnorm.alpha] See **rnorm.kappa**. */
      float beta; /**< [rnorm.beta] See **rnorm.kappa**. */
    } rnorm;
    struct {
      float a[3]; /**< BLAS scalars. */
      int count; /**< [blas.count] The number of outputs for the blas layer. */
    } blas;
    void* userdata;
  };
} ccv_nnc_cmd_param_t;
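
As a minimal sketch of how these parameters are meant to be filled in, the snippet below sets up ccv_nnc_cmd_param_t for a 3x3 convolution with 64 filters. It only touches fields declared above; the dimension ordering inside size.dim and whether a given backend reads these exact fields are assumptions, not guarantees from this header.

// Hypothetical illustration: a 3x3 convolution window with 64 output filters.
// (The meaning of the entries in size.dim, e.g. kernel height / width / channels,
// is an assumption here; consult the command documentation for the exact order.)
ccv_nnc_cmd_param_t conv_params = {
  .size = {
    .dim = { 3, 3, 3 }, // convolution window; unset trailing entries stay 0
  },
  .convolution = {
    .count = 64, // number of filters, i.e. output channels
  },
};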

typedef struct {
  struct {
    int dim[CCV_NNC_MAX_DIM_ALLOC];
  } stride;
  struct {
    int begin[CCV_NNC_MAX_DIM_ALLOC];
    int end[CCV_NNC_MAX_DIM_ALLOC];
  } border;
} ccv_nnc_hint_t;

typedef struct ccv_nnc_stream_context_s ccv_nnc_stream_context_t;

typedef struct ccv_nnc_cmd_s {
  uint32_t cmd;
  uint32_t backend;
  int algorithm;
  ccv_nnc_cmd_param_t info;
  // This has to be the same as the ccv_nnc_cmd_exec_f type.
  // This is for type CCV_NNC_COMPUTE_CUSTOM
  int(*exec)(const struct ccv_nnc_cmd_s cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const ccv_nnc_stream_context_t* stream_context);
} ccv_nnc_cmd_t;

// For forward functions, the input tensors and output tensors can be arbitrary.
// However, for backward functions (backpropagation, or gradient functions in other libs), given n forward inputs and m forward outputs,
// the input is: 0~m-1: gradients for the forward function's output tensors, m~m+n-1: input tensors of the forward function, m+n~m+n+m-1: output tensors of the forward function;
// the output is: 0~n-1: output gradients w.r.t. the forward function's input tensors.
// Which input / output tensors can be ignored can be specified in the cmd config structs.
typedef int(*ccv_nnc_cmd_exec_f)(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const ccv_nnc_stream_context_t* stream_context);

typedef int(*ccv_nnc_cmd_autotune_f)(const ccv_nnc_cmd_t cmd, const size_t max_workspace_size, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const ccv_nnc_stream_context_t* stream_context);

static inline int ccv_nnc_tensor_nd(const int dim[CCV_NNC_MAX_DIM_ALLOC])
{
  int i;
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; i++)
    if (dim[i] == 0)
      return i;
  return CCV_NNC_MAX_DIM_ALLOC;
}
Coverage for ccv_nnc_tensor_nd (3.84k calls in total across instantiations). Executed instantiations, with call counts: ccv_nnc_conv_cpu_ref.c (1.36k), ccv_nnc_conv_cpu_opt.c (680), ccv_nnc_ew_cpu_ref.c (484), ccv_nnc_cmd.c (443), _ccv_nnc_conv_cpu_4x4_3x3_winograd.c (212), _ccv_nnc_conv_cpu_opt.c (156), ccv_nnc_convolution.c (99), ccv_nnc_gemm_cpu_opt.c (84), ccv_nnc_max_pool_cpu_ref.c (56), _ccv_nnc_gemm_cpu_opt.c (54), ccv_nnc_gemm_cpu_ref.c (52), _ccv_nnc_gemm_cpu_sys.c (48), ccv_nnc_tensor.c (44), ccv_nnc_util_cpu_ref.c (32), ccv_nnc_avg_pool_cpu_ref.c (26), ccv_nnc_pool.c (8), ccv_nnc_axpy_cpu_ref.c (5).
Unexecuted instantiations: ccv_nnc_ew.c, ccv_nnc_tensor_tape.c, ccv_nnc_softmax_cpu_ref.c, ccv_nnc_relu_cpu_ref.c, ccv_nnc_graph_while.c, _ccv_nnc_conv_cpu_fft.c, _ccv_nnc_conv_cpu_gemm.c, ccv_nnc_softmax.c, ccv_nnc_relu.c, ccv_nnc_blas.c, ccv_nnc_util.c, ccv_nnc_graph.c, ccv_nnc_symbolic_graph.c, ccv_nnc_symbolic_graph_compile.c, ccv_nnc_symbolic_graph_backward.c, ccv_nnc_symbolic_graph_while.c, and the test drivers (symbolic.graph.tests.c, backward.tests.c, winograd.tests.c, tape.tests.c, tensor.tests.c, forward.tests.c, autograd.tests.c, tfb.tests.c, gradient.tests.c, transform.tests.c, while.backward.tests.c, graph.tests.c, autograd.vector.tests.c, while.tests.c, cudnn.tests.c, graph.vgg.d.tests.c, symbolic.graph.vgg.d.tests.c, dense.net.tests.c).
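
Since dim arrays in this header are zero-terminated, ccv_nnc_tensor_nd simply counts the leading non-zero entries. A minimal usage sketch (the concrete dimension values and the include path are assumptions for illustration):

#include <assert.h>
#include <nnc/ccv_nnc.h> // include path is an assumption; adjust to your build setup

static void example_tensor_nd(void)
{
  // Trailing entries are implicitly 0, which terminates the dimension list.
  const int dim[CCV_NNC_MAX_DIM_ALLOC] = { 128, 224, 224, 3 };
  assert(ccv_nnc_tensor_nd(dim) == 4); // four dimensions before the first 0
}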

/**
 * Level-0 API
 */

void ccv_nnc_init(void);

/**
 * Level-1 API
 */

// For tensor
CCV_WARN_UNUSED(ccv_nnc_tensor_t*) ccv_nnc_tensor_new(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags);
// Allocating on stack
CCV_WARN_UNUSED(ccv_nnc_tensor_t) ccv_nnc_tensor(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags);
void ccv_nnc_tensor_free(ccv_nnc_tensor_t* const tensor);
CCV_WARN_UNUSED(ccv_nnc_tensor_view_t*) ccv_nnc_tensor_view_new(const ccv_nnc_tensor_t* const tensor, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int dim[CCV_NNC_MAX_DIM_ALLOC]);
// Allocating on stack
CCV_WARN_UNUSED(ccv_nnc_tensor_view_t) ccv_nnc_tensor_view(const ccv_nnc_tensor_t* const tensor, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int dim[CCV_NNC_MAX_DIM_ALLOC]);
void ccv_nnc_tensor_view_free(ccv_nnc_tensor_view_t* const tensor_view);
// All the functions from here on accept both tensors and tensor views, unless an assertion says otherwise.
void ccv_nnc_tensor_zero(void* const tensor);
int ccv_nnc_tensor_eq(const ccv_nnc_tensor_t* const a, const ccv_nnc_tensor_t* const b);

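A minimal sketch of allocating, clearing and freeing a tensor through this API. The exact field names of ccv_nnc_tensor_param_t (type, format, datatype, dim) and the constants CCV_TENSOR_CPU_MEMORY, CCV_TENSOR_FORMAT_NHWC and CCV_32F come from other ccv/nnc headers and are assumptions here.

static void example_tensor_lifecycle(void)
{
  // Describe a small single-precision tensor resident in CPU memory
  // (field names are assumptions; they are not declared in this header).
  ccv_nnc_tensor_param_t params = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32F,
    .dim = { 4 }, // a 4-element vector
  };
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, params, 0); // 0 ptr: let nnc allocate the storage
  ccv_nnc_tensor_zero(a);  // fill with zeros
  a->data.f32[0] = 1;      // raw storage is exposed through the data union from ccv.h
  ccv_nnc_tensor_free(a);
}
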
// For computation node
// Return a high-precision time unit.
uint64_t ccv_nnc_cmd_mono_time(void);
CCV_WARN_UNUSED(const char*) ccv_nnc_cmd_name(const uint32_t cmd);
CCV_WARN_UNUSED(const char*) ccv_nnc_cmd_backend_name(const uint32_t backend);
CCV_WARN_UNUSED(int) ccv_nnc_cmd_ok(const uint32_t cmd, const uint32_t backend);
CCV_WARN_UNUSED(ccv_nnc_cmd_t) ccv_nnc_cmd(const uint32_t cmd, ccv_nnc_cmd_exec_f exec, const ccv_nnc_cmd_param_t params, const int flags);
// Verify the hint
CCV_WARN_UNUSED(int) ccv_nnc_hint_verify(const ccv_nnc_hint_t hint, const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t a, const ccv_nnc_tensor_param_t b);
// Automatically find the best hint for a given input / output (on the forward pass only).
CCV_WARN_UNUSED(ccv_nnc_hint_t) ccv_nnc_hint_auto(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t a, const ccv_nnc_tensor_param_t b);
// Automatically find the outputs for the given inputs / hint.
void ccv_nnc_hint_tensor_auto(const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size);
// Run autotune to find the best kernel and configuration for the given input; the return value is the modified
// cmd that contains the updated configuration.
CCV_WARN_UNUSED(ccv_nnc_cmd_t) ccv_nnc_cmd_autotune(const ccv_nnc_cmd_t cmd, const size_t max_workspace_size, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const ccv_nnc_stream_context_t* const stream_context);
CCV_WARN_UNUSED(int) ccv_nnc_cmd_bitmask(const ccv_nnc_cmd_t cmd, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size);
int ccv_nnc_cmd_exec(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const ccv_nnc_stream_context_t* const stream_context);
CCV_WARN_UNUSED(int) ccv_nnc_cmd_attr(const ccv_nnc_cmd_t cmd, const int flags);
CCV_WARN_UNUSED(int) ccv_nnc_cmd_is_forward(const ccv_nnc_cmd_t cmd);
CCV_WARN_UNUSED(int) ccv_nnc_cmd_is_backward(const ccv_nnc_cmd_t cmd);

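A minimal sketch of running a single command eagerly with ccv_nnc_cmd_exec. CCV_NNC_EWSUM_FORWARD is assumed to be one of the command identifiers pulled in from the generated cmd/ccv_nnc_cmd.h above; the ccv_nnc_tensor_param_t field names remain the same assumptions as in the earlier tensor sketch.

static void example_cmd_exec(void)
{
  ccv_nnc_init(); // register commands / backends before anything else
  // Tensor parameter field names are assumptions (declared elsewhere in ccv/nnc).
  ccv_nnc_tensor_param_t tp = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32F,
    .dim = { 4 },
  };
  ccv_nnc_tensor_t* a = ccv_nnc_tensor_new(0, tp, 0);
  ccv_nnc_tensor_t* b = ccv_nnc_tensor_new(0, tp, 0);
  ccv_nnc_tensor_t* c = ccv_nnc_tensor_new(0, tp, 0);
  ccv_nnc_tensor_zero(a);
  ccv_nnc_tensor_zero(b);
  // CCV_NNC_EWSUM_FORWARD is an assumption: an element-wise sum command id from
  // the generated cmd header; its param / hint structs can stay zeroed.
  ccv_nnc_cmd_param_t params = {0};
  ccv_nnc_hint_t hint = {0};
  ccv_nnc_cmd_t cmd = ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, params, 0);
  ccv_nnc_tensor_t* inputs[] = { a, b };
  ccv_nnc_tensor_t* outputs[] = { c };
  if (ccv_nnc_cmd_exec(cmd, hint, 0, inputs, 2, outputs, 1, 0) != CCV_NNC_EXEC_SUCCESS)
    ; /* handle the error */
  ccv_nnc_tensor_free(a);
  ccv_nnc_tensor_free(b);
  ccv_nnc_tensor_free(c);
}
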
// Control flow constructs
// Modeled heavily on CUDA's stream / event idea.
enum {
  CCV_STREAM_CONTEXT_CPU = 0x1,
  CCV_STREAM_CONTEXT_GPU = 0x2,
};
#define CCV_STREAM_GET_CONTEXT(type) ((type) & 0x3)
#define CCV_STREAM_GET_DEVICE(type) ((type) & 0xff00)
#define CCV_STREAM_GET_DEVICE_ID(type) (CCV_STREAM_GET_DEVICE(type) >> 8)
// The flag is a combination of CPU / GPU and the DEVICE_ID
CCV_WARN_UNUSED(ccv_nnc_stream_context_t*) ccv_nnc_stream_context_new(const int type);
void ccv_nnc_stream_context_wait(const ccv_nnc_stream_context_t* const stream);
void ccv_nnc_stream_context_free(ccv_nnc_stream_context_t* const stream_context);

typedef struct ccv_nnc_stream_signal_s ccv_nnc_stream_signal_t;

CCV_WARN_UNUSED(ccv_nnc_stream_signal_t*) ccv_nnc_stream_signal_new(const int type);
void ccv_nnc_stream_context_emit_signal(const ccv_nnc_stream_context_t* const stream, const ccv_nnc_stream_signal_t* const signal);
void ccv_nnc_stream_context_wait_signal(const ccv_nnc_stream_context_t* const stream, const ccv_nnc_stream_signal_t* const signal);
void ccv_nnc_stream_signal_free(ccv_nnc_stream_signal_t* const signal);

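A minimal sketch of composing a stream type flag with the macros above and driving a stream. The bit layout (device id in bits 8..15) follows directly from CCV_STREAM_GET_DEVICE / CCV_STREAM_GET_DEVICE_ID; whether a GPU stream is actually available depends on how the library was built.

static void example_stream(void)
{
  // GPU stream on device 1: context bits in the low nibble, device id shifted into bits 8..15.
  const int type = CCV_STREAM_CONTEXT_GPU | (1 << 8);
  ccv_nnc_stream_context_t* stream = ccv_nnc_stream_context_new(type);
  ccv_nnc_stream_signal_t* signal = ccv_nnc_stream_signal_new(type);
  // ... enqueue ccv_nnc_cmd_exec(..., stream) calls here ...
  ccv_nnc_stream_context_emit_signal(stream, signal); // record a point in the stream
  ccv_nnc_stream_context_wait_signal(stream, signal); // another stream could wait on it
  ccv_nnc_stream_context_wait(stream);                // block the host until the stream drains
  ccv_nnc_stream_signal_free(signal);
  ccv_nnc_stream_context_free(stream);
}
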
/**
 * Level-2 API
 */

enum {
  CCV_NNC_SHORT_DOT_GRAPH = 0x0,
  CCV_NNC_LONG_DOT_GRAPH  = 0x1,
};

typedef struct ccv_nnc_graph_s ccv_nnc_graph_t;

typedef struct {
  int32_t d; // This is int because sometimes I piggy-back on negatives to carry out some internal computations.
  const ccv_nnc_graph_t* graph;
} ccv_nnc_graph_exec_t;

#define CCV_NO_GRAPH_EXEC(exec) ((exec).graph == 0)

// Create an empty graph.
// Note that all graph mutation methods are not thread-safe.
// You should only operate on the graph in a serial fashion.
CCV_WARN_UNUSED(ccv_nnc_graph_t*) ccv_nnc_graph_new(void);
// Create a node with a specific command execution, as well as its inputs & outputs.
// Underneath, the graph maintains the backing object for the node, and all you get is
// an on-stack object to index the backing object from the graph.
CCV_WARN_UNUSED(ccv_nnc_graph_exec_t) ccv_nnc_graph_exec_new(ccv_nnc_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size);
void ccv_nnc_graph_exec_set_hint(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_hint_t hint);
void ccv_nnc_graph_exec_set_io(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size);
// Broadcasts are tensors that are not directly involved in the computation, but whose pointers need to be updated along with this exec, and thus need to be "broadcast" to other exec nodes.
void ccv_nnc_graph_exec_add_broadcast(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const broadcast);
// Concatenate input graph nodes with an output graph node to create a new graph.
// Returns non-zero if it cannot concat successfully.
int ccv_nnc_graph_exec_concat(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination);
// Disconnect input graph nodes from an output graph node in this graph.
// Returns non-zero if it cannot disjoin successfully.
int ccv_nnc_graph_exec_disjoin(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination);
// Generate output that can be parsed by GraphViz (DOT language).
void ccv_nnc_graph_dot(const ccv_nnc_graph_t* const graph, const int flags, FILE* out);
// Run the autotune function for all the inputs / outputs, and afterwards assign the optimized cmd back.
void ccv_nnc_graph_autotune(ccv_nnc_graph_t* const graph, const size_t max_workspace_size, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size);
// Run the graph from the source nodes all the way to the destination nodes.
void ccv_nnc_graph_run(const ccv_nnc_graph_t* const graph, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size);
// The sources / destinations.
void ccv_nnc_graph_set_sources(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const sources, const int source_size);
ccv_nnc_graph_exec_t* ccv_nnc_graph_sources(const ccv_nnc_graph_t* const graph);
int ccv_nnc_graph_source_size(const ccv_nnc_graph_t* const graph);
void ccv_nnc_graph_set_destinations(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const destinations, const int destination_size);
ccv_nnc_graph_exec_t* ccv_nnc_graph_destinations(const ccv_nnc_graph_t* const graph);
int ccv_nnc_graph_destination_size(const ccv_nnc_graph_t* const graph);
// The graph, and its relevant auxiliary objects (opaque to the user), are deallocated.
void ccv_nnc_graph_free(ccv_nnc_graph_t* const graph);

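A minimal sketch of the Level-2 flow: build a concrete graph with one node, then run it. It reuses the element-wise-sum command from the earlier sketches, so CCV_NNC_EWSUM_FORWARD remains an assumption; the tensors are expected to be pre-allocated by the caller.

static void example_concrete_graph(ccv_nnc_tensor_t* const a, ccv_nnc_tensor_t* const b, ccv_nnc_tensor_t* const c)
{
  ccv_nnc_graph_t* graph = ccv_nnc_graph_new();
  ccv_nnc_cmd_param_t params = {0};
  ccv_nnc_hint_t hint = {0};
  ccv_nnc_tensor_t* inputs[] = { a, b };
  ccv_nnc_tensor_t* outputs[] = { c };
  // One node computing c = a + b (CCV_NNC_EWSUM_FORWARD is assumed from the generated cmd header).
  ccv_nnc_graph_exec_t sum = ccv_nnc_graph_exec_new(graph, ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, params, 0), hint, inputs, 2, outputs, 1);
  // With a single node, it is both the source and the destination of the run.
  ccv_nnc_graph_run(graph, 0, &sum, 1, &sum, 1);
  ccv_nnc_graph_free(graph);
}
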
/**
 * Level-3 API
 */

typedef struct ccv_nnc_symbolic_graph_s ccv_nnc_symbolic_graph_t;

// Opaque pointer to an arena of allocated tensors.
typedef struct ccv_nnc_tensor_arena_s ccv_nnc_tensor_arena_t;

// Opaque pointer to an arena of allocated execs.
typedef struct ccv_nnc_graph_exec_arena_s ccv_nnc_graph_exec_arena_t;

typedef struct {
  ccv_nnc_tensor_param_t info;
  int32_t d;
  const ccv_nnc_symbolic_graph_t* graph;
} ccv_nnc_tensor_symbol_t;

typedef struct {
  int32_t d;
  const ccv_nnc_symbolic_graph_t* graph;
} ccv_nnc_graph_exec_symbol_t;

enum {
  CCV_NNC_SYM_TENSOR_INIT_ZEROS = 0x01, // Initialize underlying tensor for the symbol with zeros
};

// Create an empty symbolic graph.
// Note that all graph mutation methods are not thread-safe.
// You should only operate on the graph in a serial fashion.

// Create a new symbolic graph. It is an opaque data structure that maintains the whole graph of computation in its symbolic form.
CCV_WARN_UNUSED(ccv_nnc_symbolic_graph_t*) ccv_nnc_symbolic_graph_new(void);
// Create a tensor symbol (thus, with no actual memory space allocation) in a symbolic graph.
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_param_t info, const char* const name);
// Create an alias to the tensor symbol as a tensor view (thus, pointing to the same memory region, but with a different header info and offset).
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_alias_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info, const char* const name);
// For a given alias, this method resolves to the referenced tensor symbol.
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_resolve_alias(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor_alias);
// Set the peer reference.
void ccv_nnc_tensor_symbol_set_peer(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor_symbol, const ccv_nnc_tensor_symbol_t peer_tensor_symbol);
// Pass a graph's tensor symbol into its sub-graph.
void ccv_nnc_tensor_symbol_pass(ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_symbolic_graph_t* const sub_graph, const ccv_nnc_tensor_symbol_t tensor_symbol, const ccv_nnc_tensor_symbol_t sub_tensor_symbol);
// Create a graph node (an operation that takes a set of inputs and generates a set of outputs).
ccv_nnc_graph_exec_symbol_t ccv_nnc_graph_exec_symbol_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name);
// Return the command on this exec symbol
CCV_WARN_UNUSED(ccv_nnc_cmd_t) ccv_nnc_graph_exec_symbol_cmd(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec);
// The operation defaults to using `ccv_nnc_hint_auto` to find the best hints for a set of inputs / outputs.
// However, you can also set your own hints. Returns non-zero if the hint cannot be set successfully.
int ccv_nnc_graph_exec_symbol_set_hint(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec, const ccv_nnc_hint_t hint);
void ccv_nnc_graph_exec_symbol_set_io(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size);
// Set the tensor symbol info again (for example, when its dimensionality depends on the tensor input).
int ccv_nnc_tensor_symbol_set(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor, const ccv_nnc_tensor_param_t info);
// Set the flags for this tensor symbol. The flags are only used for the symbol, not for the tensor.
int ccv_nnc_tensor_symbol_set_flags(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor, const int flags);
CCV_WARN_UNUSED(int) ccv_nnc_tensor_symbol_flag(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor, const int flags);
// Manually concatenate input graph nodes with an output graph node to create a new graph.
// Returns non-zero if it cannot concat successfully.
int ccv_nnc_graph_exec_symbol_concat(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t source, const ccv_nnc_graph_exec_symbol_t destination);
// Manually disconnect input graph nodes from an output graph node for this graph.
// Returns non-zero if it cannot disjoin successfully.
int ccv_nnc_graph_exec_symbol_disjoin(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t source, const ccv_nnc_graph_exec_symbol_t destination);
// Automatically concatenate these nodes together based on their inputs / outputs.
// Returns non-zero if it cannot figure it out.
// Imagine this as generating the execution flow based on the input tensors and output tensors.
// Passing 0 for execs and 0 for exec_size means to loop over all the execs on the graph and autogen.
enum {
  CCV_NNC_AUTOGEN_ALL_EXECS = 0x1,
  CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS = 0x2,
};
int ccv_nnc_graph_exec_symbol_autogen(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const execs, const int exec_size, const int flags);
// Generate a duplicate of the provided graph.
// While generating the duplicate, it calls the function pointer to re-process the node type.
typedef ccv_nnc_cmd_t(*ccv_nnc_symbolic_graph_subst_f)(const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd);
CCV_WARN_UNUSED(ccv_nnc_symbolic_graph_t*) ccv_nnc_symbolic_graph_dup(const ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_symbolic_graph_subst_f subst);
// The sources / destinations generated by the autogen.
void ccv_nnc_symbolic_graph_set_sources(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size);
ccv_nnc_graph_exec_symbol_t* ccv_nnc_symbolic_graph_sources(const ccv_nnc_symbolic_graph_t* const graph);
int ccv_nnc_symbolic_graph_source_size(const ccv_nnc_symbolic_graph_t* const graph);
void ccv_nnc_symbolic_graph_set_destinations(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size);
ccv_nnc_graph_exec_symbol_t* ccv_nnc_symbolic_graph_destinations(const ccv_nnc_symbolic_graph_t* const graph);
int ccv_nnc_symbolic_graph_destination_size(const ccv_nnc_symbolic_graph_t* const graph);
// Generate output that can be parsed by GraphViz (DOT language).
void ccv_nnc_symbolic_graph_dot(const ccv_nnc_symbolic_graph_t* const graph, const int flags, FILE* out);

typedef struct {
  ccv_nnc_tensor_symbol_t symbol;
  const ccv_nnc_tensor_t* tensor;
} ccv_nnc_tensor_bind_t;

// Compile a symbolic graph into a graph that can be executed, and a set of tensors (an opaque data structure, the tensor arena) is allocated based on which tensor symbols are the inputs and which are the outputs. The tensor allocation is done to minimize the required storage.
// tensor_binds provides custom bindings for these tensors. You are still responsible for managing the lifetime of these tensors.
void ccv_nnc_symbolic_graph_compile(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_bind_t* const tensor_binds, const int tensor_binds_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size, ccv_nnc_graph_t** const graph_ref, ccv_nnc_tensor_arena_t** const tensor_arena_ref, ccv_nnc_graph_exec_arena_t** const graph_exec_arena_ref);
// Free the symbolic graph and its associated memory. Note that if you compiled a graph / tensor arena out of this symbolic graph, those won't be freed.
void ccv_nnc_symbolic_graph_free(ccv_nnc_symbolic_graph_t* const graph);
// Find the corresponding tensor by a symbol from the tensor arena.
CCV_WARN_UNUSED(ccv_nnc_tensor_t*) ccv_nnc_tensor_from_symbol(const ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_tensor_symbol_t symbol);
// Bind a tensor to a symbol. You are still responsible for managing the lifetime of the tensor, to make sure it is not freed until everything is done.
int ccv_nnc_tensor_bind_symbol(const ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_t* const tensor);
// Free the opaque tensor arena structure.
void ccv_nnc_tensor_arena_free(ccv_nnc_tensor_arena_t* const tensor_arena);
// Find the corresponding graph exec by an exec symbol from the graph exec arena.
CCV_WARN_UNUSED(ccv_nnc_graph_exec_t) ccv_nnc_graph_exec_from_symbol(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const ccv_nnc_graph_exec_symbol_t symbol);
// Return the node that can drive all the source nodes from the compilation.
CCV_WARN_UNUSED(ccv_nnc_graph_exec_t) ccv_nnc_graph_exec_source(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena);
// Return the node that can drain all the destination nodes from the compilation.
CCV_WARN_UNUSED(ccv_nnc_graph_exec_t) ccv_nnc_graph_exec_destination(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena);
// Free the opaque graph exec arena structure.
void ccv_nnc_graph_exec_arena_free(ccv_nnc_graph_exec_arena_t* const graph_exec_arena);

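Putting the Level-3 pieces together, the sketch below declares three tensor symbols, one element-wise-sum node, autogens the edges plus sources / destinations, compiles, runs the compiled graph, and frees everything. CCV_NNC_EWSUM_FORWARD and the ccv_nnc_tensor_param_t field names are assumptions carried over from the earlier sketches.

static void example_symbolic_graph(void)
{
  ccv_nnc_symbolic_graph_t* symbolic = ccv_nnc_symbolic_graph_new();
  ccv_nnc_tensor_param_t tp = { .type = CCV_TENSOR_CPU_MEMORY, .format = CCV_TENSOR_FORMAT_NHWC, .datatype = CCV_32F, .dim = { 4 } };
  ccv_nnc_tensor_symbol_t a = ccv_nnc_tensor_symbol_new(symbolic, tp, "a");
  ccv_nnc_tensor_symbol_t b = ccv_nnc_tensor_symbol_new(symbolic, tp, "b");
  ccv_nnc_tensor_symbol_t c = ccv_nnc_tensor_symbol_new(symbolic, tp, "c");
  ccv_nnc_cmd_param_t params = {0};
  ccv_nnc_tensor_symbol_t inputs[] = { a, b };
  ccv_nnc_tensor_symbol_t outputs[] = { c };
  ccv_nnc_graph_exec_symbol_new(symbolic, ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, params, 0), inputs, 2, outputs, 1, "sum");
  // Derive edges, sources and destinations from the symbols used above.
  ccv_nnc_graph_exec_symbol_autogen(symbolic, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
  ccv_nnc_graph_t* graph = 0;
  ccv_nnc_tensor_arena_t* tensor_arena = 0;
  ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0;
  ccv_nnc_symbolic_graph_compile(symbolic, 0, 0,
    ccv_nnc_symbolic_graph_sources(symbolic), ccv_nnc_symbolic_graph_source_size(symbolic),
    ccv_nnc_symbolic_graph_destinations(symbolic), ccv_nnc_symbolic_graph_destination_size(symbolic),
    &graph, &tensor_arena, &graph_exec_arena);
  // Concrete tensors now live in the arena; fill the inputs before running.
  ccv_nnc_tensor_zero(ccv_nnc_tensor_from_symbol(tensor_arena, a));
  ccv_nnc_tensor_zero(ccv_nnc_tensor_from_symbol(tensor_arena, b));
  ccv_nnc_graph_exec_t source = ccv_nnc_graph_exec_source(graph_exec_arena);
  ccv_nnc_graph_exec_t destination = ccv_nnc_graph_exec_destination(graph_exec_arena);
  ccv_nnc_graph_run(graph, 0, &source, 1, &destination, 1);
  ccv_nnc_graph_free(graph);
  ccv_nnc_tensor_arena_free(tensor_arena);
  ccv_nnc_graph_exec_arena_free(graph_exec_arena);
  ccv_nnc_symbolic_graph_free(symbolic);
}
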
/**
 * Level-4 API
 */
// Compute the backward graph, assuming the provided symbolic graph only contains the "forward" part from sources to destinations.
// This is effectively what other libs call "autograd" or automatic differentiation (specifically, "reverse AD").
void ccv_nnc_symbolic_graph_backward(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size, const ccv_nnc_tensor_symbol_t* const f_symbols, const int f_symbol_size, const ccv_nnc_tensor_symbol_t* const wrt_symbols, const int wrt_symbol_size);
// Get the symbol that contains the gradient. The list will be flushed if the ccv_nnc_symbolic_graph_backward function is called again.
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_for_backward(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t symbol);
// This has to get the exec symbol from the tensor.
CCV_WARN_UNUSED(ccv_nnc_graph_exec_symbol_t) ccv_nnc_graph_exec_symbol_for_backward(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t symbol);

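A minimal sketch of the Level-4 flow against a symbolic graph like the one built in the previous sketch: ask for d f / d a, then look up the gradient symbol so it can later be read out of a compiled graph's tensor arena. The symbols f and a, and the graph handle, are assumed to come from code like that earlier example.

static void example_backward(ccv_nnc_symbolic_graph_t* const symbolic,
  const ccv_nnc_tensor_symbol_t f, const ccv_nnc_tensor_symbol_t a)
{
  // Differentiate f with respect to a over the whole graph (autogen'd sources / destinations).
  ccv_nnc_symbolic_graph_backward(symbolic,
    ccv_nnc_symbolic_graph_sources(symbolic), ccv_nnc_symbolic_graph_source_size(symbolic),
    ccv_nnc_symbolic_graph_destinations(symbolic), ccv_nnc_symbolic_graph_destination_size(symbolic),
    &f, 1, &a, 1);
  // The gradient d f / d a now has its own tensor symbol in the same graph.
  ccv_nnc_tensor_symbol_t df_da = ccv_nnc_tensor_symbol_for_backward(symbolic, a);
  // ... re-run autogen, compile, and read the gradient via ccv_nnc_tensor_from_symbol(...) ...
  (void)df_da;
}
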
// Construct a "while" loop in a symbolic graph.
//
// In NNC, a computation graph cannot contain cycles. Thus, there is no flexible way to express loops.
//
// A little survey on this problem:
//
// Caffe2 supports a specific type of recurrent neural network.
// TensorFlow, as it stands, supports a while construct. Its while construct is very straightforward: a body and a condition are provided, and you can construct whatever graph you want.
// MXNet supports recurrent neural networks by unrolling them into a normal, non-looped graph.
// Theano supports a "scan" op, which is a terminable loop (with a loop variant, known as a sequence).
// CNTK supports this with custom BrainScript. Within BrainScript, you can access the previous state in a function, therefore effectively supporting calling a method multiple times (looping over).
//
// Of the above, Caffe2 and MXNet gave up on supporting generic loops for performance reasons.
// TensorFlow supports a generic while loop, with all the trouble it may introduce (see the nested-while-loop bug in TensorFlow that was recently fixed).
// Theano picked a point that seems pretty sweet, although there are limitations.
// CNTK's BrainScript is a DSL; they can do whatever they want, with the drawback that they now need to implement a language runtime.
// TensorFlow, Theano and CNTK all support auto-differentiation over the while loop with a tape (Wengert list).
//
// A simple way to support loops is to support conditional jumps. In fact, a conditional jump is a more generic way of doing loops. However,
// once you take into consideration that a fully differentiable computation graph is to be supported, it is terrible. With conditional
// jumps, it is really hard to know which tensor is used where, and thus to keep track of it for reverse accumulation (backward propagation). There
// is no counter or anything like it; it is pretty hard to trace back which line was executed how many times. Compound this with NNC's promise
// that whatever shows on the graph and can be "parallel" computed will be parallel computed, and it is close to impossible to track if the
// conditional jump is used in its raw form. Certain restrictions must be applied to how the loop is done. The compromise comes from a closer
// examination of NNC's preferences.
//
// NNC prefers to have a graph without cycles. It also prefers to be fully differentiable. Another important criterion is that most
// functions in NNC require an SSA (Static Single Assignment) representation. With these in mind, support for the while loop has to be strict.
//
// Luckily, there are well-formalized ways of supporting this in the literature and in practice. Because it is well-formalized, translating this
// into the existing NNC implementation is actually pretty straightforward. We are going to introduce a special version of the while loop. In
// the literature that discusses SSA, it may be called a parameterized loop. For us, it works like this:
//
// To construct a while loop for an existing NNC graph, you need to be able to separate the existing graph into two sub-graphs.
//
// The while-loop sub-graph (WL sub-graph) contains a set of incoming nodes (I-nodes), condition-false output nodes (CFO-nodes) and end nodes (E-nodes).
// Each set has its own properties, but in short, all incoming edges to the WL sub-graph connect to one of the I-nodes, but nothing else. All outgoing
// edges from the WL sub-graph connect to one of the CFO-nodes, but nothing else. A node can be an I-node, CFO-node or E-node, non-exclusively.
// There are also 3 types of tensors used by the nodes in a WL sub-graph: input tensors (I-tensors) are tensors that are inputs to some nodes, and will
// never be outputs. Output tensors (O-tensors) are tensors that are outputs from some nodes, but never inputs to any node. I-tensors can be outputs
// from some nodes outside of the WL sub-graph. O-tensors can be inputs to some nodes outside of the WL sub-graph. Internal tensors (IN-tensors) are
// not visible outside of the WL sub-graph, therefore they can be both inputs and outputs of some nodes inside the sub-graph. Some tensors can be fed back
// into the WL sub-graph, given either O-tensors or IN-tensors. A parameter map can be given in these cases to describe which maps to what.
//
// A WL sub-graph is driven like this: the WL sub-graph runs until all CFO-nodes are reached. At this point, the while_f condition is checked.
// If true, we continue until all the end-nodes are reached. At this point, we increase the counter, reconfigure the WL sub-graph with the parameter map,
// and run from the I-nodes all over again. When all CFO-nodes are reached, the condition is checked again; if false, the WL sub-graph terminates, and the graph
// continues from the nodes that are pointed to by the CFO-nodes.
//
// Given these constraints, doing automatic differentiation is not that hard any more. A WL sub-graph, from the whole graph's point of view, is just
// a giant command that supports both forward / backward operations, with some extra information passed around in the form of userdata (a tape).
//
// For the WL sub-graph, we can continue to leverage the compile / backward functions already written for the symbolic graph as well.
// For the compile function, we just need to take care of the parameter maps (these need to be converted into bound tensors).
// For the backward function, we need to convert the parameter maps from an assigner (thus, y = x) to an accumulator (x += y).
//
// This function will replace the nodes that it affects with one sub-graph node.
// Thus, how to drive this sub-graph is opaque. Its backward form is opaque as well.
//
// There are no connections between its nodes and the outside graph nodes other than the
// three sets:
// 1). Incoming nodes, the set of nodes that receives the incoming edges from outside; they cannot have edges pointing to them from inside nodes. The sub-graph computation starts from these incoming nodes;
// 2). Condition-false output nodes; when the condition is false, we will break out of this while loop. These nodes point to the outside nodes, but to no inside nodes;
// 3). End nodes, the set of nodes that marks the end of the while body; after these nodes are executed, we will return to the incoming nodes. These end nodes shouldn't have any edges pointing to inside nodes (it is OK if end nodes are condition-true output nodes as well);
//
// Since these will become a sub-graph (which, to its owner graph, is just a simple "node"), it will have inputs and outputs. Besides that, the loop body needs to be parameterized to be SSA compliant (see: https://www.cs.cmu.edu/~fp/courses/15411-f13/lectures/06-ssa.pdf). Thus, a list of body parameters needs to be provided.

// The given tensors contain all the common / input / output tensors specified in the sub-graph.
// Currently, the special_size should always be 1, and it contains only the loop counter.
typedef int(*ccv_nnc_graph_while_f)(ccv_nnc_tensor_t* const* const commons, const int common_size, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, const void* const data);
// Opaque pointer to the tape of tensors. The tape is used by the while loop.
typedef struct ccv_nnc_tensor_tape_s ccv_nnc_tensor_tape_t;
CCV_WARN_UNUSED(ccv_nnc_tensor_tape_t*) ccv_nnc_tensor_tape_new(void);
void ccv_nnc_tensor_tape_io(ccv_nnc_tensor_tape_t* const tape, const ccv_nnc_graph_t* const graph, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size);
uint64_t ccv_nnc_tensor_tape_while_count(ccv_nnc_tensor_tape_t* const tape, const ccv_nnc_graph_t* const graph);
void ccv_nnc_tensor_tape_set_while_count(ccv_nnc_tensor_tape_t* const tape, ccv_nnc_graph_t* const graph, const uint64_t while_count);
void ccv_nnc_tensor_tape_free(ccv_nnc_tensor_tape_t* const tape);
// Augmented function to run a graph with a while loop (an obvious example is a dynamic RNN).
#define CCV_NNC_MAX_INLINE_UNROLL (4)
typedef struct ccv_nnc_tensor_multiview_s {
  // This is an augmented ccv_nnc_tensor_view_t
  // Namely, it can point to multiple versions of tensors.
  int type; // This type is CCV_NNC_TENSOR_MULTI_VIEW
  // kind specifies how the multi-version tensors are stored.
  // See the comment on the follow-up enums.
  uint8_t kind;
  uint16_t repeat;
  intptr_t anchor; // on which graph this multi-view tensor is wrapped. This helps to determine on which level the multi-view tensor should be unwrapped.
  // If this tensor points to a tensor view, data.u8 - offset is the real pointer start.
  off_t offset;
  struct ccv_nnc_tensor_multiview_s* p; // If this is wrapped with another multiview tensor, get to the parent one.
  ccv_nnc_tensor_t* it; // Current tensor (tensor in use); this is updated along with the graph computation.
  // This is useful because by just traversing it, I can get the latest up-to-date reference to this multi-view tensor.
  ccv_array_t* rtvs; // Referenced tensor view array. This corresponds to the ccv_nnc_tensor_reference_to_multiview method, which records all the tensors registered for updates.
  ccv_nnc_tensor_t* _inline_data[CCV_NNC_MAX_INLINE_UNROLL];
  ccv_nnc_tensor_t** _heap_data;
} ccv_nnc_tensor_multiview_t;
#define CCV_NNC_MULTIVIEW_DATA(x) ((x)->_heap_data ? (x)->_heap_data : (x)->_inline_data)

enum {
  CCV_NNC_MULTIVIEW_K0N = 0, // All of them are repeated.
  CCV_NNC_MULTIVIEW_K1N = 1, // The first one is the first, the second one starts to repeat. (0111111...)
};
#define CCV_NNC_MULTIVIEW_K01(x) ((x)->kind == CCV_NNC_MULTIVIEW_K0N && (x)->repeat == 1)
// Set up a tensor multiview with a given set of tensors.
void ccv_nnc_tensor_multiview(ccv_nnc_tensor_t* data[], const uint8_t kind, const uint16_t repeat, const ccv_nnc_graph_t* const graph, ccv_nnc_tensor_multiview_t* const tensor_multiview);
// Since a tensor_multiview will never be allocated with a *_new method, the *_free method simply frees anything that is dynamically allocated afterwards (such as the reference items).
void ccv_nnc_tensor_multiview_free(const ccv_nnc_tensor_multiview_t tensor_multiview);
// Set up a tensor as a reference to a tensor multiview; thus, when the tensor multiview's it (current tensor) updates, the tensor reference's data.u8 gets updated as well (pointing to the same memory region as it).
void ccv_nnc_tensor_reference_to_multiview(ccv_nnc_tensor_multiview_t* const tensor_multiview, ccv_nnc_tensor_t* const tensor);
// Constructing a looped concrete graph. Note that this interface is a little bit simpler than the one for the symbolic graph. The reason
// is that a concrete graph operates on allocated tensors; thus, there is no mapping of tensor symbols between the parent graph
// and the while graph. (The reason to have a mapping in symbolic graphs is to constrain variable leaking between the sub-graph
// and the parent graph.)
CCV_WARN_UNUSED(ccv_nnc_graph_exec_t) ccv_nnc_graph_while(ccv_nnc_graph_t* const graph, uint32_t cmd, ccv_nnc_graph_t* const while_graph);
CCV_WARN_UNUSED(ccv_nnc_graph_t*) ccv_nnc_graph_from_graph_exec(const ccv_nnc_graph_t* const graph, ccv_nnc_graph_exec_t exec);
void ccv_nnc_graph_set_while_expr(ccv_nnc_graph_t* const while_graph, const ccv_nnc_graph_while_f while_expr, const void* const while_data, const ccv_nnc_graph_exec_t* const breakpoints, const int breakpoint_size);
// In that case, the computation graph still has no loops or cycles, but you can run it multiple times against different
// versions of the tensors until the condition is not met (thus, the tensor is versioned, so you can "backpropagate through time").
int ccv_nnc_graph_while_run(ccv_nnc_graph_t* const graph, ccv_nnc_tensor_tape_t* const tensor_tape, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size);

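A minimal sketch of a ccv_nnc_graph_while_f predicate and how it would be attached to a while graph. Per the comments above, commons[0] is assumed to carry the loop counter as a CCV_64S tensor; everything else about the surrounding graph construction is elided.

// Keep looping while fewer than 5 iterations have run.
static int example_while_expr(ccv_nnc_tensor_t* const* const commons, const int common_size,
  ccv_nnc_tensor_t* const* const inputs, const int input_size,
  ccv_nnc_tensor_t* const* const outputs, const int output_size,
  const void* const data)
{
  // commons[0] is assumed to be the special loop-counter tensor (CCV_64S, 1x1x1).
  return commons[0]->data.i64[0] < 5;
}

static void example_attach_while_expr(ccv_nnc_graph_t* const while_graph,
  const ccv_nnc_graph_exec_t* const breakpoints, const int breakpoint_size)
{
  // The breakpoints are the CFO-nodes described above: where the condition gets evaluated.
  ccv_nnc_graph_set_while_expr(while_graph, example_while_expr, 0, breakpoints, breakpoint_size);
}
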
// The API to operate on the symbolic graph is more involved than the one for the concrete graph when it comes to while loops.
// The reason is that the symbolic graph operates in SSA form (static single assignment), and therefore the while loops
// for the symbolic graph have to be parameterized.

// Return a while exec symbol (backed by a sub-graph) of the given graph. The exec nodes on the way from sources to destinations will be moved from the given graph to the sub-graph.
ccv_nnc_graph_exec_symbol_t ccv_nnc_symbolic_graph_while(ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_symbolic_graph_t* const while_graph, const char* const name);
// Set the expression to be evaluated, and at which nodes it is to be evaluated.
void ccv_nnc_symbolic_graph_set_while_expr(ccv_nnc_symbolic_graph_t* const while_graph, const ccv_nnc_graph_while_f while_expr, const void* const while_data, const ccv_nnc_graph_exec_symbol_t* const breakpoints, const int breakpoint_size);

typedef struct {
  ccv_nnc_tensor_symbol_t source;
  ccv_nnc_tensor_symbol_t destination;
} ccv_nnc_tensor_symbol_map_t;

// Set the loop parameters for reuse (the parameterized loop).
void ccv_nnc_symbolic_graph_set_while_params(ccv_nnc_symbolic_graph_t* const while_graph, const ccv_nnc_tensor_symbol_map_t* const symbol_map, const int symbol_map_size);
// Retrieve the special (magical) tensor symbol that retains the while loop counter (thus, a dimension of 1x1x1, CCV_64S type).
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_for_while_count(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t while_symbol);
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_find_tensor_symbol_from_graph(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t symbol);
// Extract the sub-graph of the while loop from a symbol.
CCV_WARN_UNUSED(ccv_nnc_symbolic_graph_t*) ccv_nnc_symbolic_graph_from_while_symbol(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t while_symbol);

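A minimal sketch of wiring the symbolic while API together: wrap a while sub-graph behind a single exec symbol of an existing symbolic graph, attach a condition (the counter predicate sketched earlier), and declare one carried parameter. The x and x_next symbols, the breakpoints, and the mapping direction of the parameter map are all assumptions for illustration.

static void example_symbolic_while(ccv_nnc_symbolic_graph_t* const graph,
  const ccv_nnc_tensor_symbol_t x, const ccv_nnc_tensor_symbol_t x_next,
  const ccv_nnc_graph_exec_symbol_t* const breakpoints, const int breakpoint_size)
{
  // Create the sub-graph that will hold the loop body behind one exec symbol.
  ccv_nnc_symbolic_graph_t* while_graph = ccv_nnc_symbolic_graph_new();
  ccv_nnc_graph_exec_symbol_t loop = ccv_nnc_symbolic_graph_while(graph, while_graph, "loop");
  // Evaluate the condition at the given breakpoints (the CFO-nodes); example_while_expr
  // is the counter predicate from the earlier sketch.
  ccv_nnc_symbolic_graph_set_while_expr(while_graph, example_while_expr, 0, breakpoints, breakpoint_size);
  // Parameterize the loop: the value produced as x_next in one iteration is assumed
  // to feed back as x for the next iteration (mapping direction is an assumption).
  ccv_nnc_tensor_symbol_map_t carry = { .source = x_next, .destination = x };
  ccv_nnc_symbolic_graph_set_while_params(while_graph, &carry, 1);
  (void)loop;
}
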
#endif