Coverage Report

Created: 2019-07-03 22:50

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/ccv_nnc.h
Line
Count
Source
1
/**********************************************************
2
 * C-based/Cached/Core Computer Vision Library
3
 * Liu Liu, 2010-02-01
4
 **********************************************************/
5
6
/**********************************************************
7
 * CCV - Neural Network Collection
8
 **********************************************************/
9
10
#ifndef GUARD_ccv_nnc_h
11
#define GUARD_ccv_nnc_h
12
13
#include <ccv.h>
14
#include <stddef.h>
15
16
// These are generated by cmd/build-cmd.rb
17
#include "cmd/ccv_nnc_cmd.h"
18
#include "cmd/ccv_nnc_backend.h"
19
20
/**
21
 * @defgroup level_0 Level-0 API
22
 * @{
23
 */
24
25
/**
26
 * Initialize the library.
27
 */
28
void ccv_nnc_init(void);
29
30
/** @} */
31
32
/**
33
 * @defgroup level_1 Level-1 API
34
 * @{
35
 */
36
37
/**
38
 * @defgroup level_1_cmd Commands
39
 * @{
40
 */
41
enum {
42
  // Attributes that enable symbolic graph simplification
43
  CCV_NNC_CMD_ATTR_PASSTHROUGH  = 0x01, /**< This doesn't compute anything, but passes the first n tensors to the output (useful for backprop passes that are identities). */
44
  CCV_NNC_CMD_ATTR_OUTPUT_ONES  = 0x02, /**< All the output tensors are 1s (unit). */
45
  CCV_NNC_CMD_ATTR_NULL_IS_ONES = 0x04, /**< Accept nullptr input as if these are tensors with 1s (unit). */
46
};
47
48
// Flags pass into cmd when executing.
49
enum {
50
  CCV_NNC_ACCUMULATE_OUTPUT = 0x01, /**< Enable accumulate outputs (unsupported). */
51
  CCV_NNC_ZERO_MEMORY_ALLOC = 0x02, /**< Don't allocate any extra memory for this operation. */
52
};
53
54
enum {
55
  CCV_NNC_EXEC_SUCCESS   = 0, /**< Successfully executed the command. */
56
  CCV_NNC_EXEC_INVALID   = -1, /**< Invalid inputs. */
57
  CCV_NNC_EXEC_NO_KERNEL = -2, /**< No kernel available for a given command / backend. */
58
  CCV_NNC_EXEC_OOM       = -3, /**< Out of memory error. */
59
};
60
61
/**
62
 * Parameters for command.
63
 */
64
typedef struct {
65
  struct {
66
    int dim[CCV_NNC_MAX_DIM_ALLOC]; /**< [size.dim] The window size for the layer. For a fully connected layer, it is 1 because it is a 1x1 convolutional layer with a count of filters. */
67
  } size;
68
  union {
69
    struct {
70
      int count; /**< [convolution.count] The number of filters for convolutional layer. */
71
      int groups; /**< [convolution.groups] The number of groups for convolutional layer. */
72
    } convolution;
73
    struct {
74
      int reserved; /**< [pool.reserved] A reserved field. */
75
    } pool;
76
    struct {
77
      float kappa; /**< [rnorm.kappa] As in b[i] = a[i] / (rnorm.kappa + rnorm.alpha * sum(a, i - rnorm.size / 2, i + rnorm.size / 2)) ^ rnorm.beta */
78
      float alpha; /**< [rnorm.alpha] See **rnorm.kappa**. */
79
      float beta; /**< [rnorm.beta] See **rnorm.kappa**. */
80
    } rnorm;
81
    struct {
82
      int axis[CCV_NNC_MAX_DIM_ALLOC]; /**< [bnorm.axis[]] The axis selected to compute mean / variance. */
83
      int count; /**< [bnorm.count] The number of axis selected. */
84
      float epsilon; /**< [bnorm.epsilon] The epsilon for standard deviation. */
85
      int is_test; /**< [bnorm.is_test] Whether in test mode. */
86
      float momentum; /**< [bnorm.momentum] running_mean = running_mean * momentum + mean * (1 - momentum). */
87
    } bnorm;
88
    struct {
89
      int nesterov; /**< [minimize.nesterov] Nesterov accelerated gradient. */
90
      float rate; /**< [minimize.rate] The learning rate. */
91
      float scale; /**< [minimize.scale] The scale to be applied to the gradient before doing any minimization. */
92
      float decay; /**< [minimize.decay] This is the weight decay parameter, which represents L2 regularization after momentum is applied. */
93
      float momentum; /**< [minimize.momentum] For SGD, this follows http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf. */
94
      float dampening; /**< [minimize.dampening] This usually == momentum, however, it can be changed. */
95
    } minimize;
96
    struct {
97
      float a[3]; /**< [blas.a[3]] BLAS scalars. */
98
      int count; /**< [blas.count] The number of outputs for blas layer. */
99
    } blas;
100
    struct {
101
      int axis[CCV_NNC_MAX_DIM_ALLOC]; /**< [reduce.axis[]] The axis selected to reduce. */
102
      int count; /**< [reduce.count] The number of axis selected. */
103
    } reduce;
104
    struct {
105
      float p; /**< [dropout.p] Dropout probability. */
106
    } dropout;
107
    void* userdata;
108
  };
109
} ccv_nnc_cmd_param_t;
110
111
/**
112
 * Hints for command.
113
 */
114
typedef struct {
115
  struct {
116
    int dim[CCV_NNC_MAX_DIM_ALLOC]; /**< Stride for each dimension. */
117
  } stride;
118
  struct {
119
    int begin[CCV_NNC_MAX_DIM_ALLOC]; /**< Padding at the beginning of a dimension. */
120
    int end[CCV_NNC_MAX_DIM_ALLOC]; /**< Padding at the end of a dimension. */
121
  } border;
122
} ccv_nnc_hint_t;
123
124
/**
125
 * Opaque pointer to a stream object.
126
 */
127
typedef struct ccv_nnc_stream_context_s ccv_nnc_stream_context_t;
128
129
typedef struct ccv_nnc_cmd_s {
130
  uint32_t cmd; /**< The identifier for command. */
131
  uint32_t backend; /**< The identifier for backend. */
132
  int algorithm; /**< The algorithm selector (as defined by backend). */
133
  ccv_nnc_cmd_param_t info; /**< The command parameters. */
134
  /**
135
   * This has to be the same as the ccv_nnc_cmd_exec_f type.
136
   * This is for the CCV_NNC_CUSTOM_FORWARD / CCV_NNC_CUSTOM_BACKWARD command types.
137
   */
138
  int(*exec)(const struct ccv_nnc_cmd_s cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context);
139
} ccv_nnc_cmd_t;
140
141
/**
142
 * For forward functions, the input tensors and output tensors can be arbitrary.
143
 * However, for backward functions (backpropagation, or gradient functions in other libs),
144
 * the inputs are: 0~m-1: gradients for the forward function's m output tensors, m~m+n-1: the forward function's n input tensors, m+n~2m+n-1: the forward function's m output tensors,
145
 * the outputs are: 0~n-1: gradients w.r.t. the forward function's n input tensors.
146
 * Which input / output tensors can be ignored can be specified in the cmd config structs.
147
 */
148
typedef int(*ccv_nnc_cmd_exec_f)(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context);
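A minimal sketch of a ccv_nnc_cmd_exec_f implementation may make the convention above concrete. The command is hypothetical (it only validates the counts and reports success); a real kernel would read inputs[i]->data and write outputs[i]->data:

static int my_noop_exec(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
	// For a backward command with n forward inputs and m forward outputs,
	// input_size would be 2 * m + n and output_size would be n (see above).
	if (input_size < 0 || output_size < 0)
		return CCV_NNC_EXEC_INVALID;
	return CCV_NNC_EXEC_SUCCESS;
}

Such a function can be supplied as the exec member of ccv_nnc_cmd_s for CCV_NNC_CUSTOM_FORWARD / CCV_NNC_CUSTOM_BACKWARD commands, or passed to ccv_nnc_cmd below.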
149
150
/**
151
 * The function prototype for autotune. The only difference is the max_workspace_size.
152
 * Whoever implements this function prototype takes over the autotune task for the
153
 * command itself, and is responsible for selecting the best algorithm.
154
 * @return The selected algorithm.
155
 */
156
typedef int(*ccv_nnc_cmd_autotune_f)(const ccv_nnc_cmd_t cmd, const size_t max_workspace_size, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context);
157
158
/** @} */
159
160
/**
161
 * @defgroup level_1_tensor Tensors
162
 * @{
163
 */
164
165
/**
166
 * Count the dimensionality of a tensor.
167
 */
168
static inline int ccv_nnc_tensor_nd(const int dim[CCV_NNC_MAX_DIM_ALLOC])
169
3.64M
{
170
3.64M
  int i;
171
14.0M
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; i++)  [region i++: 10.3M]
172
14.1M
    if (dim[i] == 0)
173
3.70M
      return i;
174
18.4E
  return CCV_NNC_MAX_DIM_ALLOC;
175
3.64M
}
Instantiations of ccv_nnc_tensor_nd, expanded per translation unit. In every executed instantiation the two regions on line 174 (`return CCV_NNC_MAX_DIM_ALLOC;`) have count 0, i.e. the function always exits through the early return on line 173. The `18.4E` figures shown for line 174 in the aggregate view above and in ccv_cnnp_dataframe_addons.c are consistent with a negative region counter printed as an unsigned 64-bit value (~2^64), a display artifact rather than a real count. In each unit the `if` on line 172 matches the loop-condition count and the `return i` on line 173 matches the call count, except ccv_cnnp_dataframe_addons.c, where lines 172/173 read 12.8M / 3.29M, presumably counters updated racily under concurrent execution.

Translation unit                             Calls   Loop conds  i++ regions
ccv_nnc_cmd.c                                24.9k   75.3k       50.4k
ccv_nnc_tensor.c                             70      199         129
ccv_cnnp_dataframe_addons.c                  3.22M   12.7M       9.56M
ccv_cnnp_model_core.c                        156     748         592
ccv_nnc_categorical_crossentropy_cpu_ref.c   9       24          15
ccv_nnc_reduce_sum_cpu_ref.c                 18.1k   54.5k       36.4k
ccv_nnc_reduce_max_cpu_ref.c                 14      28          14
ccv_nnc_util_cpu_ref.c                       36      114         78
ccv_nnc_sgd_cpu_ref.c                        94.4k   264k        169k
ccv_nnc_softmax_crossentropy_cpu_ref.c       1.21k   3.05k       1.83k
ccv_nnc_softmax_cpu_ref.c                    512     1.03k       527
ccv_nnc_conv_cpu_ref.c                       4.41k   20.7k       16.3k
ccv_nnc_conv_cpu_opt.c                       3.24k   15.3k       12.0k
ccv_nnc_gemm_cpu_ref.c                       10.4k   31.1k       20.7k
ccv_nnc_gemm_cpu_opt.c                       26.3k   78.7k       52.4k
ccv_nnc_add_cpu_ref.c                        53.9k   161k        107k
ccv_nnc_mul_cpu_ref.c                        179     450         271
ccv_nnc_batch_norm_cpu_ref.c                 381     1.12k       747
ccv_nnc_ew_cpu_ref.c                         139k    408k        268k
ccv_nnc_max_pool_cpu_ref.c                   2.12k   10.5k       8.46k
ccv_nnc_avg_pool_cpu_ref.c                   2.73k   13.6k       10.8k
ccv_nnc_dropout_cpu_ref.c                    6       18          12
ccv_nnc_categorical_crossentropy.c           10      24          14
ccv_nnc_softmax_crossentropy.c               44      112         68
_ccv_nnc_conv_cpu_4x4_3x3_winograd.c         214     856         642
_ccv_nnc_conv_cpu_opt.c                      2.27k   11.0k       8.78k
ccv_nnc_convolution.c                        1.16k   5.68k       4.52k
_ccv_nnc_gemm_cpu_opt.c                      372     1.02k       651
_ccv_nnc_gemm_cpu_sys.c                      25.9k   77.8k       51.8k
ccv_nnc_pool.c                               255     1.25k       997

Unexecuted instantiations (test drivers): while.tests.c, case_of.tests.c, crossentropy.tests.c, backward.tests.c, simplify.tests.c, rand.tests.c, dropout.tests.c, winograd.tests.c, tape.tests.c, broadcast.tests.c, tensor.tests.c, dataframe.addons.tests.c, numa.tests.c, case_of.backward.tests.c, forward.tests.c, autograd.tests.c, tfb.tests.c, custom.tests.c, dataframe.tests.c, gradient.tests.c, transform.tests.c, graph.io.tests.c, batch.norm.tests.c, tensor.bind.tests.c, symbolic.graph.compile.tests.c, dynamic.graph.tests.c, cnnp.core.tests.c, minimize.tests.c, while.backward.tests.c, graph.tests.c, parallel.tests.c, autograd.vector.tests.c, reduce.tests.c, symbolic.graph.tests.c, cudnn.tests.c, cublas.tests.c, nccl.tests.c, schedule.tests.c, graph.vgg.d.tests.c, symbolic.graph.vgg.d.tests.c, cifar.tests.c, dense.net.tests.c

Unexecuted instantiations (library): ccv_nnc_stream.c, ccv_nnc_graph.c, ccv_nnc_symbolic_graph.c, ccv_nnc_symbolic_graph_io.c, ccv_nnc_symbolic_graph_compile.c, ccv_nnc_symbolic_graph_backward.c, ccv_nnc_symbolic_graph_while.c, ccv_nnc_graph_while.c, ccv_nnc_tensor_tape.c, ccv_nnc_symbolic_graph_case_of.c, ccv_nnc_graph_case_of.c, ccv_nnc_symbolic_graph_minimize.c, ccv_nnc_symbolic_graph_parallel.c, ccv_nnc_symbolic_graph_simplify.c, ccv_nnc_graph_run.c, ccv_nnc_dynamic_graph.c, ccv_nnc_dynamic_graph_backward.c, ccv_nnc_dynamic_graph_minimize.c, ccv_cnnp_dataframe.c, ccv_cnnp_dataframe_core.c, ccv_cnnp_model.c, ccv_cnnp_model_io.c, ccv_nnc_relu_cpu_ref.c, ccv_nnc_rand_uniform_cpu_ref.c, ccv_nnc_reduce.c, ccv_nnc_util.c, ccv_nnc_sgd.c, ccv_nnc_relu.c, ccv_nnc_softmax.c, _ccv_nnc_conv_cpu_fft.c, _ccv_nnc_conv_cpu_gemm.c, ccv_nnc_blas.c, ccv_nnc_batch_norm.c, ccv_nnc_ew.c, ccv_nnc_comm.c, ccv_nnc_rand.c, ccv_nnc_dropout.c
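As the per-unit counts above show, ccv_nnc_tensor_nd simply returns the index of the first zero in the dimension array (the early return on line 173 is taken on virtually every call). A minimal sketch of a call:

int dim[CCV_NNC_MAX_DIM_ALLOC] = {128, 64, 3}; // remaining entries are zero-initialized
const int nd = ccv_nnc_tensor_nd(dim); // nd == 3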
176
177
/**
178
 * Create a new tensor.
179
 * @param ptr If 0, NNC will allocate the tensor itself. Otherwise, the tensor will use the memory region referenced by 'ptr'.
180
 * @param params Tensor parameters.
181
 * @param flags Reserved flags for the allocation.
182
 */
183
CCV_WARN_UNUSED(ccv_nnc_tensor_t*) ccv_nnc_tensor_new(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags);
184
/**
185
 * Create a new tensor on the stack.
186
 * @param ptr If 0, NNC will allocate the tensor itself. Otherwise, the tensor will use the memory region referenced by 'ptr'.
187
 * @param params Tensor parameters.
188
 * @param flags Reserved flags for the allocation.
189
 */
190
CCV_WARN_UNUSED(ccv_nnc_tensor_t) ccv_nnc_tensor(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags);
191
/**
192
 * Pin the tensor memory for faster access on GPU.
193
 * @param tensor A tensor whose memory we want to pin.
194
 * @return 0 for success.
195
 */
196
int ccv_nnc_tensor_pin_memory(ccv_nnc_tensor_t* const tensor);
197
/**
198
 * Free a tensor object.
199
 * @param tensor The tensor to be freed.
200
 */
201
void ccv_nnc_tensor_free(ccv_nnc_tensor_t* const tensor);
202
/**
203
 * Create a tensor view. A tensor view can be non-contiguous. Essentially, it provides a view into a tensor.
204
 * @param tensor The tensor that we want to view into.
205
 * @param dim The new dimension of the tensor view.
206
 * @param ofs The offset on each of the dimensions.
207
 * @param inc The line size of each dimension.
208
 */
209
CCV_WARN_UNUSED(ccv_nnc_tensor_view_t*) ccv_nnc_tensor_view_new(const ccv_nnc_tensor_t* const tensor, const int dim[CCV_NNC_MAX_DIM_ALLOC], const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC]);
210
/**
211
 * Create a tensor view on the stack.
212
 * @param tensor The tensor that we want to view into.
213
 * @param dim The new dimension of the tensor view.
214
 * @param ofs The offset on each of the dimensions.
215
 * @param inc The line size of each dimension.
216
 */
217
CCV_WARN_UNUSED(ccv_nnc_tensor_view_t) ccv_nnc_tensor_view(const ccv_nnc_tensor_t* const tensor, const int dim[CCV_NNC_MAX_DIM_ALLOC], const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC]);
218
/**
219
 * Free a tensor view object.
220
 * @param tensor_view The tensor view to be freed.
221
 */
222
void ccv_nnc_tensor_view_free(ccv_nnc_tensor_view_t* const tensor_view);
223
/**
224
 * Zero out a given tensor.
225
 * @param tensor The tensor to be zeroed out.
226
 */
227
void ccv_nnc_tensor_zero(void* const tensor);
228
/**
229
 * Compare whether two tensors are equal. This tolerates some floating point error, following http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
230
 * @param a Tensor a.
231
 * @param b Tensor b.
232
 * @return 0 if equal, -1 otherwise.
233
 */
234
CCV_WARN_UNUSED(int) ccv_nnc_tensor_eq(const ccv_nnc_tensor_t* const a, const ccv_nnc_tensor_t* const b);
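A minimal lifecycle sketch for the tensor API above (with <assert.h> included). The ccv_nnc_tensor_param_t field names and the CCV_TENSOR_CPU_MEMORY / CCV_TENSOR_FORMAT_NHWC / CCV_32F constants are assumptions taken from the wider ccv headers, not declared in this excerpt:

ccv_nnc_tensor_param_t params = {
	.type = CCV_TENSOR_CPU_MEMORY, // assumed constant
	.format = CCV_TENSOR_FORMAT_NHWC, // assumed constant
	.datatype = CCV_32F, // from ccv.h
	.dim = {2, 2},
};
ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, params, 0); // library-allocated
ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, params, 0);
ccv_nnc_tensor_zero(a);
ccv_nnc_tensor_zero(b);
assert(ccv_nnc_tensor_eq(a, b) == 0); // both all-zero, hence equal
ccv_nnc_tensor_free(a);
ccv_nnc_tensor_free(b);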
235
236
/** @} */
237
238
/**
239
 * @addtogroup level_1_cmd
240
 * @{
241
 */
242
243
/**
244
 * Return a high precision time measurement. The exact unit is platform specific.
245
 * @return A monotonically increasing 64-bit integer w.r.t. the passing of time.
246
 */
247
uint64_t ccv_nnc_cmd_mono_time(void);
248
/**
249
 * Return UTF-8 encoded name of a given command.
250
 * @return A UTF-8 string (pointing to a static constant).
251
 */
252
CCV_WARN_UNUSED(const char*) ccv_nnc_cmd_name(const uint32_t cmd);
253
/**
254
 * Return UTF-8 encoded name of a given backend.
255
 * @return A UTF-8 string (pointing to a static constant).
256
 */
257
CCV_WARN_UNUSED(const char*) ccv_nnc_cmd_backend_name(const uint32_t backend);
258
/**
259
 * Check whether a given backend is available for a given command.
260
 * @return 1 if it is available.
261
 */
262
CCV_WARN_UNUSED(int) ccv_nnc_cmd_ok(const uint32_t cmd, const uint32_t backend);
263
/**
264
 * Create a wrapped command with parameters.
265
 * @param cmd The command identifier.
266
 * @param exec If this is a CCV_NNC_CUSTOM_FORWARD / CCV_NNC_CUSTOM_BACKWARD command, this supplies the custom function.
267
 * @param params The parameters for the command.
268
 * @param flags A reserved field for flags.
269
 * @return A wrapped ccv_nnc_cmd_t structure.
270
 */
271
CCV_WARN_UNUSED(ccv_nnc_cmd_t) ccv_nnc_cmd(const uint32_t cmd, ccv_nnc_cmd_exec_f exec, const ccv_nnc_cmd_param_t params, const int flags);
272
/**
273
 * Verify whether a hint is compatible with a given command and given input / output tensor parameters.
274
 * @param hint The hint for a given command. Hint defines things such as paddings, strides etc. for a given command.
275
 * @param cmd The wrapped command.
276
 * @param a The input tensor parameters.
277
 * @param b The output tensor parameters.
278
 * @return 1 if it passes.
279
 */
280
CCV_WARN_UNUSED(int) ccv_nnc_hint_verify(const ccv_nnc_hint_t hint, const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t a, const ccv_nnc_tensor_param_t b);
281
/**
282
 * Automatically find the best hint for a given input / output (on forward pass only).
283
 * @param cmd The wrapped command.
284
 * @param a The input tensor parameters.
285
 * @param b The output tensor parameters.
286
 * @return Best hint we can guess.
287
 */
288
CCV_WARN_UNUSED(ccv_nnc_hint_t) ccv_nnc_hint_auto(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t a, const ccv_nnc_tensor_param_t b);
289
/**
290
 * Automatically find the outputs for the given inputs / hint.
291
 * @param cmd The wrapped command.
292
 * @param inputs An array of input tensor parameters.
293
 * @param input_size The size of input array.
294
 * @param hint The hint for the given command.
295
 * @param outputs An array for the output tensor parameters.
296
 * @param output_size The size of the output array.
297
 */
298
void ccv_nnc_hint_tensor_auto(const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size);
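A sketch of how these fit together, assuming cmd is a wrapped forward command and a holds its input tensor parameters:

ccv_nnc_tensor_param_t b;
ccv_nnc_hint_t no_hint = {0};
ccv_nnc_hint_tensor_auto(cmd, &a, 1, no_hint, &b, 1); // infer output parameters
ccv_nnc_hint_t hint = ccv_nnc_hint_auto(cmd.info, a, b); // guess the best hint
assert(ccv_nnc_hint_verify(hint, cmd.info, a, b) == 1); // and verify it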
299
/**
300
 * Find a suitable backend for a given command and tensor settings.
301
 * @param cmd The wrapped command.
302
 * @param tensor_memory The tensor memory setup (whether it is CPU or GPU).
303
 * @param tensor_formats The tensor layout format (NCHW, NHWC, CHWN etc.)
304
 * @param tensor_datatypes The datatype of a given tensor (FP32 etc.)
305
 * @return The backend identifier for the selected backend.
306
 */
307
CCV_WARN_UNUSED(uint32_t) ccv_nnc_cmd_find_backend(const ccv_nnc_cmd_t cmd, const int tensor_memory, const int tensor_formats, const int tensor_datatypes);
308
/**
309
 * Run autotune to find the best kernel and configuration for the given input.
310
 * @param cmd The original wrapped command.
311
 * @param max_workspace_size The maximum memory allowed for this command to execute.
312
 * @param hint The hint for the given command.
313
 * @param flags The reserved field for flags.
314
 * @param inputs An array of input tensors.
315
 * @param input_size The size of input array.
316
 * @param outputs An array of output tensors.
317
 * @param output_size The size of output array.
318
 * @param stream_context The stream on which to run the autotune. 0 uses the default stream.
319
 * @return The modified cmd that contains the updated configuration.
320
 */
321
CCV_WARN_UNUSED(ccv_nnc_cmd_t) ccv_nnc_cmd_autotune(const ccv_nnc_cmd_t cmd, const size_t max_workspace_size, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context);
322
/**
323
 * Check whether a given tensor input / output pattern can be computed by the given command.
324
 * Bitmasks encode whether a given input / output tensor is available at a position.
325
 * @param cmd The wrapped command to check.
326
 * @param input_size The intended size of the input tensor array.
327
 * @param output_size The intended size of the output tensor array.
328
 * @param input_bitmasks The input tensor array encoding in bitmap, 0: no tensor, 1: has a tensor.
329
 * @param input_bitmask_size The size of the input bitmask array.
330
 * @param output_bitmasks The output tensor array encoding in bitmap.
331
 * @param output_bitmask_size The size of the output bitmask array.
332
 * @return 1 if the command can be executed with the given input / output pattern.
333
 */
334
CCV_WARN_UNUSED(int) ccv_nnc_cmd_bitmask(const ccv_nnc_cmd_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size);
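For example, to ask whether a hypothetical 3-input / 1-output command can run with its second input omitted (say, an optional bias):

uint64_t input_bitmask = ((uint64_t)1 << 0) | ((uint64_t)1 << 2); // inputs 0 and 2 present
uint64_t output_bitmask = (uint64_t)1 << 0; // output 0 present
const int ok = ccv_nnc_cmd_bitmask(cmd, 3, 1, &input_bitmask, 1, &output_bitmask, 1);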
335
/**
336
 * Execute a given command.
337
 * @param cmd The wrapped command to be executed.
338
 * @param hint The hint provided for the command.
339
 * @param flags A reserved field for flags.
340
 * @param inputs The input tensor array.
341
 * @param input_size The size of input tensor array.
342
 * @param outputs The output tensor array.
343
 * @param output_size The size of output tensor array.
344
 * @param stream_context The stream which the command will be executed upon.
345
 * @return CCV_NNC_EXEC_SUCCESS if succeed.
346
 */
347
int ccv_nnc_cmd_exec(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context);
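An end-to-end sketch of executing a command on CPU. CCV_NNC_EWSUM_FORWARD is assumed to come from the generated cmd/ccv_nnc_cmd.h; a, b and c are tensors created as in the earlier sketch:

ccv_nnc_init();
ccv_nnc_cmd_param_t cmd_params = {0}; // element-wise sum needs no parameters
const ccv_nnc_cmd_t sum = ccv_nnc_cmd(CCV_NNC_EWSUM_FORWARD, 0, cmd_params, 0);
ccv_nnc_tensor_t* inputs[] = {a, b};
ccv_nnc_tensor_t* outputs[] = {c};
ccv_nnc_hint_t no_hint = {0};
const int status = ccv_nnc_cmd_exec(sum, no_hint, 0, inputs, 2, outputs, 1, 0); // 0: default stream
assert(status == CCV_NNC_EXEC_SUCCESS);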
348
/**
349
 * Check whether the command is a forward pass or not.
350
 * @param cmd The wrapped command.
351
 * @return 1 if it is a forward pass.
352
 */
353
CCV_WARN_UNUSED(int) ccv_nnc_cmd_is_forward(const ccv_nnc_cmd_t cmd);
354
/**
355
 * Check whether the command is a backward pass or not.
356
 * @param cmd The wrapped command.
357
 * @return 1 if it is a backward pass.
358
 */
359
CCV_WARN_UNUSED(int) ccv_nnc_cmd_is_backward(const ccv_nnc_cmd_t cmd);
360
/**
361
 * Check this command against listed attributes.
362
 * @param cmd The wrapped command.
363
 * @param flags The flags to check against the command (unsupported).
364
 * @return 1 if the flag is supported by the command.
365
 */
366
CCV_WARN_UNUSED(int) ccv_nnc_cmd_attr(const ccv_nnc_cmd_t cmd, const int flags);
367
/**
368
 * Check whether this command allows an inplace operation against a particular input and output (indexed from 0).
369
 * @param cmd The wrapped command.
370
 * @param input_idx The index of the input tensor we want to check.
371
 * @param input_size The total number of inputs.
372
 * @param output_idx The index of the output tensor we want to check.
373
 * @param output_size The total number of outputs.
374
 * @return 1 if the input tensor can be used as the output tensor.
375
 */
376
CCV_WARN_UNUSED(int) ccv_nnc_cmd_allow_inplace(const ccv_nnc_cmd_t cmd, const int input_idx, const int input_size, const int output_idx, const int output_size);
377
/**
378
 * Check whether this command needs to enforce an inplace operation against a particular input and output (indexed from 0).
379
 * @param cmd The wrapped command.
380
 * @param input_idx The index of the input tensor we want to check.
381
 * @param input_size The total number of inputs.
382
 * @param output_idx The index of the output tensor we want to check.
383
 * @param output_size The total number of outputs.
384
 * @return 1 if the input tensor is required to be used as the output tensor.
385
 */
386
CCV_WARN_UNUSED(int) ccv_nnc_cmd_enforce_inplace(const ccv_nnc_cmd_t cmd, const int input_idx, const int input_size, const int output_idx, const int output_size);
387
388
/** @} */
389
390
/**
391
 * @defgroup level_1_stream Streams
392
 * @{
393
 */
394
395
// Control flow constructs
396
// Heavily based on CUDA's stream / event idea.
397
enum {
398
  CCV_STREAM_CONTEXT_CPU = 0x1, /**< A CPU based stream context (unsupported). */
399
  CCV_STREAM_CONTEXT_GPU = 0x2, /**< A GPU based stream context. */
400
};
401
2.41M
#define CCV_STREAM_GET_CONTEXT(type) ((type) & 0x3)
402
1.15M
#define CCV_STREAM_GET_DEVICE(type) ((type) & 0xff00)
403
1.15M
#define CCV_STREAM_GET_DEVICE_ID(type) (CCV_STREAM_GET_DEVICE(type) >> 8)
404
52
#define CCV_STREAM_SET_DEVICE_ID(type, device_id) (type) = (((type) & ~0xfff00) | (((device_id) & 0xfff) << 8))
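A small sketch of composing and decomposing a stream type with these macros:

int type = CCV_STREAM_CONTEXT_GPU;
CCV_STREAM_SET_DEVICE_ID(type, 1); // GPU stream on device 1
assert(CCV_STREAM_GET_CONTEXT(type) == CCV_STREAM_CONTEXT_GPU);
assert(CCV_STREAM_GET_DEVICE_ID(type) == 1);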
405
/**
406
 * Create a new stream context.
407
 * @param type A combination of CPU / GPU and DEVICE_ID.
408
 * @return The newly created stream context.
409
 */
410
CCV_WARN_UNUSED(ccv_nnc_stream_context_t*) ccv_nnc_stream_context_new(const int type);
411
/**
412
 * Get the type of the stream context.
413
 * @param stream_context The stream context we want to inspect.
414
 * @return The type of the stream context.
415
 */
416
CCV_WARN_UNUSED(int) ccv_nnc_stream_context_type(const ccv_nnc_stream_context_t* const stream_context);
417
/**
418
 * Get a stream context local workspace memory. This memory region will be reused
419
 * the next time you call this method on the same stream context.
420
 * @param stream_context The stream context which provides the workspace memory.
421
 * @param workspace_size The size of the workspace memory.
422
 * @param mem The memory type of the said workspace memory (GPU or CPU).
423
 * @return A pointer to the workspace memory.
424
 */
425
CCV_WARN_UNUSED(void*) ccv_nnc_stream_context_get_workspace(ccv_nnc_stream_context_t* const stream_context, const size_t workspace_size, const int mem);
426
/**
427
 * Deallocate any workspace memory on the stream context.
428
 * @param stream The stream context to drain the workspace memory from.
429
 */
430
void ccv_nnc_stream_context_drain(ccv_nnc_stream_context_t* const stream);
431
/**
432
 * Wait until all tasks submitted (command, graph run etc.) on the stream context
433
 * have completed.
434
 * @param stream The stream context to wait.
435
 */
436
void ccv_nnc_stream_context_wait(const ccv_nnc_stream_context_t* const stream);
437
/**
438
 * Deallocate the stream context.
439
 * @param stream_context The stream context to be destroyed.
440
 */
441
void ccv_nnc_stream_context_free(ccv_nnc_stream_context_t* const stream_context);
442
443
/**
444
 * Opaque pointer to the signal object.
445
 */
446
typedef struct ccv_nnc_stream_signal_s ccv_nnc_stream_signal_t;
447
448
/**
449
 * Create a new stream signal.
450
 * @param type A composed type that denotes whether it is associated with a GPU or CPU stream context, and on which device.
451
 * @return The newly created stream signal.
452
 */
453
CCV_WARN_UNUSED(ccv_nnc_stream_signal_t*) ccv_nnc_stream_signal_new(const int type);
454
/**
455
 * Emit a signal on a stream.
456
 * @param stream The stream context where the signal will be emitted.
457
 * @param signal The signal to be emitted. It has to be on the same device as the stream.
458
 */
459
void ccv_nnc_stream_context_emit_signal(ccv_nnc_stream_context_t* const stream, ccv_nnc_stream_signal_t* const signal);
460
/**
461
 * Wait a signal on a stream.
462
 * @param stream The stream context that will be blocked by the signal.
463
 * @param signal The signal to be waited. It can be on a different device of the stream.
464
 */
465
void ccv_nnc_stream_context_wait_signal(const ccv_nnc_stream_context_t* const stream, const ccv_nnc_stream_signal_t* const signal);
466
/**
467
 * Get the stream context this signal is going to be emitted on.
468
 * @param signal The signal we want to inspect.
469
 * @return The most recent stream context you called ccv_nnc_stream_context_emit_signal with.
470
 */
471
CCV_WARN_UNUSED(ccv_nnc_stream_context_t*) ccv_nnc_stream_signal_get_emitter(const ccv_nnc_stream_signal_t* const signal);
472
/**
473
 * Get a signal handle for a stream context; you can then wait on this signal for this stream.
474
 * This is handy if you want to have a stream context specific signal that can be identified
475
 * with an identifier, and don't want to maintain it yourself. The returned signal cannot
476
 * be freed; it is managed by the stream itself.
477
 * @param stream The stream the signal will be bound to.
478
 * @param identifier The identifier for this signal.
479
 * @return A signal that is bound to this stream and ready to be used.
480
 */
481
CCV_WARN_UNUSED(ccv_nnc_stream_signal_t*) ccv_nnc_stream_context_get_signal(ccv_nnc_stream_context_t* const stream, const int64_t identifier);
482
/**
483
 * Deallocate the signal.
484
 * @param signal The signal to be destroyed.
485
 */
486
void ccv_nnc_stream_signal_free(ccv_nnc_stream_signal_t* const signal);
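A minimal sketch of ordering work between two streams with a signal (all work submission elided):

ccv_nnc_stream_context_t* const s0 = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
ccv_nnc_stream_context_t* const s1 = ccv_nnc_stream_context_new(CCV_STREAM_CONTEXT_GPU);
ccv_nnc_stream_signal_t* const sig = ccv_nnc_stream_signal_new(CCV_STREAM_CONTEXT_GPU);
/* ... submit work on s0 ... */
ccv_nnc_stream_context_emit_signal(s0, sig);
ccv_nnc_stream_context_wait_signal(s1, sig); // s1 will not run past this point before s0 reaches the signal
/* ... submit dependent work on s1 ... */
ccv_nnc_stream_context_wait(s0);
ccv_nnc_stream_context_wait(s1);
ccv_nnc_stream_signal_free(sig);
ccv_nnc_stream_context_free(s0);
ccv_nnc_stream_context_free(s1);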
487
/**
488
 * Return number of devices.
489
 * @param type The type of devices (CCV_STREAM_CONTEXT_GPU / CCV_STREAM_CONTEXT_CPU)
490
 * @return The number of devices.
491
 */
492
CCV_WARN_UNUSED(int) ccv_nnc_device_count(const int type);
493
/**
494
 * Remap a source device as the destination device.
495
 * @param type The type of devices (CCV_STREAM_CONTEXT_GPU / CCV_STREAM_CONTEXT_CPU)
496
 * @param source The original device id.
497
 * @param destination The new device id.
498
 * @return 0 if the device remap is successful, -1 if it is not.
499
 */
500
CCV_WARN_UNUSED(int) ccv_nnc_device_remap(const int type, const int source, const int destination);
501
/**
502
 * The neighbor discovery function that will be called with the device id.
503
 */
504
typedef ccv_nnc_stream_context_t*(*ccv_nnc_stream_context_neighbor_discovery_f)(const int device_id, void* const context);
505
/**
506
 * Set the neighbor stream context discovery mechanism. This method exposes how
507
 * neighbor should be defined per stream context. This method is useful for
508
 * commands that operate across devices and need to find the correct stream
509
 * context for these devices. A stream context itself is bound to one device
510
 * only.
511
 * @param stream_context The stream context that bounds to a discovery mechanism.
512
 * @param discovery The neighbor discovery function to invoke.
513
 * @param context The associated context with the neighbor discovery function.
514
 */
515
void ccv_nnc_stream_context_set_neighbor_discovery(ccv_nnc_stream_context_t* const stream_context, ccv_nnc_stream_context_neighbor_discovery_f discovery, void* const context);
516
/**
517
 * Find a neighbor stream context on a given device id for current stream context.
518
 * @param stream_context The stream context which we will look for neighbors.
519
 * @param device_id On which device the stream context may exist.
520
 * @return 0 if no stream context is found. Otherwise, return the stream context on that device.
521
 */
522
CCV_WARN_UNUSED(ccv_nnc_stream_context_t*) ccv_nnc_stream_context_find_neighbor(ccv_nnc_stream_context_t* const stream_context, const int device_id);
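A sketch of a discovery callback; the per_device_streams array is a hypothetical caller-maintained table of one stream context per device:

static ccv_nnc_stream_context_t* my_discovery(const int device_id, void* const context)
{
	ccv_nnc_stream_context_t** const per_device_streams = (ccv_nnc_stream_context_t**)context;
	return per_device_streams[device_id];
}

/* ... */
ccv_nnc_stream_context_set_neighbor_discovery(stream, my_discovery, per_device_streams);
ccv_nnc_stream_context_t* const neighbor = ccv_nnc_stream_context_find_neighbor(stream, 1);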
523
524
/** @} */
525
526
/** @} */
527
528
/**
529
 * @defgroup level_2 Level-2 API
530
 * @{
531
 */
532
533
/**
534
 * @defgroup level_2_essentials Essentials
535
 * @{
536
 */
537
538
enum {
539
  CCV_NNC_SHORT_DOT_GRAPH = 0x0, /**< Display a simplified graph. */
540
  CCV_NNC_LONG_DOT_GRAPH  = 0x1, /**< Display a graph that contains all information. */
541
};
542
543
/**
544
 * Opaque pointer that holds the concrete graph representation.
545
 */
546
typedef struct ccv_nnc_graph_s ccv_nnc_graph_t;
547
548
/**
549
 * The opaque on-stack object that holds a reference to an execution node within a graph.
550
 */
551
typedef struct {
552
  int32_t d; // This is int because sometimes I piggy-back on negatives to carry out some internal computations.
553
  ccv_nnc_graph_t* graph;
554
} ccv_nnc_graph_exec_t;
555
556
720k
#define CCV_NO_GRAPH_EXEC(exec) ((exec).graph == 0)
557
558
/**
559
 * Create an empty graph.
560
 * Note that all graph mutation methods are not thread-safe.
561
 * You should only operate the graph in a serial fashion.
562
 * @return An opaque ccv_nnc_graph_t pointer.
563
 */
564
CCV_WARN_UNUSED(ccv_nnc_graph_t*) ccv_nnc_graph_new(void);
565
/**
566
 * Create a node with specific command execution, as well as its inputs & outputs.
567
 * Underneath, the graph maintains the backing object for the node, and all you get is
568
 * an on-stack object to index the backing object from the graph.
569
 * @param graph The concrete graph.
570
 * @param cmd The wrapped command.
571
 * @param hint The hint for this command.
572
 * @param inputs The input tensors array.
573
 * @param input_size The size of input tensors array.
574
 * @param outputs The output tensors array.
575
 * @param output_size The size of output tensors array.
576
 * @return An on-stack object that references an execution node.
577
 */
578
CCV_WARN_UNUSED(ccv_nnc_graph_exec_t) ccv_nnc_graph_exec_new(ccv_nnc_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size);
579
/**
580
 * Set the command for an existing execution node.
581
 * @param graph The concrete graph.
582
 * @param exec The execution node reference.
583
 * @param cmd The new wrapped command.
584
 */
585
void ccv_nnc_graph_exec_set(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_cmd_t cmd);
586
/**
587
 * Set hint for an existing execution node.
588
 * @param graph The concrete graph.
589
 * @param exec The execution node reference.
590
 * @param hint The new hint.
591
 */
592
void ccv_nnc_graph_exec_set_hint(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_hint_t hint);
593
/**
594
 * Set input / output tensors for an existing execution node.
595
 * @param graph The concrete graph.
596
 * @param exec The execution node reference.
597
 * @param inputs The input tensors array.
598
 * @param input_size The size of input tensors array.
599
 * @param outputs The output tensors array.
600
 * @param output_size The size of output tensors array.
601
 */
602
void ccv_nnc_graph_exec_set_io(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size);
603
/**
604
 * Concatenate an input graph node with an output graph node, adding the dependency edge between them.
605
 * @param graph The concrete graph.
606
 * @param source The execution node reference to connect.
607
 * @param destination The execution node reference to connect to.
608
 * @return Non-zero if the concatenation fails.
609
 */
610
int ccv_nnc_graph_exec_concat(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination);
611
/**
612
 * Disconnect an input graph node from an output graph node in this graph.
613
 * @param graph The concrete graph.
614
 * @param source The execution node reference to disconnect.
615
 * @param destination The execution node reference to disconnect from.
616
 * @return Non-zero if the disjoin fails.
617
 */
618
int ccv_nnc_graph_exec_disjoin(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination);
619
/**
620
 * Count the number of execution nodes in the graph.
621
 * @param graph The concrete graph.
622
 * @return The number of execution nodes in the graph.
623
 */
624
int ccv_nnc_graph_exec_count(const ccv_nnc_graph_t* const graph);
625
/**
626
 * Generate output that can be parsed by GraphViz (DOT language).
627
 * @param graph The concrete graph.
628
 * @param flags Either CCV_NNC_SHORT_DOT_GRAPH or CCV_NNC_LONG_DOT_GRAPH
629
 * @param out The output file stream.
630
 */
631
void ccv_nnc_graph_dot(const ccv_nnc_graph_t* const graph, const int flags, FILE* out);
632
/**
633
 * Run the autotune function on all execution nodes, and assign the optimized commands back.
634
 * @param graph The concrete graph.
635
 * @param max_workspace_size The maximum allowed extra memory usage.
636
 * @param flags A reserved field for flags.
637
 * @param sources The source execution nodes to begin. 0 uses default sources.
638
 * @param source_size The size of source execution nodes.
639
 * @param destinations The destination execution nodes which we end. 0 uses default destinations.
640
 * @param destination_size The size of destination execution nodes.
641
 */
642
void ccv_nnc_graph_autotune(ccv_nnc_graph_t* const graph, const size_t max_workspace_size, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size);
643
/**
644
 * Make the graph topsorted (i.e., do a topological sort) so that when the graph is run, no additional memory will be allocated.
645
 * Otherwise, when we run the graph, we need to allocate some memory on the heap to facilitate execution.
646
 * @param graph The concrete graph.
647
 * @param exec_cvt The execution node assignments will change; you can supply an array to learn about the changes.
648
 * @param exec_cvt_size The provided conversion array size.
649
 */
650
void ccv_nnc_graph_topsort(ccv_nnc_graph_t* const graph, int* const exec_cvt, const int exec_cvt_size);
651
/**
652
 * Allocate extra streams to make this graph runnable in parallel. Note this requires the graph to be topsorted.
653
 * After this is done, you can schedule a graph either on its default stream, or a new stream.
654
 * @param graph The concrete graph.
655
 * @param stream_type The type of stream context we are going to use.
656
 */
657
void ccv_nnc_graph_static_schedule(ccv_nnc_graph_t* const graph, const int stream_type);
658
/**
659
 * Query the default stream for a given graph.
660
 * @param graph The concrete graph.
661
 * @return The default stream context.
662
 */
663
CCV_WARN_UNUSED(ccv_nnc_stream_context_t*) ccv_nnc_graph_default_stream(const ccv_nnc_graph_t* const graph);
664
/**
665
 * Set default sources for a given graph.
666
 * @param graph The concrete graph.
667
 * @param sources The source execution nodes to begin.
668
 * @param source_size The size of source execution nodes.
669
 */
670
void ccv_nnc_graph_set_sources(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const sources, const int source_size);
671
/**
672
 * Get the default source execution nodes pointer.
673
 * @param graph The concrete graph.
674
 * @return A pointer to an array of default source execution nodes.
675
 */
676
ccv_nnc_graph_exec_t* ccv_nnc_graph_sources(const ccv_nnc_graph_t* const graph);
677
/**
678
 * Get the number of default source execution nodes.
679
 * @param graph The concrete graph.
680
 * @return The number of default source execution nodes.
681
 */
682
int ccv_nnc_graph_source_size(const ccv_nnc_graph_t* const graph);
683
/**
684
 * Set default destinations for a given graph.
685
 * @param graph The concrete graph.
686
 * @param destinations The destination execution nodes which we end.
687
 * @param destination_size The size of destination execution nodes.
688
 */
689
void ccv_nnc_graph_set_destinations(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const destinations, const int destination_size);
690
/**
691
 * Get the default destination execution nodes pointer.
692
 * @param graph The concrete graph.
693
 * @return A pointer to an array of default destination execution nodes.
694
 */
695
ccv_nnc_graph_exec_t* ccv_nnc_graph_destinations(const ccv_nnc_graph_t* const graph);
696
/**
697
 * Get the number of default destination execution nodes.
698
 * @param graph The concrete graph.
699
 * @return The number of default destination execution nodes.
700
 */
701
int ccv_nnc_graph_destination_size(const ccv_nnc_graph_t* const graph);
702
/**
703
 * Deallocate this graph and its relevant auxiliary objects (opaque to the user).
704
 * @param graph The concrete graph.
705
 */
706
void ccv_nnc_graph_free(ccv_nnc_graph_t* const graph);
707
/**
708
 * Opaque pointer to the tape of tensors. The tape is used by while loops.
709
 */
710
typedef struct ccv_nnc_tensor_tape_s ccv_nnc_tensor_tape_t;
711
/**
712
 * Execute a computation graph with all bells and whistles. A tensor tape needs to be supplied if the graph contains a backward pass
713
 * for a while loop or branches. With a tensor tape, the tensors are versioned, so you can "backpropagate through time".
714
 * @param graph The concrete graph.
715
 * @param tensor_tape An opaque tensor tape object to "backpropagate through time".
716
 * @param stream_context Which stream this graph will be executed upon.
717
 * @param flags A reserved field for flags.
718
 * @param sources The source execution nodes array.
719
 * @param source_size The size of source execution nodes array. 0 uses default sources.
720
 * @param destinations The destination execution nodes array.
721
 * @param destination_size The size of destination execution nodes array. 0 uses default destinations.
722
 * @return CCV_NNC_EXEC_SUCCESS if succeed.
723
 */
724
int ccv_nnc_graph_run(ccv_nnc_graph_t* const graph, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size);
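A minimal sketch of assembling and running a two-node concrete graph; cmd0 / cmd1 and the tensors a, b, c are assumed to be set up as in the earlier sketches:

ccv_nnc_graph_t* const graph = ccv_nnc_graph_new();
ccv_nnc_hint_t no_hint = {0};
ccv_nnc_tensor_t* ta[] = {a}; ccv_nnc_tensor_t* tb[] = {b}; ccv_nnc_tensor_t* tc[] = {c};
const ccv_nnc_graph_exec_t n0 = ccv_nnc_graph_exec_new(graph, cmd0, no_hint, ta, 1, tb, 1); // a -> b
const ccv_nnc_graph_exec_t n1 = ccv_nnc_graph_exec_new(graph, cmd1, no_hint, tb, 1, tc, 1); // b -> c
ccv_nnc_graph_exec_concat(graph, n0, n1); // n0 must run before n1
ccv_nnc_graph_run(graph, 0, 0, 0, &n0, 1, &n1, 1); // no tape, default stream
ccv_nnc_graph_free(graph);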
725
726
/** @} */
727
728
/**
729
 * @defgroup level_2_others Others
730
 * @{
731
 */
732
733
/**
734
 * Set input / output flags for an existing execution node.
735
 * This must be called after set_io; it sets additional flags for tensors related to this exec.
736
 * @param graph The concrete graph.
737
 * @param exec The execution node reference.
738
 * @param input_flags The input flags array.
739
 * @param input_flag_size The size of the input flags array; should be the same as the input tensors array (or 0).
740
 * @param output_flags The output flags array.
741
 * @param output_flag_size The size of the output flags array; should be the same as the output tensors array (or 0).
742
 */
743
void ccv_nnc_graph_exec_set_io_flags(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const int* const input_flags, const int input_flag_size, const int* const output_flags, const int output_flag_size);
744
/**
745
 * Set the peer reference for an exec. In the backward pass, an execution node's peer is the corresponding forward pass node.
746
 * @param graph The concrete graph.
747
 * @param exec The execution node reference.
748
 * @param peer_exec The peer execution node reference.
749
 */
750
void ccv_nnc_graph_exec_set_peer(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_graph_exec_t peer_exec);
751
/**
752
 * Add a tensor pair that can be used to "carry over" (carry over: passing a tensor from the current loop iteration to the next).
753
 * @param graph The concrete graph.
754
 * @param from The tensor we have output in this loop.
755
 * @param to The tensor we will use as input in the next loop.
756
 */
757
void ccv_nnc_graph_add_carry_over(ccv_nnc_graph_t* const graph, const ccv_nnc_tensor_t* const from, const ccv_nnc_tensor_t* const to);
758
/**
759
 * Updates are tensors that are not directly involved in the computation, but whose pointers need to get updated
760
 * along with this exec, and thus need to be registered as "updates" on the exec node.
761
 * @param graph The concrete graph.
762
 * @param exec The execution node reference.
763
 * @param update The tensor that needs to be updated along with the execution node.
764
 */
765
void ccv_nnc_graph_exec_add_update(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const update);
766
767
/** @} */
768
769
/** @} */
770
771
/**
772
 * @defgroup level_3 Level-3 API
773
 * @{
774
 */
775
776
/**
777
 * @defgroup level_3_essentials Essentials
778
 * @{
779
 */
780
781
/**
782
 * Opaque pointer to the symbolic graph object.
783
 */
784
typedef struct ccv_nnc_symbolic_graph_s ccv_nnc_symbolic_graph_t;
785
786
/**
787
 * Opaque pointer to an arena of allocated tensors.
788
 */
789
typedef struct ccv_nnc_tensor_arena_s ccv_nnc_tensor_arena_t;
790
791
/**
792
 * Opaque pointer to an arena of allocated execs.
793
 */
794
typedef struct ccv_nnc_graph_exec_arena_s ccv_nnc_graph_exec_arena_t;
795
796
/**
797
 * An on-stack object that references a tensor symbol in the symbolic graph.
798
 */
799
typedef struct {
800
  int32_t d;
801
  const ccv_nnc_symbolic_graph_t* graph;
802
} ccv_nnc_tensor_symbol_t;
803
804
/**
805
 * An on-stack object that references an execution node symbol in the symbolic graph.
806
 */
807
typedef struct {
808
  int32_t d;
809
  const ccv_nnc_symbolic_graph_t* graph;
810
} ccv_nnc_graph_exec_symbol_t;
811
812
enum {
813
  CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS = 0x01, /**< Initialize underlying tensor for the symbol with zeros */
814
  CCV_NNC_TENSOR_SYMBOL_INIT_ONES = 0x02, /**< Initialize underlying tensor for the symbol with ones */
815
  CCV_NNC_TENSOR_SYMBOL_TAPE_VAR = 0x04, /**< Mark this as a tape variable (it cannot be folded, will contain flag CCV_TAPE_ALLOC) */
816
  // The one below is special.
817
  CCV_NNC_TENSOR_SYMBOL_DEAD = 0x80000000, /**< Mark this tensor symbol as dead; any future usage will cause an assertion. */
818
};
819
820
36.2k
#define CCV_NNC_TENSOR_SYMBOL_IS_DEAD(x) ((x) & CCV_NNC_TENSOR_SYMBOL_DEAD)
821
822
enum {
823
  CCV_NNC_GRAPH_EXEC_DEAD = 0x1, /**< Mark this node as dead. */
824
  CCV_NNC_GRAPH_EXEC_P_WHILE = 0x10, /**< Mark that this node's keyword is while. */
825
  CCV_NNC_GRAPH_EXEC_CASE_OF = 0x20, /**< Mark that this node's keyword is case_of. */
826
};
827
828
65.0k
#define CCV_NNC_GRAPH_EXEC_IS_DEAD(x) ((x) & CCV_NNC_GRAPH_EXEC_DEAD)
829
4.96k
#define CCV_NNC_GRAPH_REF(x) ((x)->_heap_graph_ref ? (x)->_heap_graph_ref : (x)->_inline_graph_ref)  [regions: heap 178, inline 4.78k]
830
831
enum {
832
  CCV_NNC_NO_TENSOR_SYMBOL = -1, /**< Special symbol reference for no tensor symbol. */
833
  CCV_NNC_WHILE_COUNT_TENSOR_SYMBOL = -2, /**< Special symbol reference for while loop count tensor. */
834
};
835
836
enum {
837
  CCV_NNC_NO_GRAPH_EXEC_SYMBOL = -1, /**< Special symbol reference for no exec symbol. */
838
};
839
840
22
#define CCV_NNC_IS_WHILE_COUNT_TENSOR_SYMBOL(d) (((uint32_t)(d) & 0xf) == 0xe)
841
842
/**
843
 * A data structure to pass in a pair of tensor symbols.
844
 */
845
typedef struct {
846
  ccv_nnc_tensor_symbol_t source; /**< The 'from' tensor symbol. */
847
  ccv_nnc_tensor_symbol_t destination; /**< The 'to' tensor symbol. */
848
} ccv_nnc_tensor_symbol_map_t;
849
850
/**
851
 * Create a new empty symbolic graph. It is an opaque data structure that maintains the whole graph of computation in its symbolic form.
852
 * Note that all graph mutation methods are not thread-safe. You should only operate the graph in a serial fashion.
853
 */
854
CCV_WARN_UNUSED(ccv_nnc_symbolic_graph_t*) ccv_nnc_symbolic_graph_new(void);
855
/**
856
 * Create a tensor symbol (thus, with no actual memory space allocated) in a symbolic graph.
857
 * @param graph The symbolic graph.
858
 * @param info The tensor parameters.
859
 * @param name The name of the tensor symbol; it is optional.
860
 * @return A tensor symbol reference.
861
 */
862
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_param_t info, const char* const name);
863
/**
864
 * Create an alias to the tensor symbol as a tensor view (thus, pointing to the same memory region, but with different header info and offset).
865
 * @param graph The symbolic graph.
866
 * @param tensor_symbol The tensor symbol we are going to reference to.
867
 * @param ofs The offset on each of the dimensions.
868
 * @param inc The line size of each dimension.
869
 * @param info The tensor parameters for the new alias.
870
 * @param name The name of the tensor symbol alias; it is optional.
871
 * @return A tensor symbol alias reference.
872
 */
873
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_alias_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info, const char* const name);
874
/**
875
 * Manually delete a tensor symbol off the symbolic graph.
876
 * @param graph The symbolic graph.
877
 * @param tensor The tensor symbol reference.
878
 */
879
void ccv_nnc_tensor_symbol_free(ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_tensor_symbol_t tensor);
880
/**
881
 * Create a graph execution node (an operation that takes a set of inputs and generates a set of outputs).
882
 * @param graph The symbolic graph.
883
 * @param cmd The wrapped command.
884
 * @param inputs The input tensor symbols array.
885
 * @param input_size The size of input tensor symbols array.
886
 * @param outputs The output tensor symbols array.
887
 * @param output_size The size of output tensor symbols array.
888
 * @param name The name of this execution node, optional.
889
 * @return The execution node symbol reference.
890
 */
891
ccv_nnc_graph_exec_symbol_t ccv_nnc_graph_exec_symbol_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name);
892
/**
893
 * ccv_nnc_graph_exec_symbol_new defaults to use `ccv_nnc_hint_auto` find the best hints for a set of inputs / outputs.
894
 * However, you can also set your own hints.
895
 * @param graph The symbolic graph.
896
 * @param exec The execution node symbol reference.
897
 * @param hint The hint for the command.
898
 */
899
void ccv_nnc_graph_exec_symbol_set_hint(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec, const ccv_nnc_hint_t hint);
900
/**
901
 * Manually delete a exec symbol off the symbolic graph.
902
 * @param graph The symbolic graph.
903
 * @param symbol The execution node symbol reference.
904
 */
905
void ccv_nnc_graph_exec_symbol_free(ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_graph_exec_symbol_t symbol);
906
enum {
907
  CCV_NNC_AUTOGEN_ALL_EXECS = 0x1, /**< Automatic concatenation for all execution nodes */
908
  CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS = 0x2, /**< Automatically find all source and destination nodes. */
909
};
910
/**
911
 * Automatic concatenate these nodes together based on its inputs / outputs.
912
 * Imagining this is to generate the execution flow based on input tensors and output tensors.
913
 * nil for execs and 0 for exec_size means to loop over all the execs on the graph and autogen.
914
 * @param graph The symbolic graph.
915
 * @param execs The execution nodes array.
916
 * @param exec_size The size of execution nodes array.
917
 * @param flags The flags determines what operations to perform when concatenating.
918
 * @return non-zero if cannot figure out.
919
 */
920
int ccv_nnc_graph_exec_symbol_autogen(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const execs, const int exec_size, const int flags);
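
/*
 * A minimal sketch (not part of the original header) of the flow above: create
 * tensor symbols, create execution node symbols, then let autogen wire them
 * up. `cmd0` / `cmd1` stand for wrapped commands built elsewhere, and `params`
 * stands for tensor parameters filled in by the caller.
 */
static void example_symbolic_graph_autogen(const ccv_nnc_cmd_t cmd0, const ccv_nnc_cmd_t cmd1, const ccv_nnc_tensor_param_t params)
{
  ccv_nnc_symbolic_graph_t* const graph = ccv_nnc_symbolic_graph_new();
  const ccv_nnc_tensor_symbol_t a = ccv_nnc_tensor_symbol_new(graph, params, "a");
  const ccv_nnc_tensor_symbol_t b = ccv_nnc_tensor_symbol_new(graph, params, "b");
  const ccv_nnc_tensor_symbol_t c = ccv_nnc_tensor_symbol_new(graph, params, "c");
  ccv_nnc_graph_exec_symbol_new(graph, cmd0, &a, 1, &b, 1, "x"); // b = cmd0(a)
  ccv_nnc_graph_exec_symbol_new(graph, cmd1, &b, 1, &c, 1, "y"); // c = cmd1(b)
  // autogen infers the x -> y edge from the shared tensor symbol b, and also
  // fills in the default sources / destinations of the graph.
  ccv_nnc_graph_exec_symbol_autogen(graph, 0, 0, CCV_NNC_AUTOGEN_ALL_EXECS | CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS);
  ccv_nnc_symbolic_graph_free(graph);
}
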
/**
 * Set the default sources for a symbolic graph.
 * @param graph The symbolic graph.
 * @param sources The source execution nodes array.
 * @param source_size The size of source execution nodes array.
 */
void ccv_nnc_symbolic_graph_set_sources(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size);
/**
 * Get the pointer to the default sources.
 * @param graph The symbolic graph.
 * @return The pointer to the source execution nodes array.
 */
ccv_nnc_graph_exec_symbol_t* ccv_nnc_symbolic_graph_sources(const ccv_nnc_symbolic_graph_t* const graph);
/**
 * Get the size of the default source nodes array.
 * @param graph The symbolic graph.
 * @return The size of the default source nodes array.
 */
int ccv_nnc_symbolic_graph_source_size(const ccv_nnc_symbolic_graph_t* const graph);
/**
 * Set the default destinations for a symbolic graph.
 * @param graph The symbolic graph.
 * @param destinations The destination execution nodes array.
 * @param destination_size The size of destination execution nodes array.
 */
void ccv_nnc_symbolic_graph_set_destinations(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size);
/**
 * Get the pointer to the default destinations.
 * @param graph The symbolic graph.
 * @return The pointer to the destination execution nodes array.
 */
ccv_nnc_graph_exec_symbol_t* ccv_nnc_symbolic_graph_destinations(const ccv_nnc_symbolic_graph_t* const graph);
/**
 * Get the size of the default destination nodes array.
 * @param graph The symbolic graph.
 * @return The size of the default destination nodes array.
 */
int ccv_nnc_symbolic_graph_destination_size(const ccv_nnc_symbolic_graph_t* const graph);
/**
 * Generate output that can be parsed by GraphViz (DOT language).
 * @param graph The symbolic graph.
 * @param flags Either CCV_NNC_SHORT_DOT_GRAPH or CCV_NNC_LONG_DOT_GRAPH.
 * @param out The output file stream.
 */
void ccv_nnc_symbolic_graph_dot(const ccv_nnc_symbolic_graph_t* const graph, const int flags, FILE* out);

/**
 * The data structure to wrap a tensor symbol and a concrete tensor together.
 */
typedef struct {
  ccv_nnc_tensor_symbol_t symbol;
  const ccv_nnc_tensor_t* tensor;
} ccv_nnc_tensor_bind_t;

/**
 * Compile a symbolic graph into a graph that can be executed, and a set of tensors (opaque data structure tensor arena) are allocated based on which tensor symbols are the input and which are the outputs. The tensor allocation is done to minimize the required storage.
 * tensor_binds provides custom bindings for these tensors. You are still responsible for managing the lifetime of these tensors.
 * outputs marks the tensor symbols that need to be kept until the end of the graph.
 * @param graph The symbolic graph.
 * @param tensor_binds The binding array (a tensor symbol and a concrete tensor). We replace everywhere that uses the tensor symbol with the concrete tensor.
 * @param tensor_bind_size The size of the binding array.
 * @param outputs The output tensor symbols that we want to keep the value.
 * @param output_size The size of the output tensor symbols array.
 * @param sources The sources for the graph.
 * @param source_size The size of the sources array. 0 to use default sources.
 * @param destinations The destinations for the graph.
 * @param destination_size The size of the destinations array. 0 to use default destinations.
 * @param graph_ref The pointer to store the generated concrete graph.
 * @param tensor_arena_ref The pointer to store ccv_nnc_tensor_arena_t.
 * @param graph_exec_arena_ref The pointer to store ccv_nnc_graph_exec_arena_t.
 */
void ccv_nnc_symbolic_graph_compile(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_bind_t* const tensor_binds, const int tensor_bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size, ccv_nnc_graph_t** const graph_ref, ccv_nnc_tensor_arena_t** const tensor_arena_ref, ccv_nnc_graph_exec_arena_t** const graph_exec_arena_ref);
/**
 * Free the symbolic graph and its associated memory. Note that if you compiled a graph / tensor arena out of this symbolic graph, these won't be free'd.
 * @param graph The symbolic graph.
 */
void ccv_nnc_symbolic_graph_free(ccv_nnc_symbolic_graph_t* const graph);
/**
 * Find the corresponding tensor by a symbol from the tensor arena.
 * @param tensor_arena The tensor arena object generated through compilation.
 * @param symbol The tensor symbol reference. Because the tensor symbol reference is on the stack, it can still be used even after the original symbolic graph is free'd.
 * @return A concrete tensor from the tensor arena.
 */
CCV_WARN_UNUSED(ccv_nnc_tensor_t*) ccv_nnc_tensor_from_symbol(const ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_tensor_symbol_t symbol);
/**
 * Bind a tensor to a symbol. You are still responsible for managing the lifetime of the tensor to make sure it is not freed until everything is done.
 * @param tensor_arena The tensor arena object generated through compilation.
 * @param symbol The tensor symbol reference. Because the tensor symbol reference is on the stack, it can still be used even after the original symbolic graph is free'd.
 * @param tensor The new tensor to bind to.
 */
void ccv_nnc_tensor_bind_symbol(const ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_t* const tensor);
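
/*
 * A short sketch (not part of the original header) of rebinding: swap the
 * memory behind a symbol between runs without recompiling. Assumes
 * ccv_nnc_tensor_new / ccv_nnc_tensor_free as declared earlier in this header.
 */
static void example_rebind(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_tensor_symbol_t input, const ccv_nnc_tensor_param_t params)
{
  ccv_nnc_tensor_t* const replacement = ccv_nnc_tensor_new(0, params, 0);
  ccv_nnc_tensor_bind_symbol(tensor_arena, input, replacement);
  // ... run the compiled graph; it now reads `input` from `replacement` ...
  ccv_nnc_tensor_free(replacement); // Only free after the graph is done with it.
}
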
/**
 * Free the opaque tensor arena structure.
 * @param tensor_arena The tensor arena object generated through compilation.
 */
void ccv_nnc_tensor_arena_free(ccv_nnc_tensor_arena_t* const tensor_arena);
/**
 * Find the corresponding graph exec by an exec symbol from the graph exec arena.
 * @param graph_exec_arena The graph execution node arena object generated through compilation.
 * @param symbol The execution node symbol reference. Because the execution node symbol reference is on the stack, it can still be used even after the original symbolic graph is free'd.
 * @return An execution node reference to the concrete graph.
 */
CCV_WARN_UNUSED(ccv_nnc_graph_exec_t) ccv_nnc_graph_exec_from_symbol(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const ccv_nnc_graph_exec_symbol_t symbol);
/**
 * Return the node that can drive all the source nodes from the compilation.
 * @param graph_exec_arena The graph execution node arena object generated through compilation.
 * @return An execution node reference that is the source.
 */
CCV_WARN_UNUSED(ccv_nnc_graph_exec_t) ccv_nnc_graph_exec_source(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena);
/**
 * Return the node that can drain all the destination nodes from the compilation.
 * @param graph_exec_arena The graph execution node arena object generated through compilation.
 * @return An execution node reference that is the destination.
 */
CCV_WARN_UNUSED(ccv_nnc_graph_exec_t) ccv_nnc_graph_exec_destination(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena);
/**
 * Free the opaque graph exec arena structure.
 * @param graph_exec_arena The graph execution node arena object generated through compilation.
 */
void ccv_nnc_graph_exec_arena_free(ccv_nnc_graph_exec_arena_t* const graph_exec_arena);
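
/*
 * A sketch (not part of the original header) of the full compile-and-free
 * flow, assuming `graph` already has default sources / destinations (e.g. via
 * autogen). ccv_nnc_graph_run and ccv_nnc_graph_free belong to the concrete
 * graph API declared earlier in this header.
 */
static void example_compile_run_free(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t output)
{
  ccv_nnc_graph_t* run_graph = 0;
  ccv_nnc_tensor_arena_t* tensor_arena = 0;
  ccv_nnc_graph_exec_arena_t* graph_exec_arena = 0;
  // No custom binds; keep the value of `output` until the end of the graph.
  ccv_nnc_symbolic_graph_compile(graph, 0, 0, &output, 1, 0, 0, 0, 0, &run_graph, &tensor_arena, &graph_exec_arena);
  const ccv_nnc_graph_exec_t source = ccv_nnc_graph_exec_source(graph_exec_arena);
  const ccv_nnc_graph_exec_t destination = ccv_nnc_graph_exec_destination(graph_exec_arena);
  (void)source;
  (void)destination;
  // ... drive run_graph from source to destination (e.g. with ccv_nnc_graph_run),
  // then read the result out of the arena:
  ccv_nnc_tensor_t* const out = ccv_nnc_tensor_from_symbol(tensor_arena, output);
  (void)out; // The result lives at out->data until the arena is freed.
  ccv_nnc_graph_free(run_graph);
  ccv_nnc_tensor_arena_free(tensor_arena);
  ccv_nnc_graph_exec_arena_free(graph_exec_arena);
}
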
/**
 * Write the symbolic graph to disk, along with some binding tensors.
 * @param graph The symbolic graph.
 * @param tensor_binds The binding array (pair of tensor symbol and concrete tensor).
 * @param tensor_bind_size The size of the binding array.
 * @param fn The file name.
 */
void ccv_nnc_symbolic_graph_write(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_bind_t* const tensor_binds, const int tensor_bind_size, const char* const fn);
/**
 * Read the symbolic graph from disk, with some binding tensors.
 * @param fn The file name.
 * @param graph_ref The pointer to store the symbolic graph.
 * @param tensor_binds_ref The pointer to store the binding array.
 * @param tensor_bind_size_ref The pointer to store the size of the binding array.
 */
void ccv_nnc_symbolic_graph_read(const char* const fn, ccv_nnc_symbolic_graph_t** const graph_ref, ccv_nnc_tensor_bind_t** const tensor_binds_ref, int* const tensor_bind_size_ref);
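
/*
 * A sketch (not part of the original header) of a write / read round-trip with
 * no binds. "graph.bin" is a placeholder path, and the recovered binds array
 * is assumed to be released with ccfree from ccv.h.
 */
static void example_write_read(const ccv_nnc_symbolic_graph_t* const graph)
{
  ccv_nnc_symbolic_graph_write(graph, 0, 0, "graph.bin");
  ccv_nnc_symbolic_graph_t* dup = 0;
  ccv_nnc_tensor_bind_t* binds = 0;
  int bind_size = 0;
  ccv_nnc_symbolic_graph_read("graph.bin", &dup, &binds, &bind_size);
  // ... use `dup` and the `bind_size` recovered binds ...
  if (binds)
    ccfree(binds);
  ccv_nnc_symbolic_graph_free(dup);
}
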

/** @} */

/**
 * @defgroup level_3_others Others
 * @{
 */

/**
 * Return the symbol it aliases to.
 * @param graph The symbolic graph.
 * @param tensor_symbol The tensor symbol alias.
 * @return A tensor symbol reference to the original tensor symbol. If this symbol has no reference, return NO_SYMBOL (.graph = 0).
 */
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_alias_to(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor_symbol);
/**
 * Set the tensor symbol parameters.
 * @param graph The symbolic graph.
 * @param tensor The tensor symbol reference.
 * @param info The new tensor parameters.
 * @return non-zero if it encountered errors.
 */
int ccv_nnc_tensor_symbol_set(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor, const ccv_nnc_tensor_param_t info);
/**
 * Get the parameters for a tensor symbol.
 * @param graph The symbolic graph.
 * @param tensor The tensor symbol reference.
 * @return The tensor parameters.
 */
CCV_WARN_UNUSED(ccv_nnc_tensor_param_t) ccv_nnc_tensor_symbol_params(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor);
/**
 * Set the flags for this tensor symbol. The flags are only used for the symbol, not for the tensor.
 * @param graph The symbolic graph.
 * @param tensor The tensor symbol reference.
 * @param flags A reserved field for flags.
 */
int ccv_nnc_tensor_symbol_set_flags(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor, const int flags);
/**
 * Get all the flags for a tensor.
 * @param graph The symbolic graph.
 * @param tensor The tensor symbol reference.
 */
CCV_WARN_UNUSED(int) ccv_nnc_tensor_symbol_flags(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor);
/**
 * Set the cmd of this exec symbol.
 * @param graph The symbolic graph.
 * @param exec The execution node symbol reference.
 * @param cmd The new wrapped command.
 */
void ccv_nnc_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec, const ccv_nnc_cmd_t cmd);
/**
 * Return the command on this exec symbol.
 * @param graph The symbolic graph.
 * @param exec The execution node symbol reference.
 * @return The wrapped command.
 */
CCV_WARN_UNUSED(ccv_nnc_cmd_t) ccv_nnc_graph_exec_symbol_cmd(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec);
/**
 * Set the inputs / outputs for an exec symbol.
 * @param graph The symbolic graph.
 * @param exec The execution node symbol reference.
 * @param inputs The input tensor symbols array.
 * @param input_size The size of input tensor symbols array.
 * @param outputs The output tensor symbols array.
 * @param output_size The size of output tensor symbols array.
 */
void ccv_nnc_graph_exec_symbol_set_io(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size);
/**
 * Manually concatenate an input node with an output graph node.
 * @param graph The symbolic graph.
 * @param source The source execution node symbol to connect.
 * @param destination The destination execution node symbol to connect to.
 * @return non-zero if it cannot concatenate successfully.
 */
int ccv_nnc_graph_exec_symbol_concat(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t source, const ccv_nnc_graph_exec_symbol_t destination);
/**
 * Manually disconnect an input node from an output graph node for this graph.
 * @param graph The symbolic graph.
 * @param source The source execution node symbol to disconnect.
 * @param destination The destination execution node symbol to disconnect from.
 * @return non-zero if it cannot disjoin successfully.
 */
int ccv_nnc_graph_exec_symbol_disjoin(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t source, const ccv_nnc_graph_exec_symbol_t destination);
/**
 * Number of exec symbols.
 * @param graph The symbolic graph.
 */
CCV_WARN_UNUSED(int) ccv_nnc_graph_exec_symbol_count(const ccv_nnc_symbolic_graph_t* const graph);
/**
 * Substitution function. Given an execution node symbol and a command, return a new command.
 */
typedef ccv_nnc_cmd_t(*ccv_nnc_symbolic_graph_subst_f)(const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd);
/**
 * Generate a duplicate of the provided graph.
 * While generating the duplicate, it calls the function pointer to re-process the node type.
 * @param graph The symbolic graph.
 * @param subst The substitution function.
 * @return The duplicated symbolic graph.
 */
CCV_WARN_UNUSED(ccv_nnc_symbolic_graph_t*) ccv_nnc_symbolic_graph_dup(const ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_symbolic_graph_subst_f subst);
/**
 * Number of tensor symbols.
 * @param graph The symbolic graph.
 */
CCV_WARN_UNUSED(int) ccv_nnc_tensor_symbol_count(const ccv_nnc_symbolic_graph_t* const graph);
/**
 * The opaque structure to iterate over the graph.
 */
typedef struct ccv_nnc_symbolic_graph_iter_s ccv_nnc_symbolic_graph_iter_t;
/**
 * Return an iterator for graph exec symbols from a graph.
 * @param graph The symbolic graph.
 * @param sources The sources for the graph.
 * @param source_size The size of the sources array. 0 to use default sources.
 * @param destinations The destinations for the graph.
 * @param destination_size The size of the destinations array. 0 to use default destinations.
 * @return The iterator for the symbolic graph.
 */
CCV_WARN_UNUSED(ccv_nnc_symbolic_graph_iter_t*) ccv_nnc_symbolic_graph_iter_new(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size);
/**
 * Iterate to the next item.
 * @param iter The iterator for the symbolic graph.
 * @return 1 if successful, 0 if reached the end.
 */
CCV_WARN_UNUSED(int) ccv_nnc_symbolic_graph_iter_next(ccv_nnc_symbolic_graph_iter_t* const iter);
/**
 * Get the command, hint, flags and name associated with the exec symbol.
 * @param iter The iterator for the symbolic graph.
 * @param cmd The pointer we are going to write the command to.
 * @param hint The pointer we are going to write the hint to.
 * @param flags The pointer we are going to write the flags to.
 * @param name The pointer we are going to write the name to.
 */
void ccv_nnc_graph_exec_symbol_from_iter(ccv_nnc_symbolic_graph_iter_t* const iter, ccv_nnc_cmd_t* const cmd, ccv_nnc_hint_t* const hint, int* const flags, char** const name);
/**
 * Get the inputs and outputs associated with the exec symbol.
 * @param iter The iterator for the symbolic graph.
 * @param inputs The pointer we are going to write input tensor symbols to.
 * @param input_size The pointer we are going to write the input tensor symbol size to.
 * @param outputs The pointer we are going to write output tensor symbols to.
 * @param output_size The pointer we are going to write the output tensor symbol size to.
 */
void ccv_nnc_tensor_symbol_io_from_iter(ccv_nnc_symbolic_graph_iter_t* const iter, ccv_nnc_tensor_symbol_t** const inputs, int* const input_size, ccv_nnc_tensor_symbol_t** const outputs, int* const output_size);
/**
 * Free the iterator structure.
 * @param iter The iterator for the symbolic graph.
 */
void ccv_nnc_symbolic_graph_iter_free(ccv_nnc_symbolic_graph_iter_t* const iter);
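
/*
 * A sketch (not part of the original header) of walking a graph in order and
 * printing each node's name; printf comes from <stdio.h>, and the name is
 * assumed to be 0 for unnamed nodes.
 */
static void example_iterate(const ccv_nnc_symbolic_graph_t* const graph)
{
  // 0-sized sources / destinations fall back to the graph's defaults.
  ccv_nnc_symbolic_graph_iter_t* const iter = ccv_nnc_symbolic_graph_iter_new(graph, 0, 0, 0, 0);
  while (ccv_nnc_symbolic_graph_iter_next(iter)) {
    ccv_nnc_cmd_t cmd;
    ccv_nnc_hint_t hint;
    int flags;
    char* name;
    ccv_nnc_graph_exec_symbol_from_iter(iter, &cmd, &hint, &flags, &name);
    printf("node: %s\n", name ? name : "(unnamed)");
  }
  ccv_nnc_symbolic_graph_iter_free(iter);
}
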
/**
 * For a given tensor symbol, this method resolves to its local reference inside the given graph.
 * This is related to the sub-graph of symbolic graphs. A tensor symbol in the sub-graph can still have a
 * representation in the parent graph. This method is used to find the local reference in any graph.
 * @param graph The symbolic graph.
 * @param tensor_symbol The tensor symbol we want to resolve.
 * @return A tensor symbol reference in the given graph.
 */
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_resolve(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor_symbol);
/**
 * Pass a graph's tensor symbol into its sub-graph. We will make the connection that the source tensor
 * symbol in the source symbolic graph is the destination tensor symbol in the destination symbolic graph.
 * The reason to do this inference is that a tensor symbol is local to a symbolic graph under the hood.
 * Although you can use tensor symbols from different graphs directly (it calls this method or the resolve
 * method above when creating an execution node symbol), sometimes you need this method to do it manually.
 * @param src_graph The source symbolic graph.
 * @param dest_graph The destination symbolic graph.
 * @param src_tensor_symbol The tensor symbol in the source graph we want to connect.
 * @param dest_tensor_symbol The tensor symbol in the destination graph we want to connect.
 */
void ccv_nnc_tensor_symbol_hookup(ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const ccv_nnc_tensor_symbol_t src_tensor_symbol, const ccv_nnc_tensor_symbol_t dest_tensor_symbol);
/**
 * Set bypasses for a tensor symbol.
 * For case..of graphs, if the condition isn't met, we will skip the execution of a sub-graph.
 * However, in that case, we cannot easily express which output tensor corresponds to which input tensor.
 * This method provides the way.
 * @param graph The symbolic graph.
 * @param symbol_map The pair of tensors array, source is the input tensor, destination is the output tensor.
 * @param symbol_map_size The size of the tensor pairs array.
 */
void ccv_nnc_tensor_symbol_set_bypasses(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_map_t* const symbol_map, const int symbol_map_size);
/**
 * Fetch inputs / outputs for an exec symbol. For efficiency considerations, this returns pointers directly.
 * @param graph The symbolic graph.
 * @param symbol The execution node symbol reference.
 * @param inputs The pointer to store the input tensor symbols array.
 * @param input_size The pointer to store the size of the input tensor symbols array.
 * @param outputs The pointer to store the output tensor symbols array.
 * @param output_size The pointer to store the size of the output tensor symbols array.
 */
void ccv_nnc_graph_exec_symbol_io(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol, const int** const inputs, int* const input_size, const int** const outputs, int* const output_size);
/**
 * Which exec symbol this is connected to. For efficiency considerations, this returns pointers directly.
 * @param graph The symbolic graph.
 * @param symbol The execution node symbol reference.
 * @param tos The pointer to store the outgoing indexes of the execution nodes.
 * @param to_size The pointer to store the number of outgoing indexes.
 */
void ccv_nnc_graph_exec_symbol_to(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol, const int** const tos, int* const to_size);
/**
 * Find the size allocated on the opaque tensor arena structure.
 * @param tensor_arena The tensor arena object generated through compilation.
 * @return The total allocated size in bytes.
 */
CCV_WARN_UNUSED(uint64_t) ccv_nnc_tensor_arena_size(const ccv_nnc_tensor_arena_t* const tensor_arena);
/**
 * Function prototype for the tensor symbol creation callback.
 */
typedef void(*ccv_nnc_tensor_symbol_new_hook_f)(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name);
/**
 * Hook into the call to ccv_nnc_tensor_symbol_new; returns the previously provided context when called.
 * @param graph The symbolic graph.
 * @param hook The function to be called if a new tensor symbol is created.
 * @param context The context associated with the callback function.
 */
void* ccv_nnc_tensor_symbol_new_hook(ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_tensor_symbol_new_hook_f hook, void* context);
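
/*
 * A sketch (not part of the original header) of a creation hook that counts
 * newly created tensor symbols; the context registered below comes back as the
 * hook's first argument.
 */
static void example_count_hook(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name)
{
  (void)symbol;
  (void)info;
  (void)name;
  ++*(int*)context;
}

static void example_install_hook(ccv_nnc_symbolic_graph_t* const graph, int* const count)
{
  void* const previous_context = ccv_nnc_tensor_symbol_new_hook(graph, example_count_hook, count);
  (void)previous_context; // Restore it later if another hook was installed.
  // ... every ccv_nnc_tensor_symbol_new on `graph` now bumps *count ...
}
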
/**
 * Function prototype for the tensor symbol alias creation callback.
 */
typedef void(*ccv_nnc_tensor_symbol_alias_new_hook_f)(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info, const char* const name);
/**
 * Hook into the call to ccv_nnc_tensor_symbol_alias_new; returns the previously provided context when called.
 * @param graph The symbolic graph.
 * @param hook The function to be called if a new tensor symbol alias is created.
 * @param context The context associated with the callback function.
 */
void* ccv_nnc_tensor_symbol_alias_new_hook(ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_tensor_symbol_alias_new_hook_f hook, void* context);
/**
 * Set the peer reference for tensor symbols. The peer reference for tensor symbols has a very specific meaning.
 * A backward pass can involve sub-graphs, and the commands in the sub-graph could reference tensor symbols of
 * a different graph (its forward pass graph). That is not allowed (two graphs with no ancestral relationship
 * cannot share a tensor symbol). So we create a new tensor symbol, but set the peer reference.
 * @param graph The symbolic graph.
 * @param tensor_symbol The tensor symbol in the current graph.
 * @param peer_tensor_symbol The tensor symbol in the peer graph.
 */
void ccv_nnc_tensor_symbol_set_peer(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor_symbol, const ccv_nnc_tensor_symbol_t peer_tensor_symbol);
/**
 * Function prototype for the execution node symbol creation callback.
 */
typedef void(*ccv_nnc_graph_exec_symbol_new_hook_f)(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name);
/**
 * Hook into the call to ccv_nnc_graph_exec_symbol_new; returns the previously provided context when called.
 * @param graph The symbolic graph.
 * @param hook The function to be called if a new execution node symbol is created.
 * @param context The context associated with the callback function.
 */
void* ccv_nnc_graph_exec_symbol_new_hook(ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_graph_exec_symbol_new_hook_f hook, void* context);
/**
 * Set the peer reference for an exec. This is very similar to the one for the concrete graph. A peer reference
 * of a backward pass execution node is its forward pass counterpart.
 * @param graph The symbolic graph.
 * @param exec_symbol The execution node symbol in the current graph.
 * @param peer_exec_symbol The peering execution node symbol.
 */
void ccv_nnc_graph_exec_symbol_set_peer(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_graph_exec_symbol_t peer_exec_symbol);

/** @} */

/** @} */

/**
 * @defgroup level_3_5 Level-3.5 API
 * @{
 */

/**
 * @defgroup level_3_5_autograd Automatic Differentiation
 * @{
 */

/**
 * Compute the backward graph, assuming the provided symbolic graph only contains the "forward" part from sources to destinations.
 * This is effectively what is called the "autograd" or automatic differentiation process (specifically, "reverse AD") in other libs.
 * For an expression y = f(x), to compute dx, x is the wrt symbol and y is the f symbol.
 * @param graph The symbolic graph.
 * @param f_symbols The tensor symbols array of the result (or loss).
 * @param f_symbol_size The size of the f symbols array.
 * @param wrt_symbols The tensor symbols array of the inputs.
 * @param wrt_symbol_size The size of the wrt symbols array.
 * @param sources The source execution nodes array for the computation.
 * @param source_size The size of the source nodes array.
 * @param destinations The destination execution nodes array for the computation.
 * @param destination_size The size of the destination nodes array.
 */
void ccv_nnc_symbolic_graph_backward(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const f_symbols, const int f_symbol_size, const ccv_nnc_tensor_symbol_t* const wrt_symbols, const int wrt_symbol_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size);
/**
 * Get the symbol that contains the gradient. The list will be flushed if the ccv_nnc_symbolic_graph_backward function is called again.
 * @param graph The symbolic graph.
 * @param symbol The tensor symbol we want to retrieve the gradient for (must be one of the wrt symbols or the f symbols).
 * @return A tensor symbol that represents the gradient.
 */
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_for_backward(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t symbol);
/**
 * Get the execution node symbol for a tensor symbol. This is used to retrieve the execution node for a gradient tensor symbol.
 * @param graph The symbolic graph.
 * @param symbol The tensor symbol that represents the gradient (must be one of the wrt symbols).
 * @return An execution node symbol that generates the gradient.
 */
CCV_WARN_UNUSED(ccv_nnc_graph_exec_symbol_t) ccv_nnc_graph_exec_symbol_for_backward(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t symbol);
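
/*
 * A sketch (not part of the original header) of reverse-mode AD on an
 * already-built graph: differentiate `loss` with respect to `x` over the
 * graph's default sources / destinations, then look up the gradient symbol.
 */
static ccv_nnc_tensor_symbol_t example_gradient(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t loss, const ccv_nnc_tensor_symbol_t x)
{
  ccv_nnc_symbolic_graph_backward(graph, &loss, 1, &x, 1,
    ccv_nnc_symbolic_graph_sources(graph), ccv_nnc_symbolic_graph_source_size(graph),
    ccv_nnc_symbolic_graph_destinations(graph), ccv_nnc_symbolic_graph_destination_size(graph));
  return ccv_nnc_tensor_symbol_for_backward(graph, x); // The symbol for dx.
}
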

/** @} */

/**
 * @defgroup level_3_5_while While Loop
 * @{
 */

/**
 * @page symbolic_while Construct a "while" loop in a symbolic graph
 *
 * (This document was written in 2016; since then, Caffe2 has added support for a while loop (as a sub-graph), and a
 * similar implementation has been added for ONNX as well.)
 *
 * In NNC, a computation graph cannot allow cycles. Thus, there is no flexible way to express loops.
 *
 * A little survey on this problem:
 *
 * * Caffe2 supports a specific type of recurrent neural network.
 *
 * * TensorFlow, as it stands, supports a while construct. Its while construct is very straightforward: a body and
 *   a condition are provided, and you can construct whatever graph you want.
 *
 * * mxnet supports recurrent neural networks by unrolling them into a normal, non-looped graph.
 *
 * * Theano supports the "scan" op, which is a terminable loop (with a loop variant, known as a sequence).
 *
 * * CNTK supports this with custom BrainScript. Within BrainScript, you can access the previous state in a
 *   function, and therefore it effectively supports calling a method multiple times (looping over).
 *
 * Of the above, Caffe2 and mxnet gave up on supporting a generic loop for performance reasons. TensorFlow supports
 * a generic while loop, with all the trouble it may introduce (see the nested while loop bug in TensorFlow that was
 * recently fixed). Theano picked a point that seems pretty sweet, although there are limitations. CNTK's BrainScript
 * is a DSL; they can do whatever they want, with the drawback that they now need to implement a language runtime.
 * TensorFlow, Theano and CNTK all support auto-differentiation over the while loop with a tape (Wengert list).
 *
 * A simple way to support loops is to support conditional jumps. In fact, a conditional jump is a more generic way
 * of doing loops. However, once you take into consideration that a fully differentiable computation graph
 * is meant to be supported, it is terrible. With conditional jumps, it is really hard to know which tensor
 * is used where, and thus to keep track for reverse accumulation (backward propagation). There is no counter or
 * anything of the sort; it is pretty hard to trace back which line is executed how many times. Compounding this with
 * NNC's promise that whatever shows on the graph as "parallel"-computable will be computed in parallel,
 * it is close to impossible to do this tracking if conditional jumps are used in their raw form. Certain restrictions must be
 * applied to how the loop is done. The compromise comes from a closer examination of NNC's preferences.
 *
 * NNC prefers to have the graph without cycles. It also prefers to be fully differentiable. Another important
 * criterion is that most functions in NNC require SSA (Static Single Assignment) representation. With these in
 * mind, support for the while loop has to be strict.
 *
 * Luckily, there are well-formalized ways of supporting this in literature and practice. Because it is
 * well-formalized, translating this into the existing NNC implementation is actually pretty straightforward. We
 * are going to introduce a special version of the while loop. In literature that discusses SSA, it may be
 * called a parameterized loop. For us, it works like this:
 *
 * To construct a while loop for an existing NNC graph, you need to be able to separate the existing graph into
 * two sub-graphs.
 *
 * The while-loop sub-graph (WL sub-graph) contains a set of incoming nodes (I-nodes), condition false output
 * nodes (CFO-nodes) and end nodes (E-nodes). Each set has its own properties, but in short, all incoming edges
 * to the WL sub-graph connect to one of the I-nodes, but nothing else. All outgoing edges from the WL sub-graph
 * connect to one of the CFO-nodes, but nothing else. A node can be an I-node, a CFO-node or an E-node,
 * non-exclusively.
 *
 * There are also 3 types of tensors used for all nodes in the WL sub-graph: input tensors (I-tensors) are tensors
 * that are inputs to some nodes, and will never be outputs. Output tensors (O-tensors) are tensors that are
 * outputs from some nodes, but never inputs to any nodes. I-tensors can be outputs from some nodes
 * outside of the WL sub-graph. O-tensors can be inputs to some nodes outside of the WL sub-graph. Internal
 * tensors (IN-tensors) are not visible outside of the WL sub-graph; therefore, they can be both inputs and outputs
 * of some nodes inside the sub-graph. Some tensors can be fed back into the WL sub-graph, given either as
 * O-tensors or IN-tensors. A parameter map can be given in these cases to describe which maps to what.
 *
 * A WL sub-graph is driven like this: the WL sub-graph runs until all CFO-nodes are reached. At this
 * point, the while_f condition is checked. If true, we continue until all the end nodes are reached. At this
 * point, we increase the counter, reconfigure the WL sub-graph with the parameter map, and run from the I-nodes all
 * over again. When all CFO-nodes are reached, the condition is checked again; if false, the WL sub-graph terminates,
 * and the graph continues from the nodes that are pointed to by the CFO-nodes.
 *
 * Given these constraints, doing automatic differentiation is not that hard any more. A WL sub-graph, from
 * the whole graph's point of view, is just a giant command that supports both forward / backward operations, with
 * some extra information passed around in the form of userdata (tape).
 *
 * For the WL sub-graph, we can continue to leverage the compile / backward functions already written for the
 * symbolic graph as well.
 *
 * For the compile function, we just need to take care of parameter maps (these need to be converted into bound
 * tensors).
 *
 * For the backward function, we need to convert parameter maps from assigners (thus, y = x) to accumulators (x += y).
 *
 * This function will replace the nodes that it affects with one sub-graph node. Thus, how this
 * sub-graph is driven is opaque. Its backward form is opaque as well.
 *
 * There are no connections between its nodes and the outside graph nodes other than the three sets:
 *
 * 1. Incoming nodes, the set of nodes that contains the incoming edges from outside; they cannot have edges
 *    pointed to by inside nodes. The sub-graph computation starts from these incoming nodes;
 *
 * 2. Condition false output nodes: when the condition is false, we will break out of this while loop; these
 *    nodes point to the outside nodes, but no inside nodes;
 *
 * 3. End nodes, the set of nodes that marks the end of the while body; after these nodes are executed,
 *    we will return to the incoming nodes. These end nodes shouldn't have any edges pointing to inside nodes
 *    (it is OK if end nodes are condition true output nodes as well);
 *
 * Since these will become a sub-graph (which, to its owner graph, is just a simple "node"), it will have inputs
 * and outputs. Besides that, the loop body needs to be parameterized to be SSA compliant (see:
 * https://www.cs.cmu.edu/~fp/courses/15411-f13/lectures/06-ssa.pdf). Thus, a list of body parameters needs to
 * be provided.
 */

/**
 * @defgroup level_3_5_while_essentials While Loop Essentials
 * @{
 */

/**
 * The given tensors contain all the common / input / output tensors specified in the sub-graph.
 */
typedef int(*ccv_nnc_graph_while_f)(ccv_nnc_tensor_t* const* const inputs, const int input_size, const void* const data);
/**
 * Create a tensor tape that can be used to record for a while loop or case..of.
 * @return A ccv_nnc_tensor_tape_t pointer.
 */
CCV_WARN_UNUSED(ccv_nnc_tensor_tape_t*) ccv_nnc_tensor_tape_new(void);
/**
 * Deallocate the tensor tape and all the memory it allocated.
 * @param tape The tensor tape object.
 */
void ccv_nnc_tensor_tape_free(ccv_nnc_tensor_tape_t* const tape);
/**
 * The API to operate on the symbolic graph is more involved than the one for the concrete graph for while loops.
 * The reason is that the symbolic graph operates in SSA form (static single assignment); therefore, the while
 * loop for the symbolic graph has to be parameterized.
 * @param graph The symbolic graph.
 * @param cmd The command identifier, can be either CCV_NNC_GRAPH_FORWARD or CCV_NNC_GRAPH_BACKWARD.
 * @param while_graph The sub-graph to run the while loop.
 * @param name The name of the while loop. Optional.
 * @return A while loop execution symbol (backed by a sub-graph) of the given graph.
 */
ccv_nnc_graph_exec_symbol_t ccv_nnc_symbolic_graph_while(ccv_nnc_symbolic_graph_t* const graph, const uint32_t cmd, ccv_nnc_symbolic_graph_t* const while_graph, const char* const name);
/**
 * Set the expression to be evaluated, and at which nodes it is to be evaluated.
 * @param while_graph The symbolic graph that will run the while loop.
 * @param while_expr The function pointer to the expression.
 * @param while_data Custom data provided to the expression evaluation function.
 * @param inputs The input tensor symbols array to the expression evaluation function.
 * @param input_size The size of the input tensor symbols array.
 * @param breakpoints The execution node symbols at which the while loop will pause, evaluate the expression, and choose to either break out or continue.
 * @param breakpoint_size The size of the execution node symbols array.
 */
void ccv_nnc_symbolic_graph_set_while_expr(ccv_nnc_symbolic_graph_t* const while_graph, const ccv_nnc_graph_while_f while_expr, const void* const while_data, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_graph_exec_symbol_t* const breakpoints, const int breakpoint_size);
/**
 * Set the loop carry parameters for reuse (parameterized loop; these will be carried over to the next iteration).
 * @param while_graph The symbolic graph that will run the while loop.
 * @param symbol_map A pair of tensor symbols array, where the source tensor symbol is the output tensor symbol in this loop, and the destination tensor symbol is the input tensor symbol in the next loop.
 * @param symbol_map_size The size of the symbol map array.
 */
void ccv_nnc_symbolic_graph_set_carry_overs(ccv_nnc_symbolic_graph_t* const while_graph, const ccv_nnc_tensor_symbol_map_t* const symbol_map, const int symbol_map_size);
/**
 * Retrieve the special (magical) tensor symbol that retains the while loop counter (thus, a dimension of 1x1x1, CCV_64S type).
 * @param while_graph The symbolic graph that will run the while loop.
 * @return A tensor symbol that represents the implicit loop count.
 */
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_for_while_count(const ccv_nnc_symbolic_graph_t* const while_graph);
/**
 * Extract the sub-graph of the while loop from a symbol.
 * @param graph The symbolic graph.
 * @param while_symbol The execution node symbol.
 * @return The sub-graph that represents a while loop.
 */
CCV_WARN_UNUSED(ccv_nnc_symbolic_graph_t*) ccv_nnc_symbolic_graph_from_while_symbol(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t while_symbol);
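
/*
 * A sketch (not part of the original header) of a parameterized while loop
 * that applies `body_cmd` five times. The breakpoint is the body node itself;
 * a real graph may use a dedicated noop breakpoint instead. The condition
 * reads the implicit loop count wired in as inputs[0].
 */
static int example_run_five_times(ccv_nnc_tensor_t* const* const inputs, const int input_size, const void* const data)
{
  (void)input_size;
  (void)data;
  return inputs[0]->data.i64[0] < 5;
}

static void example_symbolic_while(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_cmd_t body_cmd, const ccv_nnc_tensor_param_t params)
{
  ccv_nnc_symbolic_graph_t* const while_graph = ccv_nnc_symbolic_graph_new();
  ccv_nnc_symbolic_graph_while(graph, CCV_NNC_GRAPH_FORWARD, while_graph, "loop");
  const ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(while_graph, params, "x");
  const ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(while_graph, params, "y");
  const ccv_nnc_graph_exec_symbol_t body = ccv_nnc_graph_exec_symbol_new(while_graph, body_cmd, &x, 1, &y, 1, "body");
  ccv_nnc_graph_exec_symbol_autogen(while_graph, &body, 1, 0);
  const ccv_nnc_tensor_symbol_t count = ccv_nnc_tensor_symbol_for_while_count(while_graph);
  ccv_nnc_symbolic_graph_set_while_expr(while_graph, example_run_five_times, 0, &count, 1, &body, 1);
  // Parameterize the loop: this iteration's y becomes the next iteration's x.
  const ccv_nnc_tensor_symbol_map_t carry = { .source = y, .destination = x };
  ccv_nnc_symbolic_graph_set_carry_overs(while_graph, &carry, 1);
}
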
/**
 * Construct a looped concrete graph. Note that this interface is a little bit simpler than the one for the symbolic
 * graph. The reason is that a concrete graph operates on allocated tensors; thus, there is no mapping of tensor
 * symbols between the parent graph and the while graph. (The reason to have a mapping in symbolic graphs is to
 * constrain variable leaking between the sub-graph and the parent graph.)
 * @param graph The concrete graph.
 * @param cmd The command identifier, can be either CCV_NNC_GRAPH_FORWARD or CCV_NNC_GRAPH_BACKWARD.
 * @param while_graph The sub-graph to run the while loop.
 * @return An execution node that represents the sub-graph.
 */
CCV_WARN_UNUSED(ccv_nnc_graph_exec_t) ccv_nnc_graph_while(ccv_nnc_graph_t* const graph, const uint32_t cmd, ccv_nnc_graph_t* const while_graph);
/**
 * Set the expression to be evaluated for the while loop. The while loop will break out if the expression evaluates to 0.
 * @param while_graph The concrete graph that will run the while loop.
 * @param while_expr The function pointer to the expression.
 * @param while_data Custom data provided to the expression evaluation function.
 * @param inputs The input tensors array to the expression evaluation function.
 * @param input_size The size of the input tensors array.
 * @param breakpoints The execution nodes at which the while loop will pause, evaluate the expression, and choose to either break out or continue.
 * @param breakpoint_size The size of the execution nodes array.
 */
void ccv_nnc_graph_set_while_expr(ccv_nnc_graph_t* const while_graph, const ccv_nnc_graph_while_f while_expr, const void* const while_data, ccv_nnc_tensor_t* const* const inputs, const int input_size, const ccv_nnc_graph_exec_t* const breakpoints, const int breakpoint_size);
/**
 * Get the special tensor for the while loop count. It contains one uint64_t value. We keep an implicit count
 * when evaluating the while loop, and you can access it with this tensor.
 * @param while_graph The concrete graph that will run the while loop.
 * @return A special tensor from which you can retrieve the loop count at .data.i64[0].
 */
CCV_WARN_UNUSED(ccv_nnc_tensor_t) ccv_nnc_tensor_for_while_count(const ccv_nnc_graph_t* const while_graph);
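
/*
 * A sketch (not part of the original header) of a condition function for the
 * concrete graph API: assuming the loop count tensor is wired in as inputs[0],
 * it keeps the loop running for ten iterations.
 */
static int example_run_ten_times(ccv_nnc_tensor_t* const* const inputs, const int input_size, const void* const data)
{
  (void)input_size;
  (void)data;
  return inputs[0]->data.i64[0] < 10; // Break out once the count reaches 10.
}
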
/**
 * Retrieve the sub-graph from an execution node.
 * @param graph The concrete graph.
 * @param exec The execution node that represents the sub-graph.
 * @return The sub-graph.
 */
CCV_WARN_UNUSED(ccv_nnc_graph_t*) ccv_nnc_graph_from_while_exec(const ccv_nnc_graph_t* const graph, ccv_nnc_graph_exec_t exec);

/** @} */

/**
 * @defgroup level_3_5_while_others While Loop Others
 * @{
 */

/**
 * For a given tape on a given graph, update the input / output tensors so new versions will be created (if needed).
 * @param tape The tensor tape object.
 * @param graph The concrete graph this tensor tape is executing in.
 * @param input_flags The flags associated with the input tensors.
 * @param inputs The input tensors.
 * @param input_size The size of the input tensors array.
 * @param output_flags The flags associated with the output tensors.
 * @param outputs The output tensors.
 * @param output_size The size of the output tensors array.
 */
void ccv_nnc_tensor_tape_io(ccv_nnc_tensor_tape_t* const tape, const ccv_nnc_graph_t* const graph, const int* const input_flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, const int* const output_flags, ccv_nnc_tensor_t* const* const outputs, const int output_size);
/**
 * Retrieve the number we associated with the execution node that is recorded on the tape for a particular run of the graph.
 * @param tape The tensor tape object.
 * @param graph The concrete graph this tensor tape is executing in.
 * @param exec The execution node.
 * @return The number associated with the execution node.
 */
uint64_t ccv_nnc_tensor_tape_numbering(ccv_nnc_tensor_tape_t* const tape, const ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec);
/**
 * Set the number we associated with the execution node that is recorded on the tape for a particular run of the graph.
 * @param tape The tensor tape object.
 * @param graph The concrete graph this tensor tape is executing in.
 * @param exec The execution node.
 * @param numbering The number associated with the execution node.
 */
void ccv_nnc_tensor_tape_set_numbering(ccv_nnc_tensor_tape_t* const tape, ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const uint64_t numbering);
/**
 * Augmented tensor to run a graph with a while loop (an obvious example is a dynamic RNN).
 */
typedef struct ccv_nnc_tensor_multiview_s {
  // This is an augmented ccv_nnc_tensor_view_t
  // Namely, it can point to multiple versions of tensors.
  int type; // This type is CCV_NNC_TENSOR_MULTI_VIEW
  // kind specifies how the multi-version tensors are stored.
  // See the comment on the follow-up enums.
  uint8_t kind;
  uint16_t repeat;
  intptr_t anchor; // on which graph this multi-view tensor is wrapped. This helps to determine on which level the multi-view tensor should be unwrapped.
  // If this tensor points to a tensor view, data.u8 - offset is the real pointer start.
  off_t offset;
  struct ccv_nnc_tensor_multiview_s* p; // If this is wrapped with another multiview tensor, get to the parent one.
  ccv_nnc_tensor_t* it; // Current tensor (tensor in use), this is updated along with the graph computation.
  // This is useful because by just traversing it, one can get the latest up-to-date reference to this multi-view tensor.
  ccv_array_t* sp; // Synchronized tensor views. This corresponds to the ccv_nnc_tensor_synchronize_to_multiview method, which records all the tensors registered for updates.
  ccv_nnc_tensor_t* _inline_data[4];
  ccv_nnc_tensor_t** _heap_data;
} ccv_nnc_tensor_multiview_t;
#define CCV_NNC_MULTIVIEW_DATA(x) ((x)->_heap_data ? (x)->_heap_data : (x)->_inline_data)
#define CCV_NNC_MULTIVIEW_PHI (intptr_t)0x1 /**< Denote this is a phi multi-view tensor. */

enum {
  CCV_NNC_MULTIVIEW_K0N = 0, /**< All of them are repeated. */
  CCV_NNC_MULTIVIEW_K1N = 1, /**< The first one is the first, the second one starts to repeat. (0111111...) */
};
#define CCV_NNC_MULTIVIEW_K01(x) ((x)->kind == CCV_NNC_MULTIVIEW_K0N && (x)->repeat == 1)
/**
 * Set up a tensor multiview with a given set of tensors.
 * A multiview tensor points to a list of tensors, and its access depends on the loop count.
 * For example, if we have a multiview tensor with the list [a, b, c, d], kind 1N and repeat 3,
 * then for loop counts 0, 1, 2, 3, 4, 5, the corresponding tensors used will be a, b, c, d, b, c. If kind
 * is 0N, and repeat is 4, it will be a, b, c, d, a, b.
 * @param data[] The pointer to the list of tensors the multiview object can point to.
 * @param kind Can be either CCV_NNC_MULTIVIEW_K0N or CCV_NNC_MULTIVIEW_K1N, basically whether to keep the initial tensor.
 * @param repeat The length of the repeat.
 * @param graph Which graph this multiview object attaches to.
 * @param tensor_multiview The tensor multiview object to be updated.
 */
void ccv_nnc_tensor_multiview(ccv_nnc_tensor_t* data[], const uint8_t kind, const uint16_t repeat, const ccv_nnc_graph_t* const graph, ccv_nnc_tensor_multiview_t* const tensor_multiview);
/**
 * Since tensor_multiview will never be allocated with a *_new method, the *_free method simply frees anything that is dynamically allocated afterwards (such as the reference items).
 * @param tensor_multiview The tensor multiview object to be deallocated.
 */
void ccv_nnc_tensor_multiview_free(const ccv_nnc_tensor_multiview_t tensor_multiview);
/**
 * Set up a tensor as a reference to a tensor multiview; thus, when the tensor multiview's it (current tensor) updates, the tensor reference's data.u8 will get updated as well (pointing to the same memory region as it).
 * @param tensor_multiview The tensor multiview object.
 * @param tensor The tensor that will be updated along with the multiview object.
 */
void ccv_nnc_tensor_synchronize_to_multiview(ccv_nnc_tensor_multiview_t* const tensor_multiview, ccv_nnc_tensor_t* const tensor);
/**
 * Send a broadcast to the subscribers of the multiview; call this at the beginning of exec.
 * @param tensor_multiview The tensor multiview object.
 */
void ccv_nnc_tensor_multiview_synchronize(ccv_nnc_tensor_multiview_t* const tensor_multiview);
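
/*
 * A sketch (not part of the original header) of a double-buffered multiview:
 * with K0N and repeat 2, the loop alternates between `even` and `odd`
 * (counts 0, 1, 2, 3, ... map to even, odd, even, odd, ...).
 */
static void example_multiview(ccv_nnc_tensor_t* const even, ccv_nnc_tensor_t* const odd, const ccv_nnc_graph_t* const while_graph)
{
  ccv_nnc_tensor_t* versions[] = { even, odd };
  ccv_nnc_tensor_multiview_t multiview;
  ccv_nnc_tensor_multiview(versions, CCV_NNC_MULTIVIEW_K0N, 2, while_graph, &multiview);
  // multiview.it flips between `even` and `odd` as the loop count advances.
  ccv_nnc_tensor_multiview_free(multiview);
}
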

/** @} */

/** @} */

/**
 * @defgroup level_3_5_case_of Branching
 * @{
 */

/**
 * @page symbolic_switch Construct a "switch" control structure in a symbolic graph
 *
 * Here I use the keyword case_of. Providing a "switch" control structure within NNC has some nice properties,
 * even though technically you can simulate this with a while loop.
 *
 * 1. More optimal memory allocation: with a "switch" control structure, memory can be multiplexed for each code
 *    path because they are mutually exclusive.
 *
 * 2. No tape should be used within each branch: if we simulate with a "while" loop, any results from within
 *    the "switch" statement have to be kept on the tape, which is inefficient because you don't need any tape
 *    for the "switch" statement other than to record which path is taken.
 *
 * The particular "switch" control structure provided here is a multi-way structured "switch". Each branch is a
 * sub-graph, so it is well-scoped. A node branches out based on the case_of condition's return value to one of
 * the branches (numbered from 0 to n; -1 means no path taken). If no path is taken, the output tensors will be
 * assigned the default tensors and execution continues. Otherwise, the computation within the sub-graph will be
 * carried out, the output tensors will be assigned the tensors specified within that sub-graph, and execution
 * continues.
 *
 * If we want to consider speculative execution in the future, we need to revisit our memory allocation scheme.
 */

/**
 * Function prototype to evaluate a branch expression.
 */
typedef int(*ccv_nnc_graph_case_of_f)(ccv_nnc_tensor_t* const* const inputs, const int input_size, const void* const data);
/**
 * Create a new case..of execution node symbol.
 * @param graph The symbolic graph.
 * @param cmd The command identifier, can be either CCV_NNC_GRAPH_FORWARD or CCV_NNC_GRAPH_BACKWARD.
 * @param inputs The input tensor symbols array for the expression.
 * @param input_size The size of the input tensor symbols array.
 * @param symbol_map The pair of tensor symbols array where the source is the input tensor symbol and the destination is the output tensor symbol.
 * @param symbol_map_size The size of the symbol map array.
 * @param name The name of the case..of graph. Optional.
 * @return An execution node symbol that represents the case..of graph.
 */
CCV_WARN_UNUSED(ccv_nnc_graph_exec_symbol_t) ccv_nnc_symbolic_graph_case_of_new(ccv_nnc_symbolic_graph_t* const graph, const uint32_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_map_t* const symbol_map, const int symbol_map_size, const char* const name);
/**
 * Set the expression to be evaluated when choosing which sub-graph to branch to.
 * @param graph The symbolic graph.
 * @param exec The execution node symbol that represents the case..of graph.
 * @param case_of The function pointer to evaluate.
 * @param case_of_data The data associated with the function pointer.
 */
void ccv_nnc_symbolic_graph_set_case_of_expr(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec, ccv_nnc_graph_case_of_f case_of, const void* case_of_data);
/**
 * Set a sub-graph as one of the branches for the case..of graph.
 * @param graph The symbolic graph.
 * @param symbol The execution node symbol that represents the case..of graph.
 * @param case_graph The sub-graph for one of the branches.
 * @param case_of The index assigned to this sub-graph (the expression returns this index to determine which sub-graph to execute).
 * @param symbol_map The pair of tensor symbols array where the source is the output tensor symbol of the sub-graph, and the destination is the output tensor symbol of the execution node symbol.
 * @param symbol_map_size The size of the symbol map array.
 */
void ccv_nnc_symbolic_graph_set_case_of(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol, ccv_nnc_symbolic_graph_t* const case_graph, const int case_of, const ccv_nnc_tensor_symbol_map_t* const symbol_map, const int symbol_map_size);
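
/*
 * A sketch (not part of the original header) of a one-branch case..of: take
 * branch 0 when the first input is positive, otherwise fall through and map
 * `in` to `out` directly. `branch0` and `branch0_out` stand for a sub-graph
 * and its output symbol built elsewhere.
 */
static int example_pick_branch(ccv_nnc_tensor_t* const* const inputs, const int input_size, const void* const data)
{
  (void)input_size;
  (void)data;
  return inputs[0]->data.f32[0] > 0 ? 0 : -1; // -1 means no path taken.
}

static void example_case_of(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t in, const ccv_nnc_tensor_symbol_t out, ccv_nnc_symbolic_graph_t* const branch0, const ccv_nnc_tensor_symbol_t branch0_out)
{
  // If no path is taken, `out` is assigned from `in`.
  const ccv_nnc_tensor_symbol_map_t fall_through = { .source = in, .destination = out };
  const ccv_nnc_graph_exec_symbol_t case_of = ccv_nnc_symbolic_graph_case_of_new(graph, CCV_NNC_GRAPH_FORWARD, &in, 1, &fall_through, 1, "case_of");
  ccv_nnc_symbolic_graph_set_case_of_expr(graph, case_of, example_pick_branch, 0);
  // When branch 0 runs, its `branch0_out` becomes the node's `out`.
  const ccv_nnc_tensor_symbol_map_t wiring = { .source = branch0_out, .destination = out };
  ccv_nnc_symbolic_graph_set_case_of(graph, case_of, branch0, 0, &wiring, 1);
}
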
1723
/**
1724
 * Create a new case..of execution node.
1725
 * @param graph The concrete graph.
1726
 * @param cmd The command idenfitier, can be either CCV_NNC_GRAPH_FORWARD or CCV_NNC_GRAPH_BACKWARD
1727
 * @param inputs The input tensors array supplied to the expression.
1728
 * @param input_size The size of the input tensors array.
1729
 * @param outputs The output tensors array.
1730
 * @param output_size The size of the output tensors array.
1731
 * @return A execution node that represents the case..of graph.
1732
 */
1733
CCV_WARN_UNUSED(ccv_nnc_graph_exec_t) ccv_nnc_graph_case_of_new(ccv_nnc_graph_t* const graph, const uint32_t cmd, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size);
1734
/**
1735
 * Set the expression to be evaluated when choose which sub-graph to branch to.
1736
 * @param graph The concrete graph.
1737
 * @param exec The execution node that represents the case..of graph.
1738
 * @param case_of The function pointer to evaluate.
1739
 * @param case_of_data The data associated with the function pointer.
1740
 * @param offset A integer added to the expression output to help choose the index. Thus, real index = expression index + offset.
1741
 */
1742
void ccv_nnc_graph_set_case_of_expr(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_graph_case_of_f case_of, const void* case_of_data, const int offset);
1743
/**
1744
 * Set a sub-graph as one of the branch for the case..of graph.
1745
 * @param graph The concrete graph.
1746
 * @param exec The execution node that represents the case..of graph.
1747
 * @param case_graph The sub-graph for one of the branch.
1748
 * @param case_of The index assigned to this sub-graph (expression returns this index + offset to determine which sub-graph to execute).
1749
 */
1750
void ccv_nnc_graph_set_case_of(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_graph_t* const case_graph, const int case_of);
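/* A minimal sketch of wiring up a case..of node on a concrete graph. The graph, the
 * sub-graphs (branch0 / branch1) and the tensors here are hypothetical, and the
 * expression callback signature is assumed to follow ccv_nnc_graph_case_of_f:
 *
 *   static int pick_branch(ccv_nnc_tensor_t* const* const inputs, const int input_size, const void* const data)
 *   {
 *     // Branch 0 for a non-negative input, branch 1 otherwise.
 *     return inputs[0]->data.f32[0] >= 0 ? 0 : 1;
 *   }
 *
 *   const ccv_nnc_graph_exec_t node = ccv_nnc_graph_case_of_new(graph, CCV_NNC_GRAPH_FORWARD, inputs, 1, outputs, 1);
 *   ccv_nnc_graph_set_case_of_expr(graph, node, pick_branch, 0, 0); // offset 0: use the expression output as-is
 *   ccv_nnc_graph_set_case_of(graph, node, branch0, 0);
 *   ccv_nnc_graph_set_case_of(graph, node, branch1, 1);
 */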
1751
1752
/** @} */
1753
1754
/**
1755
 * @defgroup level_3_5_minimizer Gradient-based Optimization
1756
 * @{
1757
 */
1758
1759
/**
1760
 * This is the comparable part to Caffe's solver or TensorFlow's optimizer. It takes a step further than just
1761
 * computing the gradient: it also applies the gradient to update parameters to minimize the loss.
1762
 * @param graph The symbolic graph.
1763
 * @param minimizer The wrapped command that represents a particular optimization strategy.
1764
 * @param losses The tensor symbols array of losses.
1765
 * @param loss_size The size of the loss symbols array.
1766
 * @param parameters The parameter tensor symbols to optimize.
1767
 * @param parameter_size The size of parameter symbols array.
1768
 * @param inputs The additional input symbols we compute gradient against.
1769
 * @param input_size The size of the additional input symbols array.
1770
 * @param sources The source execution nodes array.
1771
 * @param source_size The size of source nodes array.
1772
 * @param destinations The destinations execution nodes array.
1773
 * @param destination_size The size of destination nodes array.
1774
 * @param gradients The tensor symbols that represent the gradients for the update; should be the same size as the parameters array size + the input array size. This can be 0 (optional).
1775
 * @param updated_parameters The tensor symbols that represent the updated parameters, should be the same size as the parameters array.
1776
 * @param saved_aux The tensor symbols that are helpful for a particular optimization strategy.
1777
 * @param graph_exec_symbols The execution node symbols for the updates, should be the same size as the parameters array.
1778
 */
1779
void ccv_nnc_symbolic_graph_minimize(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_cmd_t minimizer, const ccv_nnc_tensor_symbol_t* const losses, const int loss_size, const ccv_nnc_tensor_symbol_t* const parameters, const int parameter_size, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size, ccv_nnc_tensor_symbol_t* const gradients, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols);
1780
/**
1781
 * The number of extra saved aux per parameter depends only on the command. For example, SGD with momentum requires 1 aux (for the momentum).
1782
 * Others require more.
1783
 * @param minimizer The wrapped command that represents a particular optimization strategy.
1784
 * @return the number of saved aux per parameter.
1785
 */
1786
CCV_WARN_UNUSED(int) ccv_nnc_minimizer_saved_aux_size(const ccv_nnc_cmd_t minimizer);
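/* A minimal sketch of putting the two together. This is a sketch only: CCV_NNC_SGD_FORWARD
 * is assumed to exist as in NNC's generated command set, the .minimize fields follow the
 * command parameter struct, and graph / loss / parameters / sources / destinations are
 * hypothetical:
 *
 *   const ccv_nnc_cmd_param_t sgd_params = { .minimize = { .rate = 0.001, .nesterov = 0 } };
 *   const ccv_nnc_cmd_t sgd = ccv_nnc_cmd(CCV_NNC_SGD_FORWARD, 0, sgd_params, 0);
 *   const int aux_size = ccv_nnc_minimizer_saved_aux_size(sgd); // 1 for SGD (the momentum)
 *   ccv_nnc_tensor_symbol_t updated[param_size];
 *   ccv_nnc_tensor_symbol_map_t saved_aux[param_size * aux_size];
 *   ccv_nnc_graph_exec_symbol_t update_execs[param_size];
 *   ccv_nnc_symbolic_graph_minimize(graph, sgd, &loss, 1, parameters, param_size, 0, 0,
 *     sources, source_size, destinations, destination_size,
 *     0, updated, saved_aux, update_execs); // 0: don't retain the gradient symbols
 */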
1787
1788
/** @} */
1789
1790
/**
1791
 * @defgroup level_3_5_simplify Graph Simplification
1792
 * @{
1793
 */
1794
1795
/**
1796
 * @page symbolic_simplify Symbolic graph simplification
1797
 *
1798
 * We make a distinction between graph simplifications and optimizations (autotune).
1799
 *
1800
 * Simplification: rewrite the graph such that the resulting graph has fewer nodes. This is done on the symbolic
1801
 * graph only. Passes that are "simplifications" include pruning, common sub-expression elimination, constant
1802
 * folding etc.
1803
 *
1804
 * Optimization (autotune): graph optimization can have more objectives. The most obvious objective is to reduce
1805
 * computation time. For the symbolic graph, passes that reduce computation time include data layout optimization,
1806
 * auto parallelization etc. (normal optimization implementations have a cost model to guide the optimization;
1807
 * NNC's implementation uses a cost database that profiles the time cost on the device to guide the optimization.
1808
 * We call it autotune to distinguish it from the normal optimization passes, because we need device profile data).
1809
 * There could be other objectives, for example, in many deep learning applications, reducing memory footprint
1810
 * can be desirable. However, as always in computer science, memory and time are a typical trade-off. Memory
1811
 * optimization almost always results in longer computation time, and the objective is to trade between the two
1812
 * with a bias term (in other frameworks such as TensorFlow, the memory optimizer uses a list of "cheap ops" to
1813
 * bias between the time and memory footprint).
1814
 *
1815
 * Graph optimizations can happen on both the symbolic graph level and the concrete graph level.
1816
 * For NNC, the symbolic graph is already very explicit (data layout, device allocation and data transfer between
1817
 * devices / nodes, and even the command backend can all be specified on the symbolic graph); however, some
1818
 * information is unknown until it is compiled down to a concrete graph (tensor addresses, tensor initialization
1819
 * etc.), and graph optimizations need all that information to do their job. Thus, keeping the flexibility to
1820
 * optimize on both the symbolic and the concrete graph level seems reasonable.
1821
 */
1822
1823
enum {
1824
  /**
1825
   * If two commands generate the same outputs, all the places where the newer output is used will be replaced by
1826
   * the old output. Later, in the graph pruning stage, the command that generates the newer output will be
1827
   * eliminated.
1828
   */
1829
  CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
1830
  /**
1831
   * For the given outputs, eliminate unused input tensors, and then eliminate graph execs that don't contribute
1832
   * to the outputs.
1833
   */
1834
  CCV_NNC_SIMPLIFY_GRAPH_PRUNING,
1835
  /**
1836
   * For CCV_NNC_DATA_TRANSFER, if the input / output are the same (on the same device, no alias), we can skip.
1837
   * Similarly, if they are on the same device but one is an alias of the other, in some cases we can skip as well
1838
   * (if neither is a carry-over, bypass etc.).
1839
   */
1840
  CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,
1841
  /**
1842
   * Combine a few smaller ops into a bigger one. For now, this functionality is limited: it can only address ops
1843
   * that are sequential.
1844
   */
1845
  CCV_NNC_SIMPLIFY_OPS_FUSION,
1846
  // CCV_NNC_SIMPLIFY_CONSTANT_FOLDING, // This currently is not supported, because we don't have efficient way to express constant in symbolic graph.
1847
};
1848
/**
1849
 * Simplify a graph with a given list of passes, in that particular order.
1850
 * Note, when a graph is simplified, its sources / destinations are changed as well.
1851
 * @param graph The symbolic graph.
1852
 * @param passes The array of passes we are going to apply.
1853
 * @param pass_size The size of the passes array.
1854
 * @param outputs The output tensor symbols we want to retain (we are going to prune any execution nodes that are not related to these outputs).
1855
 * @param output_size The size of the output array.
1856
 * @param sources The source execution node symbols array.
1857
 * @param source_size The size of source node symbols array.
1858
 * @param destinations The destinations execution node symbols array.
1859
 * @param destination_size The size of destination node symbols array.
1860
 */
1861
void ccv_nnc_symbolic_graph_simplify(ccv_nnc_symbolic_graph_t* const graph, const int* const passes, const int pass_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size);
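/* A minimal usage sketch (graph / loss / sources / destinations are hypothetical):
 * run common sub-expression elimination first, then prune against the loss output:
 *
 *   const int passes[] = {
 *     CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
 *     CCV_NNC_SIMPLIFY_GRAPH_PRUNING,
 *   };
 *   ccv_nnc_symbolic_graph_simplify(graph, passes, 2, &loss, 1,
 *     sources, source_size, destinations, destination_size);
 */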
1862
1863
/** @} */
1864
1865
/**
1866
 * @defgroup level_3_5_parallel Automatic Graph Parallelization
1867
 * @{
1868
 */
1869
1870
enum {
1871
  /**
1872
   * Op for reducer / allreducer. Currently only supports sum.
1873
   */
1874
  CCV_NNC_PARALLEL_REDUCE_OP_SUM,
1875
};
1876
1877
/**
1878
 * Turn the existing graph into one capable of running on several devices with different data inputs in parallel.
1879
 * With this method, additional tensor symbols that run on different devices will be created. That being
1880
 * said, there are concepts of "broadcast" and "reduce": "broadcast" tensor symbols will be copied to
1881
 * different devices, while "reduce" tensors will be summed from different devices onto the default device.
1882
 * The "allreduce" concept is simpler: the allreduce operation will be performed on these tensors, which then
1883
 * will be used on different devices again.
1884
 *
1885
 * Limitations: right now, reduce / allreduce only supports "sum". Data parallelism
1886
 * only supports GPU; thus, the nodes that will be duplicated are GPU computations and GPU memory backed
1887
 * tensors. Also, right now, the tensors to be broadcast / allreduced / reduced should have no aliases.
1888
 *
1889
 * @param graph The symbolic graph.
1890
 * @param parallel Number of devices we want to run on. 0 will use all devices available. 1 will skip.
1891
 * @param broadcasts The tensor symbols to be broadcasted.
1892
 * @param broadcast_size The size of the broadcast tensor symbols array.
1893
 * @param allreducers The tensor symbols to be allreduced.
1894
 * @param allreducer_size The size of the allreducer tensor symbols array.
1895
 * @param reducers The tensor symbols to be reduced.
1896
 * @param reducer_size The size of the reducer tensor symbols array.
1897
 * @param reduce_op_type The reduce op for reducer / allreducer.
1898
 * @param sources The source execution node symbols array.
1899
 * @param source_size The size of source node symbols array.
1900
 * @param destinations The destinations execution node symbols array.
1901
 * @param destination_size The size of destination node symbols array.
1902
 */
1903
void ccv_nnc_symbolic_graph_data_parallel(ccv_nnc_symbolic_graph_t* const graph, const int parallel, const ccv_nnc_tensor_symbol_t* const broadcasts, const int broadcast_size, const ccv_nnc_tensor_symbol_t* const allreducers, const int allreducer_size, const ccv_nnc_tensor_symbol_t* const reducers, const int reducer_size, const int reduce_op_type, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size);
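/* A minimal sketch (all symbols hypothetical): duplicate the graph across all available
 * GPUs, broadcast the input x, and allreduce the parameter gradients:
 *
 *   ccv_nnc_symbolic_graph_data_parallel(graph, 0, &x, 1, gradients, gradient_size, 0, 0,
 *     CCV_NNC_PARALLEL_REDUCE_OP_SUM, sources, source_size, destinations, destination_size);
 *   // 0 for parallel means use all devices available. Retrieve the copy of x on device 1:
 *   const ccv_nnc_tensor_symbol_t x_1 = ccv_nnc_tensor_symbol_copy(graph, x, 1);
 */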
1904
/**
1905
 * Get the symbol that is on a device other than the default one. The list will be flushed if the
1906
 * ccv_nnc_symbolic_graph_data_parallel function is called again.
1907
 * @param graph The symbolic graph.
1908
 * @param symbol The tensor symbol whose counterpart on a different device we want to retrieve.
1909
 * @param device_id The device numeric id for this symbol.
1910
 * @return A tensor symbol that is on a different device.
1911
 */
1912
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_copy(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t symbol, const int device_id);
1913
/**
1914
 * Get the execution node that is on a device other than the default one. The list will be flushed
1915
 * if the ccv_nnc_symbolic_graph_data_parallel function is called again.
1916
 * @param graph The symbolic graph.
1917
 * @param symbol The execution node whose counterpart on a different device we want to retrieve.
1918
 * @param device_id The device numeric id for this symbol.
1919
 * @return An execution node that is on a different device.
1920
 */
1921
CCV_WARN_UNUSED(ccv_nnc_graph_exec_symbol_t) ccv_nnc_graph_exec_symbol_copy(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol, const int device_id);
1922
1923
/** @} */
1924
1925
/** @} */
1926
1927
/**
1928
 * @defgroup level_4 Level-4 API
1929
 * @{
1930
 */
1931
1932
/**
1933
 * Opaque pointer to the dynamic graph structure.
1934
 */
1935
typedef struct ccv_nnc_dynamic_graph_s ccv_nnc_dynamic_graph_t;
1936
1937
/**
1938
 * Masquerade this as if it is an on-stack variable; there is a heap allocation, but it is managed by the dynamic graph.
1939
 * The fact that ccv_nnc_tensor_variable_t is a pointer is an implementation detail. It should be treated as an
1940
 * opaque type throughout. We may later extend this to be some on-stack information or even just a uid.
1941
 */
1942
typedef struct ccv_nnc_tensor_variable_s* ccv_nnc_tensor_variable_t;
1943
1944
/**
1945
 * Create a dynamic graph.
1946
 * @return A newly created dynamic graph.
1947
 */
1948
CCV_WARN_UNUSED(ccv_nnc_dynamic_graph_t*) ccv_nnc_dynamic_graph_new(void);
1949
1950
/** @cond ALL */
1951
// Get a new tensor variable.
1952
CCV_WARN_UNUSED(ccv_nnc_tensor_variable_t) ccv_nnc_tensor_variable_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info);
1953
3.05k
#define CCV_NNC_TENSOR_VARIABLE_NEW_X_1(graph) ccv_nnc_tensor_variable_new_impl(graph, ccv_nnc_tensor_auto)
1954
3.03k
#define CCV_NNC_TENSOR_VARIABLE_NEW_X_SEL(_1, _2, _FX, ...) _FX
1955
// Make it so that this method can be called either with no parameter or with a tensor_param.
1956
6.08k
#define ccv_nnc_tensor_variable_new(graph, ...) CCV_NNC_TENSOR_VARIABLE_NEW_X_SEL(graph, ##__VA_ARGS__, ccv_nnc_tensor_variable_new_impl, CCV_NNC_TENSOR_VARIABLE_NEW_X_1)(graph, ##__VA_ARGS__)
1957
CCV_WARN_UNUSED(ccv_nnc_tensor_variable_t) ccv_nnc_tensor_constant_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info);
1958
#define CCV_NNC_TENSOR_CONSTANT_NEW_X_1(graph) ccv_nnc_tensor_constant_new_impl(graph, ccv_nnc_tensor_auto)
1959
2
#define CCV_NNC_TENSOR_CONSTANT_NEW_X_SEL(_1, _2, _FX, ...) _FX
1960
// Make it so that this method can be called either with no parameter or with a tensor_param.
1961
2
#define ccv_nnc_tensor_constant_new(graph, ...) CCV_NNC_TENSOR_CONSTANT_NEW_X_SEL(graph, ##__VA_ARGS__, ccv_nnc_tensor_constant_new_impl, CCV_NNC_TENSOR_CONSTANT_NEW_X_1)(graph, ##__VA_ARGS__)
1962
/** @endcond */
1963
1964
/**
1965
 * Create a new tensor variable that is an alias of a given tensor variable.
1966
 * @param graph The dynamic graph.
1967
 * @param tensor_variable The tensor variable we are going to alias from.
1968
 * @param ofs The offset on each of the dimensions.
1969
 * @param inc The line size of each dimension.
1970
 * @param info The tensor parameters for the new alias.
1971
 * @return New tensor variable that is an alias.
1972
 */
1973
CCV_WARN_UNUSED(ccv_nnc_tensor_variable_t) ccv_nnc_tensor_variable_alias_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int inc[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info);
1974
/**
1975
 * Get the underlying tensor for the tensor variable. The tensor allocation may be performed when calling this
1976
 * method.
1977
 * @param graph The dynamic graph.
1978
 * @param tensor_variable The tensor variable to get the underlying tensor.
1979
 * @return The underlying tensor.
1980
 */
1981
CCV_WARN_UNUSED(ccv_nnc_tensor_t*) ccv_nnc_tensor_from_variable(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable);
1982
/**
1983
 * Set a tensor on the tensor variable. The tensor variable doesn't take over the life-cycle management of the tensor
1984
 * (in a similar way to tensor binds).
1985
 * @param graph The dynamic graph.
1986
 * @param tensor_variable The tensor variable to set.
1987
 * @param tensor The tensor that is going to be associated with the tensor variable.
1988
 */
1989
void ccv_nnc_tensor_variable_set(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_t* const tensor);
1990
/**
1991
 * Execute a command with the given tensor variables; the output is in the output tensor variables.
1992
 * @param graph The dynamic graph.
1993
 * @param cmd The wrapped command.
1994
 * @param hint The hint associated with the command.
1995
 * @param flags A reserved field for flags.
1996
 * @param inputs The input tensor variables array.
1997
 * @param input_size The size of the input tensor variables array.
1998
 * @param outputs The output tensor variables array.
1999
 * @param output_size The size of the output tensor variables array.
2000
 */
2001
int ccv_nnc_dynamic_graph_exec(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size);
2002
/**
2003
 * Compute the gradient of a given tensor, with respect to f. Thus, df / dt.
2004
 * @param dynamic_graph The dynamic graph.
2005
 * @param f_variable The output losses.
2006
 * @param df_optional The custom gradient for f. If not provided, will default to 1.
2007
 * @param inputs The input variables.
2008
 * @param input_size The size of the input variables array.
2009
 * @param outputs The gradients with respect to the inputs.
2010
 * @param output_size The size of the outputs array. Should be equal to the input_size.
2011
 */
2012
void ccv_nnc_dynamic_graph_backward(ccv_nnc_dynamic_graph_t* const dynamic_graph, const ccv_nnc_tensor_variable_t f_variable, const ccv_nnc_tensor_variable_t df_optional, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size);
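/* A minimal end-to-end sketch. It assumes the element-wise product command
 * CCV_NNC_EWPROD_FORWARD and the ccv_nnc_cmd_auto / ccv_nnc_no_hint defaults exist
 * as elsewhere in NNC; everything else uses only the declarations above:
 *
 *   ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new();
 *   const ccv_nnc_tensor_param_t info = { .type = CCV_TENSOR_CPU_MEMORY, .format = CCV_TENSOR_FORMAT_NHWC, .datatype = CCV_32F, .dim = { 1 } };
 *   ccv_nnc_tensor_variable_t x = ccv_nnc_tensor_variable_new(graph, info);
 *   ccv_nnc_tensor_from_variable(graph, x)->data.f32[0] = 3;
 *   ccv_nnc_tensor_variable_t f = ccv_nnc_tensor_variable_new(graph);
 *   ccv_nnc_tensor_variable_t inputs[] = { x, x };
 *   // f = x * x
 *   ccv_nnc_dynamic_graph_exec(graph, ccv_nnc_cmd(CCV_NNC_EWPROD_FORWARD, 0, ccv_nnc_cmd_auto, 0), ccv_nnc_no_hint, 0, inputs, 2, &f, 1);
 *   ccv_nnc_tensor_variable_t dx = ccv_nnc_tensor_variable_new(graph);
 *   ccv_nnc_dynamic_graph_backward(graph, f, 0, &x, 1, &dx, 1);
 *   // ccv_nnc_tensor_from_variable(graph, dx)->data.f32[0] should now be 6 (df/dx = 2x).
 *   ccv_nnc_dynamic_graph_free(graph);
 */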
2013
/**
2014
 * Apply one step of minimization (most likely, a gradient descent) to the parameters with a given loss (or
2015
 * losses).
2016
 * @param dynamic_graph The dynamic graph.
2017
 * @param minimizer The wrapped command that represents a particular optimization strategy.
2018
 * @param losses The losses we are trying to minimize.
2019
 * @param loss_size The size of the losses array.
2020
 * @param dlosses_optional The custom gradient for losses. If not provided, will default to 1.
2021
 * @param parameters The parameters to update.
2022
 * @param parameter_size The size of parameters array.
2023
 * @param saved_aux The aux variables to facilitate the minimizer. See ccv_nnc_minimizer_saved_aux_size.
2024
 */
2025
void ccv_nnc_dynamic_graph_minimize(ccv_nnc_dynamic_graph_t* const dynamic_graph, const ccv_nnc_cmd_t minimizer, const ccv_nnc_tensor_variable_t* const losses, const int loss_size, const ccv_nnc_tensor_variable_t* const dlosses_optional, ccv_nnc_tensor_variable_t* const parameters, const int parameter_size, ccv_nnc_tensor_variable_t* const saved_aux);
2026
/**
2027
 * Dispose a tensor variable. You cannot do any computation against this tensor variable afterwards.
2028
 * @param graph The dynamic graph.
2029
 * @param tensor_variable The tensor variable to be disposed.
2030
 */
2031
void ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable);
2032
/**
2033
 * Free the dynamic graph.
2034
 * @param graph The dynamic graph.
2035
 */
2036
void ccv_nnc_dynamic_graph_free(ccv_nnc_dynamic_graph_t* const graph);
2037
/**
2038
 * Generate output that can be parsed by GraphViz (DOT language).
2039
 * @param graph The dynamic graph.
2040
 * @param flags Either CCV_NNC_SHORT_DOT_GRAPH or CCV_NNC_LONG_DOT_GRAPH
2041
 * @param out The output file stream.
2042
 */
2043
void ccv_nnc_dynamic_graph_dot(const ccv_nnc_dynamic_graph_t* const graph, const int flags, FILE* out);
2044
2045
/** @} */
2046
2047
/**
2048
 * @defgroup level_5 Level-5 API
2049
 * @{
2050
 */
2051
2052
/**
2053
 * @page dataframe What is "dataframe" in ML?
2054
 *
2055
 * A large part of machine learning consists of going through data, processing it into a shape / form that makes sense,
2056
 * and passing that into the model to train. Deep learning frameworks such as TensorFlow or PyTorch provide some
2057
 * dataset APIs for this purpose. It is convenient for these frameworks because, being Python, people can use
2058
 * Pandas to process the data. In Pandas, this is called a Dataframe, which, again, imitates the R language.
2059
 *
2060
 * Another interesting observation comes from the recent (2018) release of the Create ML framework from Apple. It provides
2061
 * a data processing API (MLDataTable) very close to Pandas' style, but in Swift. This implementation is important because
2062
 * it provides a survey point other than Python.
2063
 *
2064
 * Compared to Python, Swift is a more strongly typed language. Though both are high-level, they both have pretty good
2065
 * string support (of course!), operator overloading, and polymorphism. String support makes column naming natural,
2066
 * operator overloading makes conditioning and filtering easier, and polymorphism makes column type representation
2067
 * straight-forward. These, unfortunately, are the challenges I need to face when implementing in C, with an eye
2068
 * towards later implementing similar ideas in a high-level language on top of this one.
2069
 *
2070
 * It seems I haven't answered the most crucial question yet: what's special about these data processing APIs? It is
2071
 * easier to answer this by first looking at what Pandas or MLDataTable does.
2072
 *
2073
 * * They both represent data as tables. Each column represents a different type of data (time, nd-array, scalar
2074
 *   or string). As such, they both have APIs to add / remove / rename columns, and load tabular data from disk.
2075
 *
2076
 * * They both provide API to filter (remove / add) rows, and derive new column from existing columns.
2077
 *
2078
 * * Pandas provides more API for data alignment (merge columns from different tables into one table), and compute
2079
 *   statistics (group rows by some criteria, and compute min / max / std / mean within that group).
2080
 *
2081
 * * MLDataTable provides an API for batching data (random split), which is covered in TensorFlow / PyTorch's Dataset API
2082
 *   as well.
2083
 *
2084
 * It turns out when you have a noisy dataset, these functionalities are useful to remove unwanted data quickly.
2085
 * If you have a relatively clean dataset, it also allows you to prepare data in a more elegant way. For NNC,
2086
 * the interesting requirements are:
2087
 *
2088
 * 1. Represent scalars, tensors, and strings as columns; columns can be named.
2089
 *
2090
 * 2. New columns can be derived from existing ones.
2091
 *
2092
 * 3. Rows can be filtered, grouped, and statistics can be computed.
2093
 *
2094
 * 4. Columns can be aligned, with some given indexes.
2095
 *
2096
 * 5. All these can be done efficiently, on a scale of hundreds of gigabytes of data.
2097
 */
2098
2099
/**
2100
 * @defgroup level_5_dataframe Dataframe API
2101
 * @{
2102
 */
2103
2104
/**
2105
 * A data enumeration function to supply data for given row indexes.
2106
 */
2107
typedef void (*ccv_cnnp_column_data_enum_f)(const int column_idx, const int* const row_idxs, const int row_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context);
2108
/**
2109
 * A destructor for data.
2110
 */
2111
typedef void (*ccv_cnnp_column_data_deinit_f)(void* const data, void* const context);
2112
/**
2113
 * A destructor for context.
2114
 */
2115
typedef void (*ccv_cnnp_column_data_context_deinit_f)(void* const context);
2116
/**
2117
 * Column data.
2118
 */
2119
typedef struct {
2120
  int stream_type; /**< The type of stream context for this column. Each column is only compatible with one stream type. */
2121
  ccv_cnnp_column_data_enum_f data_enum; /**< The data enumeration function for this column. */
2122
  ccv_cnnp_column_data_deinit_f data_deinit; /**< The deinit function that will be used to destroy the data. */
2123
  void* context; /**< The context that goes along with this column. */
2124
  ccv_cnnp_column_data_context_deinit_f context_deinit; /**< The deinit function that will be used to destroy the context. */
2125
} ccv_cnnp_column_data_t;
2126
/**
2127
 * An opaque structure point to the dataframe object.
2128
 */
2129
typedef struct ccv_cnnp_dataframe_s ccv_cnnp_dataframe_t;
2130
/**
2131
 * Create a dataframe object with given column data.
2132
 * @param column_data The column data that can be loaded.
2133
 * @param column_size The size of column data array.
2134
 * @param row_count The number of rows in this dataframe.
2135
 */
2136
CCV_WARN_UNUSED(ccv_cnnp_dataframe_t*) ccv_cnnp_dataframe_new(const ccv_cnnp_column_data_t* const column_data, const int column_size, const int row_count);
2137
/**
2138
 * Add a new column to the dataframe.
2139
 * @param dataframe The dataframe object to add column to.
2140
 * @param data_enum The data provider function for the new column.
2141
 * @param stream_type The type of stream context for this derived column.
2142
 * @param data_deinit The deinit function that will be used to destroy the derived data.
2143
 * @param context The context that can be used to generate new column.
2144
 * @param context_deinit The deinit function that will be used to destroy the context.
2145
 * @return The new column index.
2146
 */
2147
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_add(ccv_cnnp_dataframe_t* const dataframe, ccv_cnnp_column_data_enum_f data_enum, const int stream_type, ccv_cnnp_column_data_deinit_f data_deinit, void* const context, ccv_cnnp_column_data_context_deinit_f context_deinit);
2148
/**
2149
 * A map function that takes the data from multiple columns and derive new data out of it.
2150
 */
2151
typedef void (*ccv_cnnp_column_data_map_f)(void*** const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context);
2152
/**
2153
 * Derive a new column out of existing columns in the dataframe.
2154
 * @param dataframe The dataframe object that contains existing columns.
2155
 * @param map The map function used to derive new column from existing columns.
2156
 * @param stream_type The type of stream context for this derived column.
2157
 * @param data_deinit The deinit function that will be used to destroy the derived data.
2158
 * @param column_idxs The columns that will be used to derive new column.
2159
 * @param column_idx_size The size of existing columns array.
2160
 * @param context The context that can be used to generate new column.
2161
 * @param context_deinit The deinit function that will be used to destroy the context.
2162
 * @return The new column index.
2163
 */
2164
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_map(ccv_cnnp_dataframe_t* const dataframe, ccv_cnnp_column_data_map_f map, const int stream_type, ccv_cnnp_column_data_deinit_f data_deinit, const int* const column_idxs, const int column_idx_size, void* const context, ccv_cnnp_column_data_context_deinit_f context_deinit);
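/* A sketch of a derived column (all names hypothetical): double an int stored in column
 * idx. The map callback follows ccv_cnnp_column_data_map_f above; ccmalloc / ccfree are
 * ccv's allocation helpers:
 *
 *   static void double_it(void*** const column_data, const int column_size, const int batch_size,
 *     void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context)
 *   {
 *     int i;
 *     for (i = 0; i < batch_size; i++)
 *     {
 *       if (!data[i]) // allocate the derived row on first use
 *         data[i] = ccmalloc(sizeof(int));
 *       *(int*)data[i] = *(int*)column_data[0][i] * 2;
 *     }
 *   }
 *
 *   static void free_int(void* const data, void* const context)
 *   {
 *     ccfree(data);
 *   }
 *
 *   const int doubled = ccv_cnnp_dataframe_map(dataframe, double_it, 0, free_int, &idx, 1, 0, 0);
 */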
2165
/**
2166
 * Shuffle an existing dataframe.
2167
 * @param dataframe The dataframe that is about to be shuffled.
2168
 */
2169
void ccv_cnnp_dataframe_shuffle(ccv_cnnp_dataframe_t* const dataframe);
2170
/**
2171
 * Query row count of the dataframe.
2172
 * @param dataframe The dataframe we want to query row count.
2173
 * @return The row count of the dataframe.
2174
 */
2175
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_row_count(ccv_cnnp_dataframe_t* const dataframe);
2176
/**
2177
 * A reduce function that takes multiple rows of one column, and reduce to one row.
2178
 */
2179
typedef void (*ccv_cnnp_column_data_reduce_f)(void** const input_data, const int batch_size, void** const output_data, void* const context, ccv_nnc_stream_context_t* const stream_context);
2180
/**
2181
 * Reduce a dataframe by batch size. Thus, n rows are reduced to 1 row per reduce function on
2182
 * one specific column. This will also reduce the multi-column dataframe down to 1 column
2183
 * by selecting the one column to reduce.
2184
 * @param dataframe The dataframe that is about to be reduced.
2185
 * @param reduce The reduce function used to reduce n rows into 1.
2186
 * @param data_deinit The deinit function will be used to destroy the derived data.
2187
 * @param column_idx The column we selected to reduce.
2188
 * @param batch_size How many rows will be reduced to 1 row from the original data.
2189
 * @param context The context that can be used in reduce function.
2190
 * @param context_deinit The deinit function will be used to destroy the context.
2191
 * @return The reduced dataframe.
2192
 */
2193
CCV_WARN_UNUSED(ccv_cnnp_dataframe_t*) ccv_cnnp_dataframe_reduce_new(ccv_cnnp_dataframe_t* const dataframe, ccv_cnnp_column_data_reduce_f reduce, ccv_cnnp_column_data_deinit_f data_deinit, const int column_idx, const int batch_size, void* const context, ccv_cnnp_column_data_context_deinit_f context_deinit);
2194
/**
2195
 * Extract a value out of a struct, assuming the data points to a struct. This method extracts the
2196
 * value at a given offset of that struct. For example, if you have struct { ccv_nnc_tensor_t* a; ccv_nnc_tensor_t* b; } S,
2197
 * and you want to extract the b tensor to a different column, you can call this function with
2198
 * offsetof(S, b).
2199
 * @param dataframe The dataframe object to be extracted.
2200
 * @param column_idx The column that we want to extract the value from.
2201
 * @param offset The offset. For example, offsetof(S, b).
2202
 * @return The new column that contains the extracted value.
2203
 */
2204
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_extract_value(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const off_t offset);
2205
/**
2206
 * Make a tuple out of the columns specified. Thus, the new derived column will contain a tuple
2207
 * with data from all the columns specified here. A tuple here is represented as void* tuple[], an
2208
 * array of void* pointers.
2209
 * @param dataframe The dataframe that will contain the new column.
2210
 * @param column_idxs The columns to be tupled.
2211
 * @param column_idx_size The number of columns.
2212
 * @return The derived column with the tuple.
2213
 */
2214
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_make_tuple(ccv_cnnp_dataframe_t* const dataframe, const int* const column_idxs, const int column_idx_size);
2215
/**
2216
 * The size of the tuple. It is equal to the number of columns we specified. The behavior of
2217
 * calling this method on a column that is not a tuple is undefined.
2218
 * @param dataframe The dataframe that contains the tuple column.
2219
 * @param column_idx The tuple column we are going to inspect.
2220
 * @return The tuple size of the column.
2221
 */
2222
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_tuple_size(const ccv_cnnp_dataframe_t* const dataframe, const int column_idx);
2223
/**
2224
 * Extract a data out of a tuple.
2225
 * @param dataframe The dataframe that will contain the new column.
2226
 * @param column_idx The column that is a tuple.
2227
 * @param index The index into the tuple.
2228
 * @return The derived column with the extracted value.
2229
 */
2230
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_extract_tuple(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const int index);
2231
/**
2232
 * The opaque pointer to the iterator.
2233
 */
2234
typedef struct ccv_cnnp_dataframe_iter_s ccv_cnnp_dataframe_iter_t;
2235
/**
2236
 * Get a new iterator of the dataframe.
2237
 * @param dataframe The dataframe object to iterate through.
2238
 * @param column_idxs The columns that will be iterated.
2239
 * @param column_idx_size The size of columns array.
2240
 * @return The opaque iterator object.
2241
 */
2242
CCV_WARN_UNUSED(ccv_cnnp_dataframe_iter_t*) ccv_cnnp_dataframe_iter_new(ccv_cnnp_dataframe_t* const dataframe, const int* const column_idxs, const int column_idx_size);
2243
/**
2244
 * Get the next item from the iterator.
2245
 * @param iter The iterator to go through.
2246
 * @param data_ref The output for the data.
2247
 * @param column_idx_size The size of the data_ref array.
2248
 * @param stream_context The stream context to extract data asynchronously.
2249
 * @return 0 if the iteration is successful, -1 if it is ended.
2250
 */
2251
int ccv_cnnp_dataframe_iter_next(ccv_cnnp_dataframe_iter_t* const iter, void** const data_ref, const int column_idx_size, ccv_nnc_stream_context_t* const stream_context);
2252
/**
2253
 * Prefetch the next item on the iterator with the given stream context. You can call this method multiple times
2254
 * to prefetch multiple items ahead of time.
2255
 * @param iter The iterator to go through.
2256
 * @param prefetch_count How far ahead we should prefetch.
2257
 * @param stream_context The stream context to extract data asynchronously.
2258
 * @return 0 if the prefetch is successful, -1 if it is ended.
2259
 */
2260
int ccv_cnnp_dataframe_iter_prefetch(ccv_cnnp_dataframe_iter_t* const iter, const int prefetch_count, ccv_nnc_stream_context_t* const stream_context);
2261
/**
2262
 * Set the cursor of the iterator. When set to 0, the iterator effectively restarts.
2263
 * @param iter The iterator to go through.
2264
 * @param idx The index of the cursor.
2265
 * @return 0 if it is successful, -1 if it is not (exceed the range).
2266
 */
2267
int ccv_cnnp_dataframe_iter_set_cursor(ccv_cnnp_dataframe_iter_t* const iter, const int idx);
2268
/**
2269
 * Free the dataframe iterator object.
2270
 * @param iter The dataframe iterator to be freed.
2271
 */
2272
void ccv_cnnp_dataframe_iter_free(ccv_cnnp_dataframe_iter_t* const iter);
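/* A sketch of a full pass over one column (dataframe / column / process are hypothetical):
 *
 *   ccv_cnnp_dataframe_iter_t* const iter = ccv_cnnp_dataframe_iter_new(dataframe, &column, 1);
 *   void* data = 0;
 *   while (ccv_cnnp_dataframe_iter_next(iter, &data, 1, 0) == 0)
 *     process(data); // consume one row's worth of data
 *   ccv_cnnp_dataframe_iter_free(iter);
 */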
2273
/**
2274
 * Free the dataframe object.
2275
 * @param dataframe The dataframe object to be freed.
2276
 */
2277
void ccv_cnnp_dataframe_free(ccv_cnnp_dataframe_t* const dataframe);
2278
2279
/** @} */
2280
2281
/**
2282
 * @defgroup level_5_dataframe_add_ons Dataframe Add-ons
2283
 * @{
2284
 */
2285
2286
/**
2287
 * Turn a ccv_array_t to a dataframe object.
2288
 * @param array The array we want to turn into a dataframe object.
2289
 * @return The new dataframe object.
2290
 */
2291
CCV_WARN_UNUSED(ccv_cnnp_dataframe_t*) ccv_cnnp_dataframe_from_array_new(ccv_array_t* const array);
2292
/**
2293
 * Derive a new column that copies a tensor array from the given column to the derived column on GPU.
2294
 * @param dataframe The dataframe object that get the derived column.
2295
 * @param column_idx The original column that contains the tensor array on CPU.
2296
 * @param tensor_offset Only copy as outputs[i] = inputs[i + tensor_offset].
2297
 * @param tensor_size How many tensors in the tensor array.
2298
 * @param device_id The device we want to copy the tensors to.
2299
 * @return The index of the newly derived column.
2300
 */
2301
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_copy_to_gpu(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const int tensor_offset, const int tensor_size, const int device_id);
2302
/**
2303
 * Derive a new column by executing a generic command.
2304
 * @param dataframe The dataframe object that get the derived column.
2305
 * @param column_idx The original column that contains the tensor array.
2306
 * @param cmd The command for this operation.
2307
 * @param hint The hint to run the command.
2308
 * @param flags The flags with the command.
2309
 * @param input_offset Use inputs[i + input_offset] to inputs[i + input_offset + input_size - 1] as the inputs.
2310
 * @param input_size How many tensors in the input array.
2311
 * @param output_params The parameters for the outputs.
2312
 * @param output_size How many tensors in the output array.
2313
 * @param stream_type The type of stream context we are going to use.
2314
 */
2315
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_cmd_exec(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const int input_offset, const int input_size, const ccv_nnc_tensor_param_t* const output_params, const int output_size, const int stream_type);
2316
/**
2317
 * Add a new column that contains some tensors. This will add a new column where each row is a tensor with the
2318
 * specified parameters. It comes in handy when you want to have some auxiliary tensors along with each row.
2319
 * @param dataframe The dataframe object that get the new column.
2320
 * @param params The parameters for the tensors.
2321
 * @return The index of the newly added column.
2322
 */
2323
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_add_aux(ccv_cnnp_dataframe_t* const dataframe, const ccv_nnc_tensor_param_t params);
2324
/**
2325
 * Read an image off the said column. That column should contain the filename (as a char array). The new column
2326
 * will contain the ccv_dense_matrix_t / ccv_nnc_tensor_t (the two are toll-free bridged) of the image.
2327
 * @param dataframe The dataframe object that loads the images.
2328
 * @param column_idx The column which contains the filename.
2329
 * @param structof The offset to the filename (as a char array) from that column. For example, the column
2330
 *        could be a struct and filename could be one of the fields. In that case, you can pass offsetof(S, filename).
2331
 * @return The index of the newly derived column.
2332
 */
2333
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_read_image(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const off_t structof);
2334
/**
2335
 * The structure to describe how to apply random jitter to the image.
2336
 */
2337
typedef struct {
2338
  float contrast; /**< The random contrast, the final contrast will be [1 / (1 + contrast), 1 + contrast] */
2339
  float saturation; /**< The saturation, the final saturation will be [1 / (1 + saturation), 1 + saturation] */
2340
  float brightness; /**< The brightness, the final brightness will be between [1 / (1 + brightness), 1 + brightness] */
2341
  float lighting; /**< AlexNet style PCA based image jitter */
2342
  float aspect_ratio; /**< Stretch aspect ratio between [1 / (1 + aspect_ratio), 1 + aspect_ratio] */
2343
  int symmetric; /**< Apply random flip on x-axis (around y-axis). */
2344
  int seed; /**< The seed for random generator. */
2345
  int center_crop; /**< Enable crop to the center (otherwise do random crop). */
2346
  struct {
2347
    int min; /**< The minimal dimension of resize */
2348
    int max; /**< The maximal dimension of resize. The final resize can be computed from min + (max - min) * random_unit */
2349
  } resize;
2350
  struct {
2351
    int rows; /**< The height of the final image. */
2352
    int cols; /**< The width of the final image. */
2353
  } size;
2354
  struct {
2355
    int x; /**< The extra random offset on x-axis. */
2356
    int y; /**< The extra random offset on y-axis. */
2357
  } offset;
2358
  struct {
2359
    float mean[3]; /**< Normalize the image with mean. */
2360
    float std[3]; /**< Normalize the image with std: pixel = (pixel - mean) / std */
2361
  } normalize;
2362
} ccv_cnnp_random_jitter_t;
2363
/**
2364
 * Apply random jitter on an image to generate a new image.
2365
 * @param dataframe The dataframe object that contains the original image.
2366
 * @param column_idx The column which contains the original image.
2367
 * @param datatype The final datatype of the image. We only support CCV_32F right now.
2368
 * @param random_jitter The random jitter parameters to apply.
2369
 * @return The index of the newly derived column.
2370
 */
2371
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_image_random_jitter(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const int datatype, const ccv_cnnp_random_jitter_t random_jitter);
2372
/**
2373
 * Generate a one-hot tensor off the label from a struct.
2374
 * @param dataframe The dataframe object that contains the label.
2375
 * @param column_idx The column which contains the label (as int).
2376
 * @param structof The offset to the label (as int) from that column. For example, the column
2377
 *        could be a struct and label could be one of the fields. In that case, you can pass offsetof(S, label).
2378
 * @param range The range of the label, from [0...range - 1]
2379
 * @param onval The value when it hit.
2380
 * @param offval The value for the others.
2381
 * @param datatype The datatype of the tensor.
2382
 * @param format The format of the tensor.
2383
 * @return The index of the newly derived column.
2384
 */
2385
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_one_hot(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const off_t structof, const int range, const float onval, const float offval, const int datatype, const int format);
2386
/**
2387
 * Batch multiple tensors in a column into one tensor. This method can take multiple columns, which
2388
 * will result in a tuple of tensors. Each tensor in the tuple is a batched one from a given column.
2389
 * @param dataframe The dataframe contains the columns of tensors to be batched.
2390
 * @param column_idxs The columns that contain the tensors.
2391
 * @param column_idx_size The number of columns that contain the tensors.
2392
 * @param batch_count How many tensors in one column to be batched together.
2393
 * @param group_count We can generate many groups of batched tensors. For example, say you have columns A, B, C, each
2394
 *        with different tensors. If group_count is 1, the result tuple will be (A_b, B_b, C_b). If group_count is
2395
 *        2, the result tuple will be (A_b1, B_b1, C_b1, A_b2, B_b2, C_b2). A_b1 etc. will still contain the same
2396
 *        number of batch_count tensors.
2397
 * @param format The result format of the tensor. We support simple NCHW <=> NHWC transformations of the source tensor.
2398
 * @return The newly created dataframe with the 0-th column is the tuple of batched tensors.
2399
 */
2400
CCV_WARN_UNUSED(ccv_cnnp_dataframe_t*) ccv_cnnp_dataframe_batching_new(ccv_cnnp_dataframe_t* const dataframe, const int* const column_idxs, const int column_idx_size, const int batch_count, const int group_count, const int format);
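/* A sketch of a typical image pipeline built from these add-ons. All names are
 * hypothetical, and the row type is assumed to be a struct holding a filename and a label:
 *
 *   typedef struct { char* filename; int label; } example_t;
 *   ccv_cnnp_dataframe_t* const df = ccv_cnnp_dataframe_from_array_new(examples);
 *   const int images = ccv_cnnp_dataframe_read_image(df, 0, offsetof(example_t, filename));
 *   const ccv_cnnp_random_jitter_t jitter = {
 *     .resize = { .min = 32, .max = 32 },
 *     .size = { .rows = 32, .cols = 32 },
 *   };
 *   const int jittered = ccv_cnnp_dataframe_image_random_jitter(df, images, CCV_32F, jitter);
 *   const int labels = ccv_cnnp_dataframe_one_hot(df, 0, offsetof(example_t, label), 10, 1, 0, CCV_32F, CCV_TENSOR_FORMAT_NCHW);
 *   const int cols[] = { jittered, labels };
 *   ccv_cnnp_dataframe_t* const batched = ccv_cnnp_dataframe_batching_new(df, cols, 2, 64, 1, CCV_TENSOR_FORMAT_NCHW);
 */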
2401
2402
/** @} */
2403
2404
/**
2405
 * @page model Models, layers, and Keras
2406
 *
2407
 * With Keras API in mind, this model implementation essentially is a light-weight way to group neural network layers
2408
 * together. This is a rare case in NNC (or ccv in general) where Object-Oriented programming makes sense. I borrowed
2409
 * heavily from Objective-C / C++ to implement this Object-Oriented interface.
2410
 *
2411
 * Now back to the elaboration of the Model interface. It is specifically designed with Keras in mind, asking the question:
2412
 * if we are going to build a Keras-style high-level API in other languages (Ruby, Python, Swift, Julia), what would the underlying
2413
 * C interface look like? Here is your answer (hint: it looks very much like the Python Keras API).
2414
 *
2415
 * A model consists of a set of inputs and outputs. This sounds very much like what "Command" is in Level-1 APIs,
2416
 * however, they are different: a model is stateful. For example, a convolution command takes 3 inputs: image, kernel
2417
 * weight and bias, and has 1 output: image. A convolution model takes 1 input: image, and has 1 output: image. Kernel weight
2418
 * and bias are internal states to the model (in Keras, it is called "layer" for convolution, and model means a set of
2419
 * layers. In NNC, that kind of differentiation feels superficial, therefore, a layer is a model).
2420
 *
2421
 * A model can be combined, and a new model can be a combination of other models.
2422
 *
2423
 * The simplest composed model is the sequential model. A sequential model consists of a sequence of models,
2424
 * each of which has one input and one output. The output of the earlier model feeds into the later one, thus forming a sequential
2425
 * evaluation path.
2426
 */
2427
2428
/**
2429
 * @defgroup level_5_model Model API
2430
 * @{
2431
 */
2432
2433
/**
2434
 * The model type is an abstract type; you won't ever interact with a naked model.
2435
 */
2436
typedef struct ccv_cnnp_model_s ccv_cnnp_model_t;
2437
/**
2438
 * With this type, now in NNC, we have 4 types that represent a "tensor":
2439
 * ccv_nnc_tensor_t / ccv_nnc_tensor_view_t / ccv_nnc_tensor_multiview_t: a concrete tensor with memory allocated.
2440
 * ccv_nnc_tensor_symbol_t: a symbol representation of a tensor, with its data layout, device affinity, and type
2441
 *                          specified.
2442
 * ccv_nnc_tensor_variable_t: in dynamic graph, this represents a concrete tensor with memory allocated, but also
2443
 *                            associated with a recorded execution.
2444
 * ccv_cnnp_model_io_t: this is the most flexible one. No data layout, device affinity or type specified; the format
2445
 *                      has to be c / h / w, with no batch size needed. This is a handle used by the model API to associate
2446
 *                      model inputs / outputs.
2447
 */
2448
typedef struct ccv_cnnp_model_io_s* ccv_cnnp_model_io_t;
2449
/**
2450
 * Create a naked input.
2451
 * @return A ccv_cnnp_model_io_t represents an input.
2452
 */
2453
CCV_WARN_UNUSED(ccv_cnnp_model_io_t) ccv_cnnp_input(void);
2454
/**
2455
 * This method mimics the Keras callable for a model (thus, overriding the __call__ method in the Python class).
2456
 * @param model A model that we can apply a set of inputs to get one output.
2457
 * @param inputs The set of inputs.
2458
 * @param input_size The size of inputs array.
2459
 * @return A ccv_cnnp_model_io_t that represents the output of the given model.
2460
 */
2461
CCV_WARN_UNUSED(ccv_cnnp_model_io_t) ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size);
2462
/**
2463
 * This method's name is deceiving. It returns a composed model, not a naked model.
2464
 * This composed model takes a set of inputs, and runs through various other models to arrive at
2465
 * the set of outputs.
2466
 * @param inputs The set of inputs.
2467
 * @param input_size The size of inputs array.
2468
 * @param outputs The set of outputs.
2469
 * @param output_size The size of outputs array.
2470
 * @return A composed model that takes inputs, and generate the outputs.
2471
 */
2472
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_model_new(const ccv_cnnp_model_io_t* const inputs, const int input_size, const ccv_cnnp_model_io_t* const outputs, const int output_size);
2473
/**
2474
 * This method returns a sequential model, which composed from a sequence of models.
2475
 * @param models The list of models, that takes one input, and emit one output, feeding into the subsequent one.
2476
 * @param model_size The size of the list.
2477
 * @return A composed model that applies these models one by one in sequence.
2478
 */
2479
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_sequential_new(ccv_cnnp_model_t* const* const models, const int model_size);
2480
/**
2481
 * Prepare the model to be trained; the inputs specify the batch size etc.
2482
 * The input size technically is not needed; it is here as a safety check.
2483
 * @param model The model to be compiled.
2484
 * @param inputs The tensor parameters for the model's inputs, that can be used to derive all tensor shapes.
2485
 * @param input_size The size of the inputs array.
2486
 * @param minimizer The wrapped command that represents a particular optimization strategy.
2487
 * @param loss The wrapped command that computes the loss function.
2488
 */
2489
void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss);
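/* A sketch of composing and compiling a small classifier. CCV_NNC_SGD_FORWARD and
 * CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD are assumed to exist as in NNC's generated
 * command set; the shapes are hypothetical:
 *
 *   ccv_cnnp_model_t* const layers[] = {
 *     ccv_cnnp_flatten(),
 *     ccv_cnnp_dense(10, (ccv_cnnp_param_t){ .activation = CCV_CNNP_ACTIVATION_SOFTMAX }),
 *   };
 *   ccv_cnnp_model_t* const model = ccv_cnnp_sequential_new(layers, 2);
 *   const ccv_nnc_tensor_param_t input = {
 *     .type = CCV_TENSOR_CPU_MEMORY, .format = CCV_TENSOR_FORMAT_NHWC, .datatype = CCV_32F,
 *     .dim = { 64, 28, 28, 1 }, // a batch of 64 28x28 grayscale images
 *   };
 *   const ccv_nnc_cmd_param_t sgd_params = { .minimize = { .rate = 0.01 } };
 *   ccv_cnnp_model_compile(model, &input, 1,
 *     ccv_nnc_cmd(CCV_NNC_SGD_FORWARD, 0, sgd_params, 0),
 *     ccv_nnc_cmd(CCV_NNC_SOFTMAX_CROSSENTROPY_FORWARD, 0, ccv_nnc_cmd_auto, 0));
 */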
2490
/**
2491
 * Generate output that can be parsed by GraphViz (DOT language).
2492
 * @param model The composed model.
2493
 * @param flags Either CCV_NNC_SHORT_DOT_GRAPH or CCV_NNC_LONG_DOT_GRAPH
2494
 * @param outs The output file streams.
2495
 * @param out_size The size of output file stream array.
2496
 */
2497
void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size);
2498
/**
2499
 * Fit a model to a given input / output. This is a combination of running ccv_cnnp_model_evaluate /
2500
 * ccv_cnnp_model_backward / ccv_cnnp_model_apply_gradients. The difference is that when calling
2501
 * individual functions, the graph is compiled piece by piece, and thus it is less efficient than calling
2502
 * ccv_cnnp_model_fit directly. However, having the separate functions makes this implementation much
2503
 * more versatile; for example, one can accumulate gradients for multiple batches, or use custom gradients,
2504
 * etc.
2505
 * @param model The composed model.
2506
 * @param inputs The input tensors.
2507
 * @param input_size The size of the input tensors array.
2508
 * @param fits The target tensors.
2509
 * @param fit_size The size of the target tensors array.
2510
 * @param outputs The actual outputs from the model.
2511
 * @param output_size The size of the outputs array.
2512
 * @param stream_context The stream where the fit can be executed upon.
2513
 */
2514
void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context);
2515
2516
/**
2517
 * The parameters for how evaluation should behave.
2518
 */
2519
typedef struct {
2520
  int requires_grad; /**< Whether we need to keep intermediate results for gradient computations. */
2521
  int enable_outgrad; /**< Whether we can compute outflow gradients when call ccv_cnnp_model_backward later. */
2522
  int is_test; /**< Whether we evaluate it as test, or just as forward pass of the training process. */
2523
} ccv_cnnp_evaluate_param_t;
2524
/**
2525
 * Evaluate model with output.
2526
 * @param model The composed model.
2527
 * @param params The parameters for how evaluation should behave.
2528
 * @param inputs The input tensors.
2529
 * @param input_size The size of the input tensors array.
2530
 * @param outputs The actual outputs from the model.
2531
 * @param output_size The size of the outputs array.
2532
 * @param stream_context The stream where the evaluation can be executed upon.
2533
 */
2534
void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context);
2535
/**
2536
 * Based on the input gradients, compute the output gradients (w.r.t. the inputs). This also accumulates gradients for the trainables.
2537
 * @param model The composed model.
2538
 * @param ingrads The input gradients.
2539
 * @param ingrad_size The size of the input gradients array.
2540
 * @param outgrads The output gradients (w.r.t. the inputs).
2541
 * @param outgrad_size The size of the output gradients array.
2542
 * @param stream_context The stream where the gradient computation can be executed upon.
2543
 */
2544
void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_stream_context_t* const stream_context);
2545
/**
2546
 * Apply the computed gradients to the trainable tensors.
2547
 * @param model The composed model.
2548
 * @param stream_context The stream where the gradient computation can be executed upon.
2549
 */
2550
void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context);
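/* A sketch of one manual training step using the three calls above (this is what
 * ccv_cnnp_model_fit combines). It assumes, as with df_optional elsewhere in this
 * header, that passing 0 for the input gradients defaults them to 1:
 *
 *   const ccv_cnnp_evaluate_param_t params = { .requires_grad = 1, .enable_outgrad = 0, .is_test = 0 };
 *   ccv_cnnp_model_evaluate(model, params, &input, 1, &output, 1, 0);
 *   ccv_cnnp_model_backward(model, 0, 0, 0, 0, 0); // no outflow gradients requested
 *   ccv_cnnp_model_apply_gradients(model, 0);
 */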
2551
enum {
2552
  /**
2553
   * This is the default flag: if the model is not initialized, it will attempt to read from disk.
2554
   * Otherwise, it will persist the existing parameters to disk.
2555
   */
2556
  CCV_CNNP_MODEL_CHECKPOINT_READ_WRITE,
2557
  /**
2558
   * Only read parameters out of disk, even if it is already initialized.
2559
   */
2560
  CCV_CNNP_MODEL_CHECKPOINT_READ_ONLY,
2561
  /**
2562
   * Only write parameters to disk.
2563
   */
2564
  CCV_CNNP_MODEL_CHECKPOINT_WRITE_ONLY,
2565
};
2566
/**
2567
 * This method checkpoints the given model. If the model is initialized, it will persist all parameters
2568
 * to the given file path. If it is not initialized, this method will try to load tensors off the
2569
 * disk.
2570
 * @param model The composed model.
2571
 * @param fn The file name.
2572
 * @param flags Whether we perform read / write on this checkpoint, or read only / write only.
2573
 */
2574
void ccv_cnnp_model_checkpoint(ccv_cnnp_model_t* const model, const char* const fn, const int flags);
2575
/**
2576
 * Apply data parallel to the composed model. This method has to be called before we call either
2577
 * evaluate or fit and after the model is compiled.
2578
 * @param model The composed model.
2579
 * @param parallel Number of devices we want to run on. 0 will use all devices available. 1 will skip.
2580
 */
2581
void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel);
2582
/**
2583
 * This method sets the max workspace size. If the graph is already compiled, it will re-run
2584
 * autotune with the new workspace size to find the best algorithm.
2585
 * @param model The composed model.
2586
 * @param workspace_size The size in bytes that we can use as workspace (scratch memory).
2587
 */
2588
void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size);
2589
/**
2590
 * Simple structure for group of command and the index for the variable.
2591
 */
2592
typedef struct {
2593
  const ccv_nnc_cmd_t cmd;
2594
  const int index; // The input index.
2595
} ccv_cnnp_trainable_index_t;
2596
/**
2597
 * The setter function prototype for ccv_cnnp_model_set_minimizer. This is useful because it helps to
2598
 * set different minimizer parameters for different trainables. An example would be disabling weight decay
2599
 * for bias / scale variables. If I expand this idea a bit, I could also support an entirely different minimizer
2600
 * function for different trainables. However, I haven't seen anything that can be trained with
2601
 * different minimizers (most likely because the learning rate is updated per epoch; therefore, it is hard to manipulate
2602
 * a proper learning rate if different minimizers are used for different trainables at the same time). If
2603
 * there is a model that does that, I can add support (it needs some thinking though). Because we cannot attach names
2604
 * to the trainables (hmm, in retrospect, we probably should), the way we identify them is through which
2605
 * node they are used in (by the command type), and in which position. Also, it is only interesting if the
2606
 * trainable is the input of some command. Therefore, it only shows up if it is an input.
2607
 */
2608
typedef ccv_nnc_cmd_t(*ccv_cnnp_model_minimizer_set_f)(const ccv_cnnp_model_t* const model, const ccv_cnnp_trainable_index_t* const indexes, const int index_size, const void* const context);
2609
/**
2610
 * Set a new minimizer for the model. This is useful when you need to update the learning rate for stochastic
2611
 * gradient descent for example. This method can be called any time during the training process (after
2612
 * compilation).
2613
 * @param model The composed model.
2614
 * @param minimizer The wrapped command that represents a new optimization strategy.
2615
 * @param minimizer_setter The function to be called to return minimizer for a particular trainable.
2616
 * @param context The context passed to the minimizer setter function.
2617
 */
2618
void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const ccv_cnnp_model_minimizer_set_f minimizer_setter, const void* const context);
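/* A sketch of a setter that keeps the shared minimizer but zeroes out weight decay for
 * convolution bias. The .minimize.decay field is an assumption for illustration, as is
 * treating input index 2 as the bias of the convolution command:
 *
 *   static ccv_nnc_cmd_t no_decay_for_bias(const ccv_cnnp_model_t* const model,
 *     const ccv_cnnp_trainable_index_t* const indexes, const int index_size, const void* const context)
 *   {
 *     ccv_nnc_cmd_t minimizer = *(const ccv_nnc_cmd_t*)context; // the shared minimizer, passed as context
 *     int i;
 *     for (i = 0; i < index_size; i++)
 *       if (indexes[i].cmd.cmd == CCV_NNC_CONVOLUTION_FORWARD && indexes[i].index == 2)
 *         minimizer.info.minimize.decay = 0; // hypothetical field
 *     return minimizer;
 *   }
 *
 *   ccv_cnnp_model_set_minimizer(model, sgd, no_decay_for_bias, &sgd);
 */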
2619
/**
2620
 * Get the default stream from a compiled model. If the model is not compiled, the default stream is
2621
 * 0.
2622
 * @param model The composed model.
2623
 * @return The default stream for this model.
2624
 */
2625
CCV_WARN_UNUSED(ccv_nnc_stream_context_t*) ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model);
2626
/**
2627
 * Get the allocated memory size (excluding workspace) from a compiled model. If the model is not compiled,
2628
 * the size is 0.
2629
 * @param model The composed model.
2630
 * @return The number of bytes for memory allocated.
2631
 */
2632
CCV_WARN_UNUSED(uint64_t) ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model);
2633
/**
2634
 * Free a given model.
2635
 * @param model The composed model.
2636
 */
2637
void ccv_cnnp_model_free(ccv_cnnp_model_t* const model);
2638
2639
enum {
2640
  CCV_CNNP_ACTIVATION_NONE,
2641
  CCV_CNNP_ACTIVATION_RELU,
2642
  CCV_CNNP_ACTIVATION_SOFTMAX,
2643
};
2644
2645
enum {
2646
  CCV_CNNP_NO_NORM,
2647
  CCV_CNNP_BATCH_NORM,
2648
};
2649
2650
typedef struct {
2651
  int no_bias; /**< No bias term. */
2652
  int norm; /**< The normalization to be applied after activation, such as CCV_CNNP_BATCH_NORM. */
2653
  int activation; /**< The activation to be applied to the output, such as CCV_CNNP_ACTIVATION_RELU or CCV_CNNP_ACTIVATION_SOFTMAX. */
2654
  ccv_nnc_hint_t hint; /**< The hint for a particular operation */
2655
} ccv_cnnp_param_t;
/**
 * Add multiple input tensors together.
 * @return A model that can be applied with multiple inputs, and generates an output that is the sum of the inputs.
 */
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_add(void);
/**
 * Concatenate input tensors together.
 * @return A model that can be applied with multiple inputs, and generates an output that is the concatenation of the inputs.
 */
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_concat(void);
/**
 * An identity layer that takes the input and passes it through unchanged as the output. Realistically,
 * we use this because we want to apply some normalization / activation function on top of the input.
 * @param params Parameters (such as hint and activation or norm).
 * @return A model that takes the input and passes it along as output.
 */
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_identity(const ccv_cnnp_param_t params);
/**
 * A convolution model.
 * @param groups The number of kernel groups in the model.
 * @param filters The total number of filters in the model (filters = groups * filters per group).
 * @param kdim The dimensions of the kernel.
 * @param params Other parameters (such as hint and activation or norm).
 * @return A convolution model.
 */
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_convolution(const int groups, const int filters, const int kdim[CCV_NNC_MAX_DIM_ALLOC], const ccv_cnnp_param_t params);
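A hedged construction sketch: a 3x3 convolution with 32 filters in a single group, reusing the
parameter literal pattern shown above:

// kdim only needs the leading dimensions set; the rest stay 0.
const int kdim[CCV_NNC_MAX_DIM_ALLOC] = {3, 3};
ccv_cnnp_model_t* const conv3x3 = ccv_cnnp_convolution(1, 32, kdim, (ccv_cnnp_param_t){
  .norm = CCV_CNNP_BATCH_NORM,
  .activation = CCV_CNNP_ACTIVATION_RELU,
});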
/**
 * A dense layer model.
 * @param count The output dimension.
 * @param params Other parameters (such as hint and activation or norm).
 * @return A dense layer model.
 */
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_dense(const int count, const ccv_cnnp_param_t params);
/**
 * A max pool model.
 * @param kdim The pooling window dimension.
 * @param params Other parameters (such as hint and activation or norm).
 * @return A max pool model.
 */
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_max_pool(const int kdim[CCV_NNC_MAX_DIM_ALLOC], const ccv_cnnp_param_t params);
/**
 * An average pool model.
 * @param kdim The pooling window dimension.
 * @param params Other parameters (such as hint and activation or norm).
 * @return An average pool model.
 */
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_average_pool(const int kdim[CCV_NNC_MAX_DIM_ALLOC], const ccv_cnnp_param_t params);
/**
 * Reshape an input into a different dimension.
 * @param dim The new dimension for the input.
 * @return A reshape layer model.
 */
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_reshape(const int dim[CCV_NNC_MAX_DIM_ALLOC]);
/**
 * Flatten an input tensor into a one-dimensional array.
 * @return A flatten layer model.
 */
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_flatten(void);

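As a composition sketch, flatten followed by a softmax dense head; ccv_cnnp_sequential_new is assumed
from earlier in this header, taking an array of models and its size:

ccv_cnnp_model_t* const layers[] = {
  ccv_cnnp_flatten(),
  ccv_cnnp_dense(10, (ccv_cnnp_param_t){ .activation = CCV_CNNP_ACTIVATION_SOFTMAX }),
};
// Assumed signature: models array plus count.
ccv_cnnp_model_t* const head = ccv_cnnp_sequential_new(layers, 2);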
enum {
  CCV_CNNP_IO, /**< The parameter is a ccv_cnnp_io_t. */
  CCV_CNNP_NO_TENSOR, /**< The parameter is not used. */
  CCV_CNNP_INIT_SHARED_TENSOR, /**< The parameter is a provided tensor for initialization. */
  CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, /**< The parameter is a provided tensor that can be updated. */
};

typedef struct {
  int type; /**< The type of the parameter: CCV_CNNP_IO, CCV_CNNP_NO_TENSOR, CCV_CNNP_INIT_SHARED_TENSOR, or CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE. */
  ccv_nnc_tensor_t* tensor; /**< The tensor that is going to be used for initialization. */
} ccv_cnnp_tensor_param_t;
/**
 * A generic model based on the command. If a tensor is labeled as ccv_cnnp_io_t, it participates
 * as the input / output of the model. If it is an init tensor, the model will use this tensor for that
 * parameter. Moreover, if it is marked as trainable, that tensor will be differentiated against when
 * you call ccv_cnnp_model_fit. This model however doesn't take over ownership of the tensors. You
 * should manage the life cycle of the given tensors and it is your responsibility to make sure they
 * outlive the model. Also, all inputs and outputs marked as init tensors will be shared if you reuse
 * this model in other places.
 * @param cmd The command to generate this model.
 * @param hint The hint to run the command.
 * @param flags The flags with the command.
 * @param inputs A list of ccv_cnnp_tensor_param_t identifying each input as either an init tensor or a ccv_cnnp_io_t.
 * @param input_size The size of the input list.
 * @param outputs A list of types identifying each output as a ccv_cnnp_io_t or a no-tensor.
 * @param output_size The size of the outputs. There is no need to give ccv_cnnp_tensor_param_t for
 *        outputs because an output carries no initialization tensor.
 * @return A model based on the given command.
 */
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_cmd_exec(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_cnnp_tensor_param_t* const inputs, const int input_size, const int* const outputs, const int output_size);

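A labeling sketch: wrap a single command as a model with one ccv_cnnp_io_t input, one shared trainable
weight, and one ccv_cnnp_io_t output. CMD_GEMM_FORWARD and the ccv_nnc_tensor_param_t field names are
assumptions from the generated command headers; substitute the command you actually need.

// A 10x10 trainable weight we own; the model will not take ownership of it.
ccv_nnc_tensor_t* const weight = ccv_nnc_tensor_new(0, (ccv_nnc_tensor_param_t){
  .type = CCV_TENSOR_CPU_MEMORY, .format = CCV_TENSOR_FORMAT_NHWC, .datatype = CCV_32F, .dim = {10, 10},
}, 0);
const ccv_cnnp_tensor_param_t inputs[] = {
  { .type = CCV_CNNP_IO }, // Input 0 is wired to the model's input.
  { .type = CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, .tensor = weight }, // Input 1 is trainable.
};
const int outputs[] = { CCV_CNNP_IO }; // Output 0 is wired to the model's output.
ccv_cnnp_model_t* const dense_like = ccv_cnnp_cmd_exec(CMD_GEMM_FORWARD(10), ccv_nnc_no_hint, 0, inputs, 2, outputs, 1);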
typedef struct {
  ccv_nnc_tensor_symbol_t symbol; /**< The tensor symbol this refers to. */
  int type; /**< The type of the parameter: CCV_CNNP_IO, CCV_CNNP_INIT_SHARED_TENSOR, or CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE. */
  ccv_nnc_tensor_t* tensor; /**< The tensor that is going to be used for initialization. */
} ccv_cnnp_tensor_symbol_param_t;
/**
 * A generic model based on the symbolic graph provided. A list of tensor symbols is labeled as
 * ccv_cnnp_io_t or not (we identify whether a symbol is an input or an output based on the graph
 * itself). If a symbol is not an io, we initialize it with the given tensor. If it is marked as
 * trainable, that tensor will be differentiated against when you call ccv_cnnp_model_fit. The model
 * doesn't take ownership over the init tensors. You are responsible for making sure the init tensors
 * outlive the model until initialization has occurred. Also, these tensors will be shared if the model
 * is reused.
 * @param graph The symbolic graph that is the blueprint for this model.
 * @param tensor_symbol_params The list of tensor symbol parameters that label a given symbol.
 * @param tensor_symbol_param_size The size of the list.
 * @param inputs The inputs to this graph. We can figure out which ones are inputs, but this gives us the order.
 * @param input_size The size of the input list.
 * @param outputs The outputs from this graph. We can figure out which ones are outputs, but this gives us the order.
 * @param output_size The size of the output list.
 * @return A model based on the given symbolic graph.
 */
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_graph(const ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_tensor_symbol_param_t* const tensor_symbol_params, const int tensor_symbol_param_size, ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size);

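A hedged end-to-end sketch: build an x -> ReLU -> y graph and wrap it as a model. CMD_RELU_FORWARD,
TENSOR_SYMBOL_LIST, and the ccv_nnc_tensor_param_t field names are assumed from earlier parts of this
header and the generated command headers.

ccv_nnc_symbolic_graph_t* const graph = ccv_nnc_symbolic_graph_new();
const ccv_nnc_tensor_param_t info = {
  .type = CCV_TENSOR_CPU_MEMORY, .format = CCV_TENSOR_FORMAT_NHWC, .datatype = CCV_32F, .dim = {10},
};
ccv_nnc_tensor_symbol_t x = ccv_nnc_tensor_symbol_new(graph, info, "x");
ccv_nnc_tensor_symbol_t y = ccv_nnc_tensor_symbol_new(graph, info, "y");
ccv_nnc_graph_exec_symbol_new(graph, CMD_RELU_FORWARD(), TENSOR_SYMBOL_LIST(x), TENSOR_SYMBOL_LIST(y), "relu");
// Label both symbols as model I/O; the inputs / outputs arrays fix their order.
const ccv_cnnp_tensor_symbol_param_t labels[] = {
  { .symbol = x, .type = CCV_CNNP_IO },
  { .symbol = y, .type = CCV_CNNP_IO },
};
ccv_cnnp_model_t* const relu_model = ccv_cnnp_graph(graph, labels, 2, &x, 1, &y, 1);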
/** @} */

/** @} */

#endif