Coverage Report

Created: 2026-04-14 19:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc.h
Line
Count
Source
1
/**********************************************************
2
 * C-based/Cached/Core Computer Vision Library
3
 * Liu Liu, 2010-02-01
4
 **********************************************************/
5
6
/**********************************************************
7
 * CCV - Neural Network Collection
8
 **********************************************************/
9
10
#ifndef GUARD_ccv_nnc_h
11
#define GUARD_ccv_nnc_h
12
13
#include "ccv.h"
14
#include <stddef.h>
15
16
// These are generated by cmd/build-cmd.rb
17
#include "cmd/ccv_nnc_cmd.h"
18
#include "cmd/ccv_nnc_backend.h"
19
20
/**
21
 * @defgroup level_0 Level-0 API
22
 * @{
23
 */
24
25
/**
26
 * Initialize the library.
27
 */
28
void ccv_nnc_init(void);
29
30
/**
 * System-wide feature switches. Each value occupies a distinct bit.
 * NOTE(review): presumably these are the values passed to ccv_nnc_enable_flag /
 * ccv_nnc_disable_flag and reported by ccv_nnc_flags - confirm against the implementation.
 */
enum {
31
  CCV_NNC_DISABLE_MIXED_MPS_GEMM = 0x1,
32
  CCV_NNC_DISABLE_MIXED_MPS_SOFTMAX = 0x2,
33
  CCV_NNC_DISABLE_MMAP_MTL_BUFFER = 0x4,
34
  CCV_NNC_DISABLE_MFA = 0x8,
35
  CCV_NNC_DISABLE_MFA_GEMM = 0x10,
36
  CCV_NNC_DISABLE_MFA_ATTENTION = 0x20,
37
  CCV_NNC_DISABLE_MFA_NEURAL_ACCELERATORS = 0x40,
38
  CCV_NNC_DISABLE_MFA_ANE = 0x80,
39
};
40
/**
41
 * Enable system-wide specific flag.
42
 */
43
void ccv_nnc_enable_flag(uint64_t flag);
44
/**
45
 * Disable system-wide specific flag.
46
 */
47
void ccv_nnc_disable_flag(uint64_t flag);
48
/**
49
 * Get system-wide specific flag to check.
50
 */
51
uint64_t ccv_nnc_flags(void);
52
53
/** @} */
54
55
/**
56
 * @defgroup level_1 Level-1 API
57
 * @{
58
 */
59
60
/**
61
 * @defgroup level_1_cmd Commands
62
 * @{
63
 */
64
enum {
65
  // Attributes that enable symbolic graph simplification
66
  CCV_NNC_CMD_ATTR_PASSTHROUGH  = 0x01, /**< This doesn't compute anything, but pass the first n tensors to the output (useful for backprop that is identical). */
67
  CCV_NNC_CMD_ATTR_OUTPUT_ONES  = 0x02, /**< All the output tensors are 1s (unit). */
68
  CCV_NNC_CMD_ATTR_NULL_IS_ONES = 0x04, /**< Accept nullptr input as if these are tensors with 1s (unit). */
69
};
70
71
// Flags passed into cmd when executing.
72
enum {
73
  CCV_NNC_ACCUMULATE_OUTPUT = 0x01, /**< Enable accumulate outputs (unsupported). */
74
  CCV_NNC_ZERO_MEMORY_ALLOC = 0x02, /**< Don't allocate any extra memory for this operation. */
75
};
76
77
enum {
78
  CCV_NNC_EXEC_SUCCESS   = 0, /**< Successfully executed the command. */
79
  CCV_NNC_EXEC_INVALID   = -1, /**< Invalid inputs. */
80
  CCV_NNC_EXEC_NO_KERNEL = -2, /**< No kernel available for a given command / backend. */
81
  CCV_NNC_EXEC_OOM       = -3, /**< Out of memory error. */
82
};
83
84
enum {
85
  CCV_NNC_MSE_REDUCE_MEAN = 0, /**< Reduce with mean when computing MSE loss. */
86
  CCV_NNC_MSE_REDUCE_SUM = 1, /**< Reduce with sum when computing MSE loss. */
87
};
88
89
enum {
90
  CCV_NNC_HISTOGRAM_EVEN = 0, /**< The bins are evenly distributed from min to max. */
91
  CCV_NNC_HISTOGRAM_LOGARITHMIC = 1, /**< The bins follow an exponential curve, growing from min to max by a fixed ratio. */
92
  CCV_NNC_HISTOGRAM_BINS = 2, /**< The bins range will be supplied, such as [0, 2, 3, 10]. For result, [-inf, 0, 2, 3, 10, inf] implied. */
93
};
94
95
enum {
96
  CCV_NNC_UPSAMPLE_NEAREST = 0, /**< Using nearest value. */
97
  CCV_NNC_UPSAMPLE_BILINEAR = 1, /**< Using bilinear interpolation. */
98
};
99
100
enum {
101
  CCV_NNC_PAD_ZERO = 0, /**< Pad 0s. */
102
  CCV_NNC_PAD_REPLICATE = 1, /**< Pad by replicating the edge, (a, b) to (a, a, b). */
103
  CCV_NNC_PAD_REFLECT = 2, /**< Pad by reflecting the edge, (a, b) to (b, a, b). */
104
};
105
106
enum {
107
  CCV_NNC_GEMM_32F = 0x1, /**< For GEMM (or similar op), whether prefer to use FP32 for accumulator. */
108
  CCV_NNC_GEMM_32TF = 0x2, /**< For GEMM (or similar op), whether prefer to use TF32 for accumulator. */
109
  CCV_NNC_GEMM_16F = 0x4, /**< For GEMM (or similar op), whether prefer to use FP16 for accumulator. */
110
  CCV_NNC_GEMM_8I = 0x8, /**< For GEMM (or similar op), whether prefer to use INT8 inputs / accumulators when supported. */
111
};
112
113
/**
114
 * Parameters for command.
115
 */
116
typedef struct {
117
  struct {
118
    int dim[CCV_NNC_MAX_DIM_ALLOC]; /**< [size.dim] The window size for the layer. For a fully connected layer, it is 1 because it is a 1x1 convolutional layer with count of filters. */
119
  } size;
120
  union { // The command-specific parameter groups below share storage (anonymous union).
121
    struct {
122
      int count; /**< [convolution.count] The number of filters for convolutional layer. */
123
      int groups; /**< [convolution.groups] The number of groups for convolutional layer. */
124
      int dilation[CCV_NNC_MAX_DIM_ALLOC]; /**< [convolution.dilation[]] The dilation factor for convolutional layer. Default to 1. */
125
    } convolution;
126
    struct {
127
      int count; /**< [convolution_transpose.count] The number of filters for convolutional layer. */
128
      int groups; /**< [convolution_transpose.groups] The number of groups for convolutional layer. */
129
      int dilation[CCV_NNC_MAX_DIM_ALLOC]; /**< [convolution_transpose.dilation[]] The dilation factor for convolutional layer. Default to 1. */
130
      int output_padding; /**< [convolution_transpose.output_padding] The output padding to resolve ambiguity when treat this as inverse of convolution. */
131
    } convolution_transpose;
132
    struct {
133
      int hidden_size; /**< [rnn.hidden_size] The number of features in the hidden state h. */
134
      int proj_size; /**< [rnn.proj_size] The size of the projection. NOTE(review): this was previously described with the same text as rnn.hidden_size; confirm the exact semantics. */
135
      int num_layers; /**< [rnn.num_layers] The number of layers for RNN. */
136
      int bias; /**< [rnn.bias] If 0, the layer won't use bias weights. */
137
      int batch_first; /**< [rnn.batch_first] If 1, will batch before sequence. */
138
      int bidirectional; /**< [rnn.bidirectional] Enable bidirectional mode of RNN.*/
139
      float dropout; /**< [rnn.dropout] If non-zero, enable dropout at each layer of RNN.*/
140
      int is_test; /**< [rnn.is_test] Whether running this kernel in test mode or not. */
141
    } rnn;
142
    struct {
143
      int reserved; /**< [pool.reserved] A reserved field. */
144
    } pool;
145
    struct {
146
      float kappa; /**< [rnorm.kappa] As of b[i] = a[i] / (rnorm.kappa + rnorm.alpha * sum(a, i - rnorm.size / 2, i + rnorm.size / 2)) ^ rnorm.beta */
147
      float alpha; /**< [rnorm.alpha] See **rnorm.kappa**. */
148
      float beta; /**< [rnorm.beta] See **rnorm.kappa**. */
149
    } rnorm;
150
    struct {
151
      int axis[CCV_NNC_MAX_DIM_ALLOC]; /**< [bnorm.axis[]] The axis selected to compute mean / variance. */
152
      int count; /**< [bnorm.count] The number of axis selected. */
153
      float epsilon; /**< [bnorm.epsilon] The epsilon for standard deviation. */
154
      int is_test; /**< [bnorm.is_test] Whether in test mode. */
155
      float momentum; /**< [bnorm.momentum] running_mean = running_mean * momentum + mean * (1 - momentum). */
156
    } bnorm;
157
    struct {
158
      int axis[CCV_NNC_MAX_DIM_ALLOC]; /**< [lnorm.axis[]] The axis selected to compute mean / variance. */
159
      int count; /**< [lnorm.count] The number of axis selected. */
160
      float epsilon; /**< [lnorm.epsilon] The epsilon for standard deviation. */
161
      int elementwise_affine; /**< [lnorm.elementwise_affine] Whether it supports scale / bias. */
162
    } lnorm;
163
    struct {
164
      int group_axis; /**< [gnorm.group_axis] The axis selected to be grouped. */
165
      int reduce_axis[CCV_NNC_MAX_DIM_ALLOC]; /**< [gnorm.reduce_axis[]] The other axis selected to compute mean / variance. */
166
      int reduce_count; /**< [gnorm.reduce_count] The number of other axis selected. */
167
      int groups; /**< [gnorm.groups] The number of groups that separates channels. */
168
      float epsilon; /**< [gnorm.epsilon] The epsilon for standard deviation. */
169
      int elementwise_affine; /**< [gnorm.elementwise_affine] Whether it supports scale / bias. */
170
    } gnorm;
171
    struct {
172
      int axis[CCV_NNC_MAX_DIM_ALLOC]; /**< [rmsnorm.axis[]] The axis selected to compute mean / variance. */
173
      int count; /**< [rmsnorm.count] The number of axis selected. */
174
      float epsilon; /**< [rmsnorm.epsilon] The epsilon for standard deviation. */
175
      int elementwise_affine; /**< [rmsnorm.elementwise_affine] Whether it supports scale. */
176
    } rmsnorm;
177
    struct {
178
      int nesterov; /**< [sgd.nesterov] Nesterov accelerated gradient. */
179
      float rate; /**< [sgd.rate] The learning rate. */
180
      float scale; /**< [sgd.scale] The scale to be applied to the gradient before doing any minimization. */
181
      float decay; /**< [sgd.decay] This is the weight decay parameter, which represents L2 regularization after momentum applied. */
182
      float momentum; /**< [sgd.momentum] For SGD, this follows http://www.cs.toronto.edu/%7Ehinton/absps/momentum.pdf. */
183
      float dampening; /**< [sgd.dampening] This usually == momentum, however, it can be changed. */
184
    } sgd;
185
    struct {
186
      int step; /**< [adam.step] Step t in adam optimizer. */
187
      float rate; /**< [adam.rate] The learning rate. */
188
      float scale; /**< [adam.scale] The scale to be applied to the gradient before doing any minimization. */
189
      float beta1; /**< [adam.beta1] The beta1 hyper-parameter in adam optimizer. */
190
      float beta2; /**< [adam.beta2] The beta2 hyper-parameter in adam optimizer. */
191
      float decay; /**< [adam.decay] This is the weight decay parameter, which represents L2 regularization. */
192
      float epsilon; /**< [adam.epsilon] The epsilon for standard deviation. */
193
      int amsgrad; /**< [adam.amsgrad] Whether use amsgrad. */
194
    } adam;
195
    struct {
196
      int step; /**< [lamb.step] Step t in lamb optimizer. */
197
      float rate; /**< [lamb.rate] The learning rate. */
198
      float scale; /**< [lamb.scale] The scale to be applied to the gradient before doing any minimization. */
199
      float beta1; /**< [lamb.beta1] The beta1 hyper-parameter in lamb optimizer. */
200
      float beta2; /**< [lamb.beta2] The beta2 hyper-parameter in lamb optimizer. */
201
      float decay; /**< [lamb.decay] This is the weight decay parameter, which represents L2 regularization. */
202
      float epsilon; /**< [lamb.epsilon] The epsilon for standard deviation. */
203
    } lamb;
204
    struct {
205
      float rate; /**< [rmsprop.rate] The learning rate. */
206
      float scale; /**< [rmsprop.scale] The scale to be applied to the gradient before doing any minimization. */
207
      float decay; /**< [rmsprop.decay] This is the weight decay parameter, which represents L2 regularization after momentum applied. */
208
      float alpha; /**< [rmsprop.alpha] The alpha hyper-parameter. */
209
      float momentum; /**< [rmsprop.momentum] The momentum hyper-parameter. */
210
      float epsilon; /**< [rmsprop.epsilon] The epsilon for standard deviation. */
211
    } rmsprop;
212
    struct {
213
      int transpose_a[2]; /**< [blas.transpose_a[2]] The axis we'd like to transpose for input a. */
214
      int transpose_b[2]; /**< [blas.transpose_b[2]] The axis we'd like to transpose for input b. */
215
      float a[3]; /**< [blas.a[3]] BLAS scalars. */
216
      int flags; /**< [blas.flags] Auxiliary flags to enable certain features for BLAS operation. */
217
    } blas;
218
    struct {
219
      float trim0; /**< [label_smoothing.trim0] The smoothed label for 0. */
220
      float trim1; /**< [label_smoothing.trim1] The smoothed label for 1. */
221
    } label_smoothing;
222
    struct {
223
      float pos_weight; /**< [binary_crossentropy.pos_weight] The pos_weight on the loss: -(pos_weight * y * log(x) + (1 - y) * log(1 - x)) */
224
    } binary_crossentropy;
225
    struct {
226
      float beta; /**< [smooth_l1.beta] The beta on the smooth L1 loss (or Huber loss) */
227
    } smooth_l1;
228
    struct {
229
      int reduce_op; /**< [mse.reduce_op] Whether reduce with mean or with sum */
230
    } mse;
231
    struct {
232
      int tanh; /**< [gelu.tanh] Use tanh approximation */
233
    } gelu;
234
    struct {
235
      int axis[CCV_NNC_MAX_DIM_ALLOC]; /**< [reduce.axis[]] The axis selected to reduce. */
236
      int count; /**< [reduce.count] The number of axis selected. */
237
    } reduce;
238
    struct {
239
      int axis[2]; /**< [transpose.axis[2]] The axis we'd like to transpose for input. */
240
    } transpose;
241
    struct {
242
      float p; /**< [dropout.p] Dropout probability. */
243
      int entirety; /**< [dropout.entirety] Drop the whole layer with the given probability. */
244
    } dropout;
245
    struct {
246
      int type; /**< [upsample.type] 0 - nearest, 1 - bilinear. */
247
      float width_scale; /**< [upsample.width_scale] scale for width parameter. It is between 1 and 2 at the moment. */
248
      float height_scale; /**< [upsample.height_scale] scale for height parameter. It is between 1 and 2 at the moment. */
249
      int align_corners; /**< [upsample.align_corners] Whether to scale to align corners. Thus, for 0...1, if false, it will align to -0.25, 0.25, 0.75, 1.25, if true, it will align to 0, 0.3333, 0.6666, 1.0 */
250
    } upsample;
251
    struct {
252
      int align_corners; /**< [grid_sample.align_corners] Whether to align corners when sampling. NOTE(review): presumably the same convention as upsample.align_corners; confirm. */
253
    } grid_sample;
254
    struct {
255
      float min; /**< [clamp.min] The minimum, NaN is no min. */
256
      float max; /**< [clamp.max] The maximum, NaN is no max. */
257
    } clamp;
258
    struct {
259
      float iou_threshold; /**< [nms.iou_threshold] Threshold between 0 to 1 for IoU threshold. */
260
    } nms;
261
    struct {
262
      int type; /**< [histogram.type] The type, can be even, logarithmic, or bins. */
263
      int bins; /**< [histogram.bins] The number of bins, only applied to even. */
264
      float min; /**< [histogram.min] The minimal number, for even or logarithmic. */
265
      float max; /**< [histogram.max] The maximal number, for even or logarithmic. */
266
      float rate; /**< [histogram.rate] The rate from min to max, only applied to logarithmic. */
267
    } histogram;
268
    struct {
269
      float negative_slope; /**< [leaky_relu.negative_slope] The negative slope to be applied when activation < 0. */
270
    } leaky_relu;
271
    struct {
272
      float beta; /**< [swish.beta] The beta parameter in swish: x * sigmoid(beta * x). */
273
    } swish;
274
    struct {
275
      float exponent; /**< [pow.exponent] The exponent in y = x ^ exponent. */
276
    } pow;
277
    struct {
278
      float scale; /**< [scaled_dot_product_attention.scale] The scale we multiply to the dot product of Q & K */
279
      int is_causal; /**< [scaled_dot_product_attention.is_causal] Whether we have causal matrix associated with the attention. The attention mask will be cut to triangular if provided. */
280
      int flags; /**< [scaled_dot_product_attention.flags] Which precision is preferred for accumulator, FP16 or FP32, and whether to opt into quantized attention on supported backends. */
281
      int deterministic; /**< [scaled_dot_product_attention.deterministic] Whether we want the attention computation to be deterministic (CUDA only). */
282
    } scaled_dot_product_attention;
283
    struct {
284
      int type; /**< [pad.type] The type of pad: zeros, replicating the edge, or reflecting the edge (see CCV_NNC_PAD_ZERO / CCV_NNC_PAD_REPLICATE / CCV_NNC_PAD_REFLECT). */
285
      int end[CCV_NNC_MAX_DIM_ALLOC]; /**< [pad.end] Work together with size.dim. size.dim is how much to add at the beginning and pad.end is how much to add at the end. */
286
    } pad;
287
    struct {
288
      int along_axis; /**< [sort.along_axis] Which axis to sort along with. */
289
      int descending; /**< [sort.descending] Whether sorting by descending order. */
290
    } sort;
291
    struct {
292
      int kth; /**< [partition.kth] How many items to retain after partition. */
293
      int along_axis; /**< [partition.along_axis] Which axis to partition along with. */
294
      int descending; /**< [partition.descending] Whether partitioning by descending order. */
295
    } partition;
296
    struct {
297
      int bincount; /**< [unique_consecutive.bincount] Potentially how many unique items there will be, 0 if unknown. */
298
    } unique_consecutive;
299
    struct {
300
      int bincount; /**< [scatter_add.bincount] Potentially how many unique items there will be, 0 if unknown. */
301
    } scatter_add;
302
    void* userdata;
303
  };
304
} ccv_nnc_cmd_param_t;
305
306
/**
307
 * Hints for command.
308
 */
309
typedef struct {
310
  struct {
311
    int dim[CCV_NNC_MAX_DIM_ALLOC]; /**< Stride for each dimension. */
312
  } stride;
313
  struct {
314
    int begin[CCV_NNC_MAX_DIM_ALLOC]; /**< Padding at the beginning of a dimension. */
315
    int end[CCV_NNC_MAX_DIM_ALLOC]; /**< Padding at the end of a dimension. */
316
  } border;
317
} ccv_nnc_hint_t;
318
319
/**
320
 * Opaque pointer to a stream object.
321
 */
322
typedef struct ccv_nnc_stream_context_s ccv_nnc_stream_context_t;
323
324
typedef struct ccv_nnc_cmd_vtab_s ccv_nnc_cmd_vtab_t;
325
326
typedef struct ccv_nnc_cmd_s {
327
  uint32_t cmd; /**< The identifier for command. */
328
  uint32_t backend; /**< The identifier for backend. */
329
  int algorithm; /**< The algorithm selector (as defined by backend). */
330
  ccv_nnc_cmd_param_t info; /**< The command parameters. */
331
  /**
332
   * The vtab for commands of type CCV_NNC_CUSTOM_FORWARD / CCV_NNC_CUSTOM_BACKWARD.
333
   */
334
  ccv_nnc_cmd_vtab_t* isa;
335
  void* data; /**< Opaque pointer. NOTE(review): presumably context handed to the custom command's callbacks; confirm. */
336
} ccv_nnc_cmd_t;
337
338
/**
339
 * For forward functions, the input tensors and output tensors can be arbitrary.
340
 * However, for backward functions (backpropagation, or gradient functions in other libs),
341
 * the input is (with n forward inputs and m forward outputs): 0~m-1: gradient for output tensors, m~m+n-1: input tensors for forward functions, m+n~2m+n-1: output tensors for forward functions,
342
 * the output is: 0~n-1: output gradients w.r.t. input tensors.
343
 * Which input / output tensors can be ignored can be specified in the cmd config structs.
344
 */
345
typedef int(*ccv_nnc_cmd_exec_f)(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context);
346
347
/**
348
 * The function prototype for autotune. The only difference is the max_workspace_size.
349
 * Whoever implements this function prototype means we handed over the autotune task to the
350
 * command itself, you are responsible to select the best algorithm.
351
 * @return The selected algorithm.
352
 */
353
typedef int(*ccv_nnc_cmd_autotune_f)(const ccv_nnc_cmd_t cmd, const size_t max_workspace_size, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context);
354
355
/**
356
 * The vtab of a command: how to execute it, and how to automatically deduce its tensor shapes.
357
 */
358
359
typedef struct ccv_nnc_cmd_vtab_s {
360
  ccv_nnc_cmd_exec_f exec; /**< The execution callback, see ccv_nnc_cmd_exec_f. */
361
  void (*tensor_auto)(const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size); /**< Receives the input tensor parameters and the hint; presumably fills in the (non-const) output tensor parameters - confirm. */
362
} ccv_nnc_cmd_vtab_t;
363
364
/** @} */
365
366
/**
367
 * @defgroup level_1_uops Micro Commands to Define Commands
368
 * @{
369
 */
370
371
/**
372
 * @page micro_jittor The concept of meta-ops in Jittor is amazing
373
 *
374
 * NNC will never do JIT. Particularly, I will never do codegen and compile at runtime, especially with static shapes.
375
 * The reason is pretty simple. JIT would be too architecture-dependent and, with that, make it almost impossible for NNC
376
 * to be this small embeddable library that you can carry everywhere. However, this shouldn't prevent NNC to generate
377
 * proper descriptions of each command so a JIT version can be built if there is architectural support for it. In this
378
 * way, the core of NNC can be small and embeddable, but a new backend (identified by the backend attribute) can implement
379
 * more sophisticated JIT mechanism.
380
 *
381
 * Moreover, I need to generate some code for reference implementations, ideally from some descriptions. This is important
382
 * because with 90+ ops, having a correctly implemented command turns out to be more challenging than I expected.
383
 * Especially if I want them to be compliant with the metadata that describes them (what shape it accepts, what datatype works,
384
 * whether it can accept tensor views, and how in-place tensors are supported). Many of the reference commands do not support
385
 * all datatypes and tensor views, and this has to be rectified because these are "reference commands", they must be.
386
 *
387
 * Jittor introduced to the world the idea of meta-ops. Basically, it claims every op (or macro op) can be broken down into
388
 * 3 types of micro ops (they call them meta-ops): a reindex op that can map tensor from one dimensionality to another, an
389
 * element-wise op that does element-wise primitive math, and finally, a reduce op that can reduce along particular axis
390
 * of a tensor with some elementary math. This feels rather limited initially, but when thinking through it, I am convinced
391
 * it should be enough to describe all commands presented in NNC (this shouldn't be a surprise actually).
392
 *
393
 * Thus, the plan now is to use the meta-ops idea, implementing new micro commands that can describe other commands in
394
 * NNC. In this way, I can generate reference implementation from these descriptions and hopefully have better coverage
395
 * than my existing CPU / GPU reference implementations.
396
 *
397
 * To build on top of what Jittor did, if you need to have more dynamism in the ops, it is essential to index with the provided
398
 * tensor. With just reindex, binary operands and reduce, you cannot do that. Thus, on top of these 3, we added the 4th
399
 * micro op (meta-op) that is "select". This will be sufficient to implement ops such as masking.
400
 *
401
 */
402
403
/**
404
 * Abstract vtab for different ccv_nnc_micro_io_t.
405
 */
406
typedef struct ccv_nnc_micro_io_vtab_s ccv_nnc_micro_io_vtab_t;
407
408
enum {
409
  // There could be many more unary ops.
410
  CCV_NNC_MICRO_UNARY_OP_NEG,
411
  CCV_NNC_MICRO_UNARY_OP_LOG,
412
  CCV_NNC_MICRO_UNARY_OP_EXP,
413
};
414
415
enum {
416
  CCV_NNC_MICRO_BINARY_OP_PLUS,
417
  CCV_NNC_MICRO_BINARY_OP_MINUS,
418
  CCV_NNC_MICRO_BINARY_OP_MUL,
419
  CCV_NNC_MICRO_BINARY_OP_DIV,
420
  CCV_NNC_MICRO_BINARY_OP_MAX,
421
  CCV_NNC_MICRO_BINARY_OP_MIN,
422
  CCV_NNC_MICRO_BINARY_OP_EQUAL_TO,
423
  CCV_NNC_MICRO_BINARY_OP_LESS_THAN,
424
};
425
426
enum {
427
  CCV_NNC_MICRO_REDUCE_OP_MAX,
428
  CCV_NNC_MICRO_REDUCE_OP_MIN,
429
  CCV_NNC_MICRO_REDUCE_OP_ARGMAX,
430
  CCV_NNC_MICRO_REDUCE_OP_ARGMIN,
431
  CCV_NNC_MICRO_REDUCE_OP_MEAN, // Mean is complicated, we need a way to compute total for loops after this. It has to be done statically, and that is "interesting".
432
  CCV_NNC_MICRO_REDUCE_OP_SUM,
433
  CCV_NNC_MICRO_REDUCE_OP_PROD,
434
};
435
436
/**
437
 * Abstract micro op representation.
438
 */
439
typedef struct ccv_nnc_micro_io_s* ccv_nnc_micro_io_t;
440
441
struct ccv_nnc_micro_io_s {
442
  const ccv_nnc_micro_io_vtab_t* isa;
443
  ccv_nnc_micro_io_t* inputs;
444
  int input_size;
445
  int dimensions;
446
  int id;
447
};
448
449
typedef struct {
450
  // Type of the scalar is about precision, nothing to restrict the tensor's type. For example, we may assign a int32_t 0
451
  // to a float16 tensor element, this is perfectly fine.
452
  int type;
453
  union {
454
    unsigned char u8;
455
    int i32;
456
    ccv_float16_t f16;
457
    float f32;
458
    int64_t i64;
459
    uint64_t u64;
460
    double f64;
461
  };
462
} ccv_nnc_micro_scalar_t;
463
464
/**
465
 * Create a free-form input that represent a tensor.
466
 * @param dimensions The maximum dimension of the input.
467
 */
468
CCV_WARN_UNUSED(ccv_nnc_micro_io_t) ccv_nnc_micro_input(const int dimensions);
469
/**
470
 * Use shape and reindex expression to reindex the given tensor into a different shape.
471
 * The expressions can bind integer parameters which starts with $.
472
 *
473
 * The expression follows specific pattern, integer parameters starts with $. Dimensions are represented as dXn, such
474
 * as dA0, dA1, dA2 ... Index into the provided tensor can be represented as i0, i1, i2. These are all 0-indexed.
475
 *
476
 * Constants are supported, such as 235, 431 etc. Operators supported currently are -, +, /, *.
477
 *
478
 * Thus, for broadcast a tensor x[w, h] to y[w, h, h], it can be represented as:
479
 * shape: { "dA0", "dA1", "dA1" }, reindex: { "i0", "i1", "0" }.
480
 * For example, transpose can be represented as:
481
 * shape: { "dA1", "dA0" }, reindex: { "i1", "i0" }
482
 *
483
 * @param shape The shape expressions per axis.
484
 * @param shape_count The dimensions of the output.
485
 * @param ss The tensors to reference shape dimensions.
486
 * @param s_count The number of tensors to reference shape dimensions.
487
 * @param reindex The reindex expressions per axis.
488
 * @param reindex_count The dimensions of the input.
489
 * @param x The input for reindex operation.
490
 * @return The reindexed tensor.
491
 */
492
CCV_WARN_UNUSED(ccv_nnc_micro_io_t) ccv_nnc_micro_reindex(const char* const* const shape, const int shape_count, const ccv_nnc_micro_io_t* const ss, const int s_count, const char* const* const reindex, const int reindex_count, const ccv_nnc_micro_io_t x);
493
/**
494
 * Apply element-wise computations with one tensor.
495
 * @param op The unary operator (one of CCV_NNC_MICRO_UNARY_OP_*).
496
 * @param x The input.
497
 * @return The result tensor.
498
 */
499
CCV_WARN_UNUSED(ccv_nnc_micro_io_t) ccv_nnc_micro_unary(const uint32_t op, const ccv_nnc_micro_io_t x);
500
/**
501
 * Apply pair-wise computations with two tensors. Their shapes have to match exactly.
502
 * @param op The binary operator (one of CCV_NNC_MICRO_BINARY_OP_*).
503
 * @param left The left input.
504
 * @param right The right input.
505
 * @return The result tensor.
506
 */
507
CCV_WARN_UNUSED(ccv_nnc_micro_io_t) ccv_nnc_micro_binary(const uint32_t op, const ccv_nnc_micro_io_t left, const ccv_nnc_micro_io_t right);
508
/**
509
 * Apply reduction computation against some dimensions and generate the final reduced tensor.
510
 * @param op The reduction operator (one of CCV_NNC_MICRO_REDUCE_OP_*).
511
 * @param axis The axis to reduce.
512
 * @param axis_count Number of axes.
513
 * @param x The input tensor.
514
 * @return The result tensor after reduction.
515
 */
516
CCV_WARN_UNUSED(ccv_nnc_micro_io_t) ccv_nnc_micro_reduce(const uint8_t op, const int* const axis, const int axis_count, const ccv_nnc_micro_io_t x);
517
/**
518
 * Use the index tensor to select one value from the x per axis.
519
 * @param axis The axis to select.
520
 * @param x The tensor to be indexed.
521
 * @param index The integer tensor of indexes.
522
 * @return The result tensor with values selected from x with index from index tensor.
523
 */
524
CCV_WARN_UNUSED(ccv_nnc_micro_io_t) ccv_nnc_micro_select(const int axis, const ccv_nnc_micro_io_t x, const ccv_nnc_micro_io_t index);
525
/**
526
 * Return the gradient for a particular output. For example, if x is ccv_nnc_micro_unary(exp, input),
527
 * this represents the gradient of x, not the input. This method is used to generate representation
528
 * of gradients for ccv_nnc_micro_combine_new method.
529
 * @param x The tensor to take a gradient of.
530
 * @return The result tensor that represents the gradient of x.
531
 */
532
CCV_WARN_UNUSED(ccv_nnc_micro_io_t) ccv_nnc_micro_grad(const ccv_nnc_micro_io_t x);
533
/**
534
 * The combined op from micro ops.
535
 */
536
typedef struct ccv_nnc_micro_combine_s ccv_nnc_micro_combine_t;
537
/**
538
 * Combine micro ops into one, and do some optimization passes. The combined one can be then processed to generate
539
 * optimized kernels. Particularly, we can process the combined one into C code and CUDA code as reference
540
 * implementations.
541
 * @param inputs The inputs for the combined ops.
542
 * @param input_size The number of the inputs.
543
 * @param parameters The names of the parameters, this determines the order of these parameters.
544
 * @param parameter_size The number of parameters.
545
 * @param outputs The outputs for the combined ops.
546
 * @param output_size The number of the outputs.
547
 * @param ingrads The gradient inputs for the combined ops, including any inputs / outputs if there are any.
548
 * @param ingrad_size The number of ingrads.
549
 * @param outgrads The gradient outputs for the combined ops.
550
 * @param outgrad_size The number of outgrads.
551
 */
552
CCV_WARN_UNUSED(ccv_nnc_micro_combine_t*) ccv_nnc_micro_combine_new(const ccv_nnc_micro_io_t* const inputs, const int input_size, const char* const* const parameters, const int parameter_size, const ccv_nnc_micro_io_t* const outputs, const int output_size, const ccv_nnc_micro_io_t* const ingrads, const int ingrad_size, const ccv_nnc_micro_io_t* const outgrads, const int outgrad_size);
553
/**
554
 * Free the combined op.
555
 * @param combine The op to be freed.
556
 */
557
void ccv_nnc_micro_combine_free(ccv_nnc_micro_combine_t* const combine);
558
/**
559
 * Run combined op in interpret mode. This is only useful for debug internals. Because this is for
560
 * generic combined op, there is no hint, or flags, or stream context, or cmd.
561
 * @param combine The op.
562
 * @param cmd Choice between CMD_CUSTOM_FORWARD and CMD_CUSTOM_BACKWARD.
563
 * @param inputs The input tensors.
564
 * @param input_size The size of input tensors.
565
 * @param values The value corresponding to the parameters when call ccv_nnc_micro_combine_new.
566
 * @param parameter_size How many parameters. It must match when called ccv_nnc_micro_combine_new.
567
 * @param outputs The output tensors.
568
 * @param output_size The size of output tensors.
569
 */
570
void ccv_nnc_micro_combine_interpret(ccv_nnc_micro_combine_t* const combine, const uint32_t cmd, ccv_nnc_tensor_t* const* const inputs, const int input_size, const ccv_nnc_micro_scalar_t* const values, const int parameter_size, ccv_nnc_tensor_t* const* const outputs, const int output_size);
571
/**
572
 * Generate C code from the combined op.
573
 * @param combine The combined op to generate some C code.
574
 * @return The generated C code string.
575
 */
576
char* ccv_nnc_micro_combine_c(ccv_nnc_micro_combine_t* const combine);
577
578
/** @} */
579
580
/**
581
 * @defgroup level_1_tensor Tensors
582
 * @{
583
 */
584
585
/**
586
 * Count the dimensionality of a tensor: the index of the first zero entry in dim, or CCV_NNC_MAX_DIM_ALLOC if no entry is zero.
587
 */
588
static inline int ccv_nnc_tensor_nd(const int dim[CCV_NNC_MAX_DIM_ALLOC])
589
2.51M
{
590
2.51M
  int i;
591
6.68M
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++4.17M
)
592
6.68M
    if (dim[i] == 0)
593
2.51M
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
2.51M
}
Unexecuted instantiation: adam.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: cifar.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: cnnp.core.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: compare.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: compression.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: concat.tests.c:ccv_nnc_tensor_nd
cublas.tests.c:ccv_nnc_tensor_nd
Line
Count
Source
589
2
{
590
2
  int i;
591
6
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++4
)
592
6
    if (dim[i] == 0)
593
2
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
2
}
Unexecuted instantiation: cudnn.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: datatype.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: dense.net.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: dynamic.graph.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: gelu.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: graph.vgg.d.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: imdb.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: index.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: lamb.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: leaky_relu.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: loss.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: lstm.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: mpsblas.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: mpsdnn.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: nccl.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: nms.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: pad.tests.c:ccv_nnc_tensor_nd
palettize.tests.c:ccv_nnc_tensor_nd
Line
Count
Source
589
4
{
590
4
  int i;
591
13
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++9
)
592
13
    if (dim[i] == 0)
593
4
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
4
}
Unexecuted instantiation: parallel.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: partition.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: random.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: reduce.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: rmsprop.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: roi_align.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: scatter_add.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: schedule.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: sgd.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: smooth_l1.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: sort.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: swish.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: symbolic.graph.vgg.d.tests.c:ccv_nnc_tensor_nd
tensor.tests.c:ccv_nnc_tensor_nd
Line
Count
Source
589
18
{
590
18
  int i;
591
36
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++18
)
592
36
    if (dim[i] == 0)
593
18
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
18
}
Unexecuted instantiation: transform.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: unique_consecutive.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: upsample.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: attention.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: autograd.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: autograd.vector.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: backward.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: batch.norm.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: broadcast.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: case_of.backward.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: case_of.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: cblas.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: complex.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: crossentropy.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: custom.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: dataframe.addons.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: dataframe.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: dropout.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: forward.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: gemm.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: gradient.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: graph.io.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: graph.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: group.norm.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: histogram.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: layer.norm.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: micro.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: minimize.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: numa.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: rand.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: rmsnorm.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: simplify.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: symbolic.graph.compile.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: symbolic.graph.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: tape.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: tensor.bind.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: tfb.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: while.backward.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: while.tests.c:ccv_nnc_tensor_nd
Unexecuted instantiation: winograd.tests.c:ccv_nnc_tensor_nd
ccv_nnc_cmd.c:ccv_nnc_tensor_nd
Line
Count
Source
589
347k
{
590
347k
  int i;
591
845k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++497k
)
592
845k
    if (dim[i] == 0)
593
347k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
347k
}
ccv_nnc_tensor.c:ccv_nnc_tensor_nd
Line
Count
Source
589
374
{
590
374
  int i;
591
1.36k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++991
)
592
1.36k
    if (dim[i] == 0)
593
374
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
374
}
ccv_nnc_tensor_io.c:ccv_nnc_tensor_nd
Line
Count
Source
589
46
{
590
46
  int i;
591
175
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++129
)
592
175
    if (dim[i] == 0)
593
46
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
46
}
Unexecuted instantiation: ccv_nnc_stream.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_micro.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_micro_core.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_micro_interpret.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_micro_simplify.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_graph.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_symbolic_graph.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_symbolic_graph_io.c:ccv_nnc_tensor_nd
ccv_nnc_symbolic_graph_compile.c:ccv_nnc_tensor_nd
Line
Count
Source
589
4.66k
{
590
4.66k
  int i;
591
10.4k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++5.79k
)
592
10.4k
    if (dim[i] == 0)
593
4.66k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
4.66k
}
ccv_nnc_symbolic_graph_backward.c:ccv_nnc_tensor_nd
Line
Count
Source
589
2.18k
{
590
2.18k
  int i;
591
4.62k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++2.44k
)
592
4.62k
    if (dim[i] == 0)
593
2.18k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
2.18k
}
Unexecuted instantiation: ccv_nnc_symbolic_graph_while.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_graph_while.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_tensor_tape.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_symbolic_graph_case_of.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_graph_case_of.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_symbolic_graph_minimize.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_symbolic_graph_parallel.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_symbolic_graph_simplify.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_symbolic_graph_memory_compression.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_symbolic_graph_memory_reduction.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_graph_run.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_xpu_alloc.c:ccv_nnc_tensor_nd
ccv_nnc_dynamic_graph.c:ccv_nnc_tensor_nd
Line
Count
Source
589
1.05k
{
590
1.05k
  int i;
591
2.15k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++1.10k
)
592
2.15k
    if (dim[i] == 0)
593
1.05k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
1.05k
}
Unexecuted instantiation: ccv_nnc_dynamic_graph_alloc.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_dynamic_graph_backward.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_dynamic_graph_apply_gradients.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_dynamic_graph_minimize.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_dynamic_graph_evaluate.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_cnnp_dataframe.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_cnnp_dataframe_core.c:ccv_nnc_tensor_nd
ccv_cnnp_dataframe_addons.c:ccv_nnc_tensor_nd
Line
Count
Source
589
183k
{
590
183k
  int i;
591
730k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++546k
)
592
730k
    if (dim[i] == 0)
593
183k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
183k
}
Unexecuted instantiation: ccv_cnnp_dataframe_csv.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_cnnp_model.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_cnnp_model_io.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_cnnp_model_core.c:ccv_nnc_tensor_nd
ccv_cnnp_model_addons.c:ccv_nnc_tensor_nd
Line
Count
Source
589
3.97k
{
590
3.97k
  int i;
591
9.65k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++5.67k
)
592
9.65k
    if (dim[i] == 0)
593
3.97k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
3.97k
}
Unexecuted instantiation: ccv_nnc_palettize.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_cnnp_model_gradient_checkpointing.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_symbolic_graph_chain_decomposition.c:ccv_nnc_tensor_nd
ccv_nnc_adam_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
16.1k
{
590
16.1k
  int i;
591
48.2k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++32.1k
)
592
48.2k
    if (dim[i] == 0)
593
16.1k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
16.1k
}
ccv_nnc_adamw_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
16.1k
{
590
16.1k
  int i;
591
48.2k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++32.1k
)
592
48.2k
    if (dim[i] == 0)
593
16.1k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
16.1k
}
ccv_nnc_gemm_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
302k
{
590
302k
  int i;
591
810k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++508k
)
592
810k
    if (dim[i] == 0)
593
302k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
302k
}
ccv_nnc_gemm_cpu_opt.c:ccv_nnc_tensor_nd
Line
Count
Source
589
1.09k
{
590
1.09k
  int i;
591
2.68k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++1.58k
)
592
2.68k
    if (dim[i] == 0)
593
1.09k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
1.09k
}
ccv_nnc_add_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
205k
{
590
205k
  int i;
591
479k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++274k
)
592
479k
    if (dim[i] == 0)
593
205k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
205k
}
ccv_nnc_mul_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
109k
{
590
109k
  int i;
591
221k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++111k
)
592
221k
    if (dim[i] == 0)
593
109k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
109k
}
ccv_nnc_cmul_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
173
{
590
173
  int i;
591
569
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++396
)
592
569
    if (dim[i] == 0)
593
173
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
173
}
ccv_nnc_segmented_gemm_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
106
{
590
106
  int i;
591
348
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++242
)
592
348
    if (dim[i] == 0)
593
106
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
106
}
ccv_nnc_min_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
82
{
590
82
  int i;
591
312
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++230
)
592
312
    if (dim[i] == 0)
593
82
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
82
}
ccv_nnc_max_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
82
{
590
82
  int i;
591
312
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++230
)
592
312
    if (dim[i] == 0)
593
82
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
82
}
ccv_nnc_lssc_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
96
{
590
96
  int i;
591
448
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++352
)
592
448
    if (dim[i] == 0)
593
96
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
96
}
ccv_nnc_conv_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
10.1k
{
590
10.1k
  int i;
591
47.1k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++37.0k
)
592
47.1k
    if (dim[i] == 0)
593
10.1k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
10.1k
}
ccv_nnc_conv_cpu_opt.c:ccv_nnc_tensor_nd
Line
Count
Source
589
3.13k
{
590
3.13k
  int i;
591
14.8k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++11.7k
)
592
14.8k
    if (dim[i] == 0)
593
3.13k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
3.13k
}
ccv_nnc_conv_transpose_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
40
{
590
40
  int i;
591
176
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++136
)
592
176
    if (dim[i] == 0)
593
40
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
40
}
ccv_nnc_dropout_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
24
{
590
24
  int i;
591
72
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++48
)
592
72
    if (dim[i] == 0)
593
24
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
24
}
ccv_nnc_ew_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
765k
{
590
765k
  int i;
591
1.89M
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++1.13M
)
592
1.89M
    if (dim[i] == 0)
593
765k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
765k
}
Unexecuted instantiation: ccv_nnc_gelu_cpu_ref.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_grid_sample_cpu_ref.c:ccv_nnc_tensor_nd
ccv_nnc_histogram_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
4
{
590
4
  int i;
591
20
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++16
)
592
20
    if (dim[i] == 0)
593
4
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
4
}
ccv_nnc_index_select_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
33
{
590
33
  int i;
591
84
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++51
)
592
84
    if (dim[i] == 0)
593
33
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
33
}
ccv_nnc_reduce_isnan_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
21
{
590
21
  int i;
591
55
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++34
)
592
55
    if (dim[i] == 0)
593
21
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
21
}
ccv_nnc_lamb_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
48
{
590
48
  int i;
591
96
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++48
)
592
96
    if (dim[i] == 0)
593
48
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
48
}
Unexecuted instantiation: ccv_nnc_leaky_relu_cpu_ref.c:ccv_nnc_tensor_nd
ccv_nnc_binary_crossentropy_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
194
{
590
194
  int i;
591
550
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++356
)
592
550
    if (dim[i] == 0)
593
194
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
194
}
ccv_nnc_categorical_crossentropy_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
35
{
590
35
  int i;
591
93
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++58
)
592
93
    if (dim[i] == 0)
593
35
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
35
}
ccv_nnc_mse_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
2.19k
{
590
2.19k
  int i;
591
4.48k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++2.28k
)
592
4.48k
    if (dim[i] == 0)
593
2.19k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
2.19k
}
ccv_nnc_smooth_l1_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
63
{
590
63
  int i;
591
177
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++114
)
592
177
    if (dim[i] == 0)
593
63
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
63
}
ccv_nnc_nms_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
18
{
590
18
  int i;
591
48
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++30
)
592
48
    if (dim[i] == 0)
593
18
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
18
}
ccv_nnc_batch_norm_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
438
{
590
438
  int i;
591
1.41k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++975
)
592
1.41k
    if (dim[i] == 0)
593
438
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
438
}
ccv_nnc_layer_norm_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
208
{
590
208
  int i;
591
1.04k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++832
)
592
1.04k
    if (dim[i] == 0)
593
208
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
208
}
ccv_nnc_group_norm_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
331
{
590
331
  int i;
591
1.57k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++1.24k
)
592
1.57k
    if (dim[i] == 0)
593
331
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
331
}
ccv_nnc_rmsnorm_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
166
{
590
166
  int i;
591
830
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++664
)
592
830
    if (dim[i] == 0)
593
166
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
166
}
ccv_nnc_pad_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
140
{
590
140
  int i;
591
490
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++350
)
592
490
    if (dim[i] == 0)
593
140
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
140
}
ccv_nnc_partition_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
117
{
590
117
  int i;
591
369
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++252
)
592
369
    if (dim[i] == 0)
593
117
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
117
}
ccv_nnc_max_pool_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
4.25k
{
590
4.25k
  int i;
591
21.1k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++16.9k
)
592
21.1k
    if (dim[i] == 0)
593
4.25k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
4.25k
}
ccv_nnc_avg_pool_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
5.47k
{
590
5.47k
  int i;
591
27.2k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++21.8k
)
592
27.2k
    if (dim[i] == 0)
593
5.47k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
5.47k
}
Unexecuted instantiation: ccv_nnc_rand_uniform_cpu_ref.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_rand_normal_cpu_ref.c:ccv_nnc_tensor_nd
ccv_nnc_reduce_sum_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
78.6k
{
590
78.6k
  int i;
591
236k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++158k
)
592
236k
    if (dim[i] == 0)
593
78.6k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
78.6k
}
ccv_nnc_reduce_mean_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
41
{
590
41
  int i;
591
112
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++71
)
592
112
    if (dim[i] == 0)
593
41
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
41
}
ccv_nnc_reduce_max_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
41
{
590
41
  int i;
591
99
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++58
)
592
99
    if (dim[i] == 0)
593
41
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
41
}
ccv_nnc_reduce_min_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
21
{
590
21
  int i;
591
59
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++38
)
592
59
    if (dim[i] == 0)
593
21
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
21
}
ccv_nnc_reduce_norm2_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
115
{
590
115
  int i;
591
299
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++184
)
592
299
    if (dim[i] == 0)
593
115
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
115
}
ccv_nnc_argmax_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
8
{
590
8
  int i;
591
27
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++19
)
592
27
    if (dim[i] == 0)
593
8
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
8
}
ccv_nnc_argmin_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
6
{
590
6
  int i;
591
21
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++15
)
592
21
    if (dim[i] == 0)
593
6
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
6
}
Unexecuted instantiation: ccv_nnc_relu_cpu_ref.c:ccv_nnc_tensor_nd
ccv_nnc_rmsprop_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
16.0k
{
590
16.0k
  int i;
591
48.0k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++32.0k
)
592
48.0k
    if (dim[i] == 0)
593
16.0k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
16.0k
}
ccv_nnc_roi_align_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
104
{
590
104
  int i;
591
406
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++302
)
592
406
    if (dim[i] == 0)
593
104
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
104
}
ccv_nnc_scaled_dot_product_attention_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
145
{
590
145
  int i;
591
710
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++565
)
592
710
    if (dim[i] == 0)
593
145
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
145
}
ccv_nnc_scatter_add_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
27
{
590
27
  int i;
591
68
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++41
)
592
68
    if (dim[i] == 0)
593
27
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
27
}
ccv_nnc_sgd_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
220k
{
590
220k
  int i;
591
570k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++349k
)
592
570k
    if (dim[i] == 0)
593
220k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
220k
}
Unexecuted instantiation: ccv_nnc_sigmoid_cpu_ref.c:ccv_nnc_tensor_nd
ccv_nnc_sigmoid_binary_crossentropy_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
238
{
590
238
  int i;
591
698
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++460
)
592
698
    if (dim[i] == 0)
593
238
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
238
}
ccv_nnc_softmax_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
514
{
590
514
  int i;
591
1.04k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++531
)
592
1.04k
    if (dim[i] == 0)
593
514
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
514
}
ccv_nnc_softmax_crossentropy_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
1.24k
{
590
1.24k
  int i;
591
3.12k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++1.87k
)
592
3.12k
    if (dim[i] == 0)
593
1.24k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
1.24k
}
ccv_nnc_sort_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
51
{
590
51
  int i;
591
129
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++78
)
592
129
    if (dim[i] == 0)
593
51
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
51
}
Unexecuted instantiation: ccv_nnc_swish_cpu_ref.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_tanh_cpu_ref.c:ccv_nnc_tensor_nd
ccv_nnc_unique_consecutive_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
18
{
590
18
  int i;
591
36
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++18
)
592
36
    if (dim[i] == 0)
593
18
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
18
}
ccv_nnc_upsample_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
114
{
590
114
  int i;
591
456
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++342
)
592
456
    if (dim[i] == 0)
593
114
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
114
}
ccv_nnc_util_cpu_ref.c:ccv_nnc_tensor_nd
Line
Count
Source
589
310
{
590
310
  int i;
591
1.30k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++994
)
592
1.30k
    if (dim[i] == 0)
593
310
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
310
}
Unexecuted instantiation: ccv_nnc_adam.c:ccv_nnc_tensor_nd
ccv_nnc_blas.c:ccv_nnc_tensor_nd
Line
Count
Source
589
120k
{
590
120k
  int i;
591
303k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++183k
)
592
303k
    if (dim[i] == 0)
593
120k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
120k
}
_ccv_nnc_gemm_cpu_opt.c:ccv_nnc_tensor_nd
Line
Count
Source
589
603
{
590
603
  int i;
591
1.47k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++867
)
592
1.47k
    if (dim[i] == 0)
593
603
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
603
}
_ccv_nnc_gemm_cpu_sys.c:ccv_nnc_tensor_nd
Line
Count
Source
589
67.9k
{
590
67.9k
  int i;
591
177k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++109k
)
592
177k
    if (dim[i] == 0)
593
67.9k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
67.9k
}
Unexecuted instantiation: ccv_nnc_comm.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_cmp.c:ccv_nnc_tensor_nd
ccv_nnc_compression.c:ccv_nnc_tensor_nd
Line
Count
Source
589
10
{
590
10
  int i;
591
48
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++38
)
592
48
    if (dim[i] == 0)
593
10
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
10
}
_ccv_nnc_conv_cpu_4x4_3x3_winograd.c:ccv_nnc_tensor_nd
Line
Count
Source
589
380
{
590
380
  int i;
591
1.52k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++1.14k
)
592
1.52k
    if (dim[i] == 0)
593
380
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
380
}
Unexecuted instantiation: _ccv_nnc_conv_cpu_fft.c:ccv_nnc_tensor_nd
_ccv_nnc_conv_cpu_gemm.c:ccv_nnc_tensor_nd
Line
Count
Source
589
8
{
590
8
  int i;
591
32
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++24
)
592
32
    if (dim[i] == 0)
593
8
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
8
}
_ccv_nnc_conv_cpu_opt.c:ccv_nnc_tensor_nd
Line
Count
Source
589
4.46k
{
590
4.46k
  int i;
591
21.8k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++17.3k
)
592
21.8k
    if (dim[i] == 0)
593
4.46k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
4.46k
}
ccv_nnc_convolution.c:ccv_nnc_tensor_nd
Line
Count
Source
589
13.1k
{
590
13.1k
  int i;
591
61.2k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++48.0k
)
592
61.2k
    if (dim[i] == 0)
593
13.1k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
13.1k
}
Unexecuted instantiation: ccv_nnc_dropout.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_ew.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_gelu.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_grid_sample.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_histogram.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_index_select.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_reduce_isnan.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_lamb.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_leaky_relu.c:ccv_nnc_tensor_nd
ccv_nnc_binary_crossentropy.c:ccv_nnc_tensor_nd
Line
Count
Source
589
12
{
590
12
  int i;
591
36
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++24
)
592
36
    if (dim[i] == 0)
593
12
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
12
}
ccv_nnc_categorical_crossentropy.c:ccv_nnc_tensor_nd
Line
Count
Source
589
31
{
590
31
  int i;
591
96
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++65
)
592
96
    if (dim[i] == 0)
593
31
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
31
}
ccv_nnc_mse.c:ccv_nnc_tensor_nd
Line
Count
Source
589
18
{
590
18
  int i;
591
36
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++18
)
592
36
    if (dim[i] == 0)
593
18
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
18
}
ccv_nnc_smooth_l1.c:ccv_nnc_tensor_nd
Line
Count
Source
589
4
{
590
4
  int i;
591
12
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++8
)
592
12
    if (dim[i] == 0)
593
4
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
4
}
Unexecuted instantiation: ccv_nnc_nms.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_norm.c:ccv_nnc_tensor_nd
ccv_nnc_pad.c:ccv_nnc_tensor_nd
Line
Count
Source
589
3
{
590
3
  int i;
591
15
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++12
)
592
15
    if (dim[i] == 0)
593
3
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
3
}
Unexecuted instantiation: ccv_nnc_partition.c:ccv_nnc_tensor_nd
ccv_nnc_pool.c:ccv_nnc_tensor_nd
Line
Count
Source
589
4.07k
{
590
4.07k
  int i;
591
18.3k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++14.2k
)
592
18.3k
    if (dim[i] == 0)
593
4.07k
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
4.07k
}
Unexecuted instantiation: ccv_nnc_rand.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_reduce.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_relu.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_rmsprop.c:ccv_nnc_tensor_nd
ccv_nnc_lstm.c:ccv_nnc_tensor_nd
Line
Count
Source
589
11
{
590
11
  int i;
591
44
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++33
)
592
44
    if (dim[i] == 0)
593
11
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
11
}
Unexecuted instantiation: ccv_nnc_roi_align.c:ccv_nnc_tensor_nd
ccv_nnc_scaled_dot_product_attention.c:ccv_nnc_tensor_nd
Line
Count
Source
589
57
{
590
57
  int i;
591
285
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++228
)
592
285
    if (dim[i] == 0)
593
57
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
57
}
Unexecuted instantiation: ccv_nnc_scatter_add.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_sgd.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_sigmoid.c:ccv_nnc_tensor_nd
ccv_nnc_sigmoid_binary_crossentropy.c:ccv_nnc_tensor_nd
Line
Count
Source
589
36
{
590
36
  int i;
591
108
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++72
)
592
108
    if (dim[i] == 0)
593
36
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
36
}
Unexecuted instantiation: ccv_nnc_softmax.c:ccv_nnc_tensor_nd
ccv_nnc_softmax_crossentropy.c:ccv_nnc_tensor_nd
Line
Count
Source
589
860
{
590
860
  int i;
591
2.56k
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++1.70k
)
592
2.56k
    if (dim[i] == 0)
593
860
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
860
}
Unexecuted instantiation: ccv_nnc_sort.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_swish.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_tanh.c:ccv_nnc_tensor_nd
Unexecuted instantiation: ccv_nnc_unique_consecutive.c:ccv_nnc_tensor_nd
ccv_nnc_upsample.c:ccv_nnc_tensor_nd
Line
Count
Source
589
12
{
590
12
  int i;
591
60
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; 
i++48
)
592
60
    if (dim[i] == 0)
593
12
      return i;
594
0
  return CCV_NNC_MAX_DIM_ALLOC;
595
12
}
Unexecuted instantiation: ccv_nnc_util.c:ccv_nnc_tensor_nd
596
597
/**
598
 * Create a new tensor.
599
 * @param ptr If 0, nnc will allocate the tensor itself. Otherwise, it will use the memory region referenced by 'ptr'.
600
 * @param params Tensor parameters.
601
 * @param flags Reserved flags for the allocation.
602
 * @return The newly created tensor.
603
 */
604
CCV_WARN_UNUSED(ccv_nnc_tensor_t*) ccv_nnc_tensor_new(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags);
605
enum {
606
  CCV_NNC_TENSOR_MEMORY_MAP_EAGER = 0x1, /**< Load tensor mapped directly. */
607
  CCV_NNC_TENSOR_MEMORY_MAP_ON_DEMAND = 0x2, /**< Defer tensor map until read on supported devices. */
608
};
609
/**
610
 * Create a new tensor with data from a file. This will create a mmap tensor if that is preferred.
611
 * @param params Tensor parameters.
612
 * @param filename The file to load tensor content from.
613
 * @param offset The offset to the tensor content from the file.
614
 * @param flags Reserved flags for this loading.
615
 * @return The newly created tensor.
616
 */
617
CCV_WARN_UNUSED(ccv_nnc_tensor_t*) ccv_nnc_tensor_new_from_file(const ccv_nnc_tensor_param_t params, const char* const filename, const off_t offset, const int flags);
618
/**
619
 * Create a new tensor with data from a pointer. This method handles copy to GPU implicitly.
620
 * @param params Tensor parameters.
621
 * @param bufptr The pointer to load tensor content from.
622
 * @param flags Reserved flags for this loading.
623
 * @return The newly created tensor.
624
 */
625
CCV_WARN_UNUSED(ccv_nnc_tensor_t*) ccv_nnc_tensor_new_from_raw(const ccv_nnc_tensor_param_t params, const void* const bufptr, const size_t buf_size, const int flags);
626
/**
627
 * Create a new tensor on stack.
628
 * @param ptr If 0, nnc will allocate the tensor ourselves. Otherwise, will use the memory region referenced by 'ptr'.
629
 * @param params Tensor parameters.
630
 * @param flags Reserved flags for the allocation.
631
 * @return The tensor struct.
632
 */
633
CCV_WARN_UNUSED(ccv_nnc_tensor_t) ccv_nnc_tensor(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags);
634
/**
635
 * Resize an existing tensor to a new dimension.
636
 * @param tensor The old tensor to be resized.
637
 * @param params Tensor parameters.
638
 * @return Potentially a new tensor, but if the size is sufficient, it will be in-place operation.
639
 */
640
CCV_WARN_UNUSED(ccv_nnc_tensor_t*) ccv_nnc_tensor_resize(ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params);
641
/**
642
 * Pin the tensor memory for faster access on GPU.
643
 * @param tensor A tensor that we want to pin the memory.
644
 * @return 0 for success.
645
 */
646
int ccv_nnc_tensor_pin_memory(ccv_nnc_tensor_t* const tensor);
647
/**
648
 * Free a tensor object.
649
 * @param tensor The tensor to be freed.
650
 */
651
void ccv_nnc_tensor_free(ccv_nnc_tensor_t* const tensor);
652
/**
653
 * Create a tensor view. A tensor view can be non-continuous. Essentially, it provides a view into a tensor.
654
 * @param tensor The tensor that we want to view into.
655
 * @param params The tensor parameters for the tensor view.
656
 * @param ofs The offset on each of the dimension.
657
 * @param stride The stride of each dimension.
658
 * @return The newly created tensor view.
659
 */
660
CCV_WARN_UNUSED(ccv_nnc_tensor_view_t*) ccv_nnc_tensor_view_new(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC]);
661
/**
662
 * Create a tensor view on stack.
663
 * @param tensor The tensor that we want to view into.
664
 * @param params The tensor parameters for the tensor view.
665
 * @param ofs The offset on each of the dimension.
666
 * @param stride The line size of each dimension.
667
 * @return The tensor view struct.
668
 */
669
CCV_WARN_UNUSED(ccv_nnc_tensor_view_t) ccv_nnc_tensor_view(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC]);
670
/**
671
 * Free a tensor view object.
672
 * @param tensor_view The tensor view to be freed.
673
 */
674
void ccv_nnc_tensor_view_free(ccv_nnc_tensor_view_t* const tensor_view);
675
/**
676
 * Zero out a given tensor.
677
 * @param tensor The tensor to be zeroed out.
678
 */
679
void ccv_nnc_tensor_zero(void* const tensor);
680
/**
681
 * Compare whether two tensors are equal. This will tolerate some floating point issues, following http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
682
 * @param a Tensor a.
683
 * @param b Tensor b.
684
 * @return 0 if equal, -1 otherwise.
685
 */
686
CCV_WARN_UNUSED(int) ccv_nnc_tensor_eq(const ccv_nnc_tensor_t* const a, const ccv_nnc_tensor_t* const b);
687
/**
688
 * Format a tensor output to string so that it can be used as debug output for other languages. This will look like:
689
 * [
690
 *   0.13, 0.44, 0.24, 0.24
691
 * ]
692
 * And format closely to what numpy looks like.
693
 * @param a The input tensor, it can be a tensor or a tensor view. It has to be accessible on CPU.
694
 * @return An allocated string that you can call ccfree to free it.
695
 */
696
CCV_WARN_UNUSED(char*) ccv_nnc_tensor_format_new(const ccv_nnc_tensor_t* const a);
697
/**
698
 * Method to decode tensor into a given buffer.
699
 * @param data The encoded data that needs to be decoded.
700
 * @param data_size The size of the encoded data.
701
 * @param datatype The expected data type of the encoded data.
702
 * @param dimensions The expected dimension for the data.
703
 * @param dimension_count The number of dimensions for the data.
704
 * @param identifier The identifier saved along with the encoder (non-zero) that is used to identify this decoder.
705
 * @param context The context associated with this decoder.
706
 * @param tensor_params The tensor parameters for the final container. This can be different from the expected values above.
707
 * @param tensor_out The final container for the tensor. It can be nil and you need to initialize it in that case.
708
 * @param decoded The buffer for data to be decoded.
709
 * @param decoded_size The size of the buffer to be decoded.
710
 * @return 1 if it is processed, 0 otherwise.
711
 */
712
typedef int (*ccv_nnc_tensor_io_option_decode_f)(const void* const data, const size_t data_size, const int datatype, const int* const dimensions, const int dimension_count, const unsigned int identifier, void* const context, const ccv_nnc_tensor_param_t tensor_params, ccv_nnc_tensor_t** const tensor_out, void* const decoded, size_t* const decoded_size);
713
/**
714
 * Method to encode tensor into a given buffer.
715
 * @param data The data that needs to be encoded.
716
 * @param data_size The size of the data to be encoded.
717
 * @param datatype The expected data type of the data to be encoded.
718
 * @param dimensions The expected dimension for the data.
719
 * @param dimension_count The number of dimensions for the data.
720
 * @param context The context associated with this encoder.
721
 * @param encoded The buffer for encoded data.
722
 * @param encoded_size The size of the buffer.
723
 * @param tensor_params The tensor parameters that can be modified.
724
 * @param identifier The identifier identifies this encoder (non-zero).
725
 * @return 1 if it is processed, 0 otherwise.
726
 */
727
typedef int (*ccv_nnc_tensor_io_option_encode_f)(const void* const data, const size_t data_size, const int datatype, const int* const dimensions, const int dimension_count, void* const context, void* const encoded, size_t* const encoded_size, ccv_nnc_tensor_param_t* const tensor_params, unsigned int* const identifier);
728
/**
729
 * Additional options to regulate tensor write / read behavior. For example, you can pass
730
 * encryptor / compressor to encrypt / compress the data prior to write to disk. You can
731
 * also only store reference, and use external storage for tensors.
732
 */
733
typedef struct {
734
  ccv_nnc_tensor_io_option_decode_f decode;
735
  ccv_nnc_tensor_io_option_encode_f encode;
736
  void* context;
737
} ccv_nnc_tensor_io_option_t;
738
/**
739
 * Write tensor to a SQLite database with a given name.
740
 * @param tensor The tensor.
741
 * @param handle The SQLite handle.
742
 * @param name The name to find the tensor in the database.
743
 * @param options If provided, we will use this to encode tensor data.
744
 * @return CCV_IO_FINAL for success, otherwise error.
745
 */
746
int ccv_nnc_tensor_write(const ccv_nnc_tensor_t* const tensor, void* const handle, const char* const name, const ccv_nnc_tensor_io_option_t* const options);
747
748
enum {
749
  CCV_NNC_TENSOR_READ_METADATA_ONLY = CCV_NO_DATA_ALLOC, /**< Read tensor that data is nil, with only metadata. */
750
  CCV_NNC_TENSOR_READ_CPU_MEMORY = CCV_TENSOR_CPU_MEMORY, /**< Read tensor to CPU allocated buffer. */
751
};
752
/**
753
 * Read a tensor from a SQLite database with a given name.
754
 * @param handle The SQLite handle.
755
 * @param name The name to find the tensor in the database.
756
 * @param options If provided, we will use this to decode any data whose identifier != 0.
757
 * @param flags Additional flag to configure how we read tensor.
758
 * @param tensor_params If provided, we will use this to create the tensor if tensor_out is not provided.
759
 * @param tensor_out The pointer to hold the tensor. If you supply the tensor yourself, we will read the data into the existing tensor.
760
 * @return CCV_IO_FINAL for success, otherwise error.
761
 */
762
int ccv_nnc_tensor_read(void* const handle, const char* const name, const ccv_nnc_tensor_io_option_t* const options, const int flags, const ccv_nnc_tensor_param_t* const tensor_params, ccv_nnc_tensor_t** const tensor_out);
763
/** @} */
764
765
/**
766
 * @addtogroup level_1_cmd
767
 * @{
768
 */
769
770
/**
771
 * Return a high precision time unit. What this time unit is is platform specific.
772
 * @return A monotonically increasing 64-bit integer w.r.t. the passing of time.
773
 */
774
uint64_t ccv_nnc_cmd_mono_time(void);
775
/**
776
 * Return UTF-8 encoded name of a given command.
777
 * @return A UTF-8 string (pointing to a static constant).
778
 */
779
CCV_WARN_UNUSED(const char*) ccv_nnc_cmd_name(const uint32_t cmd);
780
/**
781
 * Return UTF-8 encoded name of a given backend.
782
 * @return A UTF-8 string (pointing to a static constant).
783
 */
784
CCV_WARN_UNUSED(const char*) ccv_nnc_cmd_backend_name(const uint32_t backend);
785
/**
786
 * Check whether a given backend is available for a given command.
787
 * @return 1 if it is available.
788
 */
789
CCV_WARN_UNUSED(int) ccv_nnc_cmd_ok(const uint32_t cmd, const uint32_t backend);
790
/**
791
 * Create a wrapped command with parameters.
792
 * @param cmd The command identifier.
793
 * @param isa If this is a CCV_NNC_CUSTOM_FORWARD / CCV_NNC_CUSTOM_BACKWARD command, this supplies the custom functions.
794
 * @param params The parameters for the command.
795
 * @param flags A reserved field for flags.
796
 * @return A wrapped ccv_nnc_cmd_t structure.
797
 */
798
CCV_WARN_UNUSED(ccv_nnc_cmd_t) ccv_nnc_cmd(const uint32_t cmd, ccv_nnc_cmd_vtab_t* const isa, const ccv_nnc_cmd_param_t params, const int flags);
799
/**
800
 * Verify whether a hint is compatible with a given command and a given input tensor parameters / output tensor parameters.
801
 * @param hint The hint for a given command. Hint defines things such as paddings, strides etc. for a given command.
802
 * @param cmd The wrapped command.
803
 * @param a The input tensor parameters.
804
 * @param b The output tensor parameters.
805
 * @return 1 if it passes.
806
 */
807
CCV_WARN_UNUSED(int) ccv_nnc_hint_verify(const ccv_nnc_hint_t hint, const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t a, const ccv_nnc_tensor_param_t b);
808
/**
809
 * Automatically find the best hint for a given input / output (on forward pass only).
810
 * @param cmd The wrapped command.
811
 * @param a The input tensor parameters.
812
 * @param b The output tensor parameters.
813
 * @return Best hint we can guess.
814
 */
815
CCV_WARN_UNUSED(ccv_nnc_hint_t) ccv_nnc_hint_auto(const ccv_nnc_cmd_param_t cmd, const ccv_nnc_tensor_param_t a, const ccv_nnc_tensor_param_t b);
816
/**
817
 * Automatically find the outputs for the given inputs / hint.
818
 * @param cmd The wrapped command.
819
 * @param inputs An array of input tensor parameters.
820
 * @param input_size The size of input array.
821
 * @param hint The hint for the given command.
822
 * @param outputs An array for the output tensor parameters.
823
 * @param output_size The size of the output array.
824
 */
825
void ccv_nnc_hint_tensor_auto(const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size);
826
/**
827
 * Find a suitable backend for a given command and tensor settings.
828
 * @param cmd The wrapped command.
829
 * @param tensor_memory The tensor memory setup (whether it is CPU or GPU).
830
 * @param tensor_formats The tensor layout format (NCHW, NHWC, CHWN etc.)
831
 * @param tensor_datatypes The datatype of a given tensor (FP32 etc.)
832
 * @return The backend identifier for the selected backend.
833
 */
834
CCV_WARN_UNUSED(uint32_t) ccv_nnc_cmd_find_backend(const ccv_nnc_cmd_t cmd, const int tensor_memory, const int tensor_formats, const int tensor_datatypes);
835
/**
836
 * Run autotune to find the best kernel and configuration for the given input.
837
 * @param cmd The original wrapped command.
838
 * @param max_workspace_size The maximum memory allowed for this command to execute.
839
 * @param hint The hint for the given command.
840
 * @param flags The reserved field for flags.
841
 * @param inputs An array of input tensors.
842
 * @param input_size The size of input array.
843
 * @param outputs An array of output tensors.
844
 * @param output_size The size of output array.
845
 * @param stream_context The stream we can do the autotune on. 0 uses default stream.
846
 * @return The modified cmd that contains the updated configuration.
847
 */
848
CCV_WARN_UNUSED(ccv_nnc_cmd_t) ccv_nnc_cmd_autotune(const ccv_nnc_cmd_t cmd, const size_t max_workspace_size, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context);
849
/**
850
 * Check whether a given tensor input / output pattern can be computed by the given command.
851
 * bitmasks encode whether a given input tensor / output tensor available at a position.
852
 * @param cmd The wrapped command to check.
853
 * @param input_size The intended size of the input tensor array.
854
 * @param output_size The intended size of the output tensor array.
855
 * @param input_bitmasks The input tensor array encoding in bitmap, 0: no tensor, 1: has a tensor.
856
 * @param input_bitmask_size The size of the input bitmask array.
857
 * @param output_bitmasks The output tensor array encoding in bitmap.
858
 * @param output_bitmask_size The size of the output bitmask array.
859
 * @return 1 if the command can be executed with the given input / output pattern.
860
 */
861
CCV_WARN_UNUSED(int) ccv_nnc_cmd_bitmask(const ccv_nnc_cmd_t cmd, const int input_size, const int output_size, const uint64_t* const input_bitmasks, const int input_bitmask_size, const uint64_t* const output_bitmasks, const int output_bitmask_size);
862
/**
863
 * Return auxiliary information related to a particular command with a particular backend.
864
 * A backend is required to be useful for this method.
865
 * @param cmd The wrapped command to check auxiliary information.
866
 * @return The auxiliary information specific to a particular command with a particular backend.
867
 */
868
CCV_WARN_UNUSED(void*) ccv_nnc_cmd_aux(const ccv_nnc_cmd_t cmd);
869
/**
870
 * Execute a given command.
871
 * @param cmd The wrapped command to be executed.
872
 * @param hint The hint provided for the command.
873
 * @param flags A reserved field for flags.
874
 * @param inputs The input tensor array.
875
 * @param input_size The size of input tensor array.
876
 * @param outputs The output tensor array.
877
 * @param output_size The size of output tensor array.
878
 * @param stream_context The stream which the command will be executed upon.
879
 * @return CCV_NNC_EXEC_SUCCESS if succeed.
880
 */
881
int ccv_nnc_cmd_exec(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context);
882
/**
883
 * Check whether the command is a forward pass or not.
884
 * @param cmd The wrapped command.
885
 * @return 1 if it is a forward pass.
886
 */
887
CCV_WARN_UNUSED(int) ccv_nnc_cmd_is_forward(const ccv_nnc_cmd_t cmd);
888
/**
889
 * Check whether the command is a backward pass or not.
890
 * @param cmd The wrapped command.
891
 * @return 1 if it is a backward pass.
892
 */
893
CCV_WARN_UNUSED(int) ccv_nnc_cmd_is_backward(const ccv_nnc_cmd_t cmd);
894
/**
895
 * Check this command against listed attributes.
896
 * @param cmd The wrapped command.
897
 * @param flags The flags to check against the command (unsupported).
898
 * @return 1 if the flag is supported by the command.
899
 */
900
CCV_WARN_UNUSED(int) ccv_nnc_cmd_attr(const ccv_nnc_cmd_t cmd, const int flags);
901
/**
902
 * Check whether this command allows inplace operation against a particular input and output (index from 0).
903
 * @param cmd The wrapped command.
904
 * @param input_idx The index of the input tensor we want to check.
905
 * @param input_size The total number of inputs.
906
 * @param output_idx the index of the output tensor we want to check.
907
 * @param output_size The total number of outputs.
908
 * @return 1 if the input tensor can be used as the output tensor.
909
 */
910
CCV_WARN_UNUSED(int) ccv_nnc_cmd_allow_inplace(const ccv_nnc_cmd_t cmd, const int input_idx, const int input_size, const int output_idx, const int output_size);
911
/**
912
 * Check whether this command needs to enforce inplace operation against a particular input and output (index from 0).
913
 * @param cmd The wrapped command.
914
 * @param input_idx The index of the input tensor we want to check.
915
 * @param input_size The total number of inputs.
916
 * @param output_idx the index of the output tensor we want to check.
917
 * @param output_size The total number of outputs.
918
 * @return 1 if the input tensor is required to be used as the output tensor.
919
 */
920
CCV_WARN_UNUSED(int) ccv_nnc_cmd_enforce_inplace(const ccv_nnc_cmd_t cmd, const int input_idx, const int input_size, const int output_idx, const int output_size);
921
/**
922
 * Set the profiler to be on or off. Right now, this just proxies the call to cudaProfilerStart / cudaProfilerStop.
923
 * @param state 1 is on, 0 is off.
924
 */
925
void ccv_nnc_set_profiler(int state);
926
/**
927
 * Set the queue watermark when queueing up GPU commands. This is a Metal-only option.
928
 * @param state If > 0, how many in-flight GPU commands there can be.
929
 */
930
void ccv_nnc_set_queue_watermark(int state);
931
/**
932
 * Get the queue watermark when queueing up GPU commands. This is a Metal-only option.
933
 * @return How many in-flight GPU commands there can be.
934
 */
935
CCV_WARN_UNUSED(int) ccv_nnc_queue_watermark(void);
936
/**
937
 * Set the device mapping to use custom order for device rather than driver imposed order. This is helpful
938
 * to manage code where you have no control over which GPU to use. The previous permutation is cleared up on
939
 * each call and you can set 0 size device map to clear up all custom mapping.
940
 * @param type Currently, only CCV_STREAM_CONTEXT_GPU on NVIDIA systems is supported.
941
 * @param device_map The array of device map, maximum 64 devices.
942
 * @param size The size of the array, only first 64 will be used.
943
 */
944
void ccv_nnc_set_device_permutation(const int type, const int* const device_map, const int size);
945
/**
946
 * Set the path to binary artifacts that would accelerate command compilations. Note that the binary
947
 * artifacts path are separated into read one and write one. They could be the same, but would be
948
 * better to be a separate one to avoid competing with each other.
949
 * @param paths_to_read The file paths to read binary artifacts. Whether it is a file or directory is implementation dependent.
950
 * @param paths_to_read_size The number of paths in paths_to_read.
951
 * @param path_to_write The file path to write binary artifacts. Whether it is a file or directory is implementation dependent.
952
 */
953
void ccv_nnc_set_binary_artifacts(const char** const paths_to_read, const int paths_to_read_size, const char* const path_to_write);
954
955
enum {
956
  CCV_NNC_QX_8I_ROWWISE = 0x900,
957
};
958
/**
959
 * Quantize a given memory region of a given datatype / memory resides, into nbits palette.
960
 * @param input The input memory region, it can be CCV_64F, CCV_32F or CCV_16F.
961
 * @param datatype The datatype, it can be CCV_64F, CCV_32F or CCV_16F.
962
 * @param memory_type Where the memory resides. Right now only support CPU_MEMORY.
963
 * @param input_length How many elements in the input.
964
 * @param qbits How many bits for the palette. Right now only 4 / 5 / 6 / 7 / 8 bits supported.
965
 * @param number_in_blocks How many elements share a palette.
966
 * @param output The output memory region.
967
 * @param output_length The maximum size of the output.
968
 * @return The actual length in bytes of the output.
969
 */
970
CCV_WARN_UNUSED(size_t) ccv_nnc_palettize(const void* input, const int datatype, const int memory_type, const size_t input_length, const int qbits, const int number_in_blocks, void* output, const size_t output_length);
971
/**
972
 * Dequantize a given memory region of a given datatype / memory resides, from built-in nbits palette.
973
 * @param input The input memory region.
974
 * @param datatype The datatype, it can be CCV_64F, CCV_32F or CCV_16F.
975
 * @param memory_type Where the memory resides. It can be either CPU_MEMORY or GPU_MEMORY.
976
 * @param input_length The size of the input in bytes.
977
 * @param qbits How many bits for the palette. Right now only 4 / 5 / 6 / 7 / 8 bits supported.
978
 * @param number_in_blocks How many elements share a palette.
979
 * @param output The output memory region, it can be CCV_64F, CCV_32F or CCV_16F.
980
 * @param output_length How many elements in the output.
981
 */
982
void ccv_nnc_depalettize(const void* input, const int datatype, const int memory_type, const size_t input_length, const int qbits, const int number_in_blocks, void* output, const size_t output_length);
983
/**
984
 * Quantize a given memory region of a given datatype / memory resides, into row-wise int8 + scale.
985
 * The row-wise split is based on the innermost dimension, thus @p row_length is required.
986
 * @param input The input memory region, it can be CCV_64F, CCV_32F, CCV_16F or CCV_16BF.
987
 * @param datatype The datatype of the input memory region.
988
 * @param memory_type Where the memory resides. Right now only support CPU_MEMORY.
989
 * @param input_length How many elements in the input.
990
 * @param row_length The number of elements in each row.
991
 * @param output The output memory region.
992
 * @param output_length The maximum size of the output in bytes.
993
 * @return The actual length in bytes of the output.
994
 */
995
CCV_WARN_UNUSED(size_t) ccv_nnc_quantize_8i_rowwise(const void* input, const int datatype, const int memory_type, const size_t input_length, const size_t row_length, void* output, const size_t output_length);
996
/**
997
 * Dequantize a memory region from row-wise int8 + scale.
998
 * The row-wise split is based on the innermost dimension, thus @p row_length is required.
999
 * @param input The input memory region.
1000
 * @param datatype The datatype of the output memory region and row-wise scales.
1001
 * @param memory_type Where the memory resides. Right now only support CPU_MEMORY.
1002
 * @param input_length The size of the input in bytes.
1003
 * @param row_length The number of elements in each row.
1004
 * @param output The output memory region.
1005
 * @param output_length How many elements in the output.
1006
 */
1007
void ccv_nnc_dequantize_8i_rowwise(const void* input, const int datatype, const int memory_type, const size_t input_length, const size_t row_length, void* output, const size_t output_length);
1008
1009
/** @} */
1010
1011
/**
1012
 * @defgroup level_1_stream Streams
1013
 * @{
1014
 */
1015
1016
// Control flow constructs
1017
// Heavily based on CUDA's stream / event idea.
1018
enum {
1019
  CCV_STREAM_CONTEXT_CPU = 0x1, /**< A CPU based stream context (unsupported). */
1020
  CCV_STREAM_CONTEXT_GPU = 0x2, /**< A GPU based stream context. */
1021
};
1022
211k
#define CCV_STREAM_GET_CONTEXT(type) ((type) & 0x3)
1023
#define CCV_STREAM_GET_DEVICE(type) CCV_TENSOR_GET_DEVICE(type)
1024
44.9k
#define CCV_STREAM_GET_DEVICE_ID(type) CCV_TENSOR_GET_DEVICE_ID(type)
1025
3.30k
#define CCV_STREAM_SET_DEVICE_ID(type, device_id) CCV_TENSOR_SET_DEVICE_ID(type, device_id)
1026
/**
1027
 * Create a new stream context.
1028
 * @param type A combination of CPU / GPU and DEVICE_ID.
1029
 * @return The newly created stream context.
1030
 */
1031
CCV_WARN_UNUSED(ccv_nnc_stream_context_t*) ccv_nnc_stream_context_new(const int type);
1032
/**
1033
 * Get the type of the stream context.
1034
 * @param stream_context The stream context we want to inspect.
1035
 * @return The type of the stream context.
1036
 */
1037
CCV_WARN_UNUSED(int) ccv_nnc_stream_context_type(const ccv_nnc_stream_context_t* const stream_context);
1038
/**
1039
 * Get a stream context local workspace memory. This memory region will be reused
1040
 * the next time when you call this method on the same stream context.
1041
 * @param stream_context The stream context which provides the workspace memory.
1042
 * @param workspace_size The size of the workspace memory.
1043
 * @param mem The memory type of the said workspace memory (GPU or CPU).
1044
 * @return A pointer to the workspace memory.
1045
 */
1046
CCV_WARN_UNUSED(void*) ccv_nnc_stream_context_get_workspace(ccv_nnc_stream_context_t* const stream_context, const size_t workspace_size, const int mem);
1047
/**
1048
 * Deallocate any workspace memory on the stream context.
1049
 * @param stream The stream context to drain workspace memory.
1050
 */
1051
void ccv_nnc_stream_context_drain(ccv_nnc_stream_context_t* const stream);
1052
/**
1053
 * The callback prototype on the stream context.
1054
 */
1055
typedef void(*ccv_nnc_callback_f)(void* const callback_context);
1056
/**
1057
 * Add a callback function to be called once stream executed to that point.
1058
 * @param stream The stream context to add callback.
1059
 * @param callback The callback function.
1060
 * @param callback_context The context to be called with the callback function.
1061
 */
1062
void ccv_nnc_stream_context_add_callback(ccv_nnc_stream_context_t* const stream, const ccv_nnc_callback_f callback, void* const callback_context);
1063
/**
1064
 * Wait until all tasks submitted (command, graph run etc.) on the stream context
1065
 * completed.
1066
 * @param stream The stream context to wait.
1067
 */
1068
void ccv_nnc_stream_context_wait(const ccv_nnc_stream_context_t* const stream);
1069
/**
1070
 * The hooks to be called when a stream context is destroyed.
1071
 * At the moment, the stream context will be destroyed at the time
1072
 * ccv_nnc_stream_context_free is called, so there are no tricks.
1073
 * This method is useful because we have some resources associated
1074
 * with stream pointer, hence, would be good to free these resources
1075
 * upon freeing the stream.
1076
 */
1077
typedef void (*ccv_nnc_stream_context_destructor_f)(const ccv_nnc_stream_context_t* const stream, void* const context);
1078
/**
1079
 * Add a new destructor hook callback when a stream is freed.
1080
 * @param stream The stream to be observed.
1081
 * @param destructor The new destructor callback method.
1082
 * @param context additional context.
1083
 * @return An integer identifier to help remove the hook.
1084
 */
1085
int ccv_nnc_stream_context_add_destructor_hook(ccv_nnc_stream_context_t* const stream, ccv_nnc_stream_context_destructor_f destructor, void* const context);
1086
/**
1087
 * Remove a destructor hook callback.
1088
 * @param stream The stream we observe.
1089
 * @param hook_id The returned integer when calling the add method.
1090
 */
1091
void ccv_nnc_stream_context_remove_destructor_hook(ccv_nnc_stream_context_t* const stream, const int hook_id);
1092
/**
1093
 * Deallocate the stream context.
1094
 * @param stream_context The stream context to be destroyed.
1095
 */
1096
void ccv_nnc_stream_context_free(ccv_nnc_stream_context_t* const stream_context);
1097
/**
1098
 * Set random seed for stream context.
1099
 * @param stream_context The stream context to set the seed. 0 means use the default stream context.
1100
 * @param seed The seed for the stream context.
1101
 */
1102
void ccv_nnc_stream_context_set_seed(ccv_nnc_stream_context_t* const stream_context, uint32_t seed);
1103
/**
1104
 * Generate uint32_t random number for stream context.
1105
 * These are usually used as seed for other high-performance random number generators.
1106
 * @param stream_context The stream context associated with random number generation.
1107
 */
1108
uint32_t ccv_nnc_stream_context_genrand_uint32(ccv_nnc_stream_context_t* const stream_context);
1109
1110
/**
1111
 * Opaque pointer to the signal object.
1112
 */
1113
typedef struct ccv_nnc_stream_signal_s ccv_nnc_stream_signal_t;
1114
1115
/**
1116
 * Create a new stream signal.
1117
 * @param type A composed type denotes whether it associated with a GPU or CPU stream context, and on which device.
1118
 * @return The newly created stream signal.
1119
 */
1120
CCV_WARN_UNUSED(ccv_nnc_stream_signal_t*) ccv_nnc_stream_signal_new(const int type);
1121
/**
1122
 * Get the type of the stream signal.
1123
 * @param signal The stream signal we want to inspect.
1124
 * @return The type of the stream signal.
1125
 */
1126
CCV_WARN_UNUSED(int) ccv_nnc_stream_signal_type(const ccv_nnc_stream_signal_t* const signal);
1127
/**
1128
 * Emit a signal on a stream.
1129
 * @param stream The stream context where the signal will be emitted.
1130
 * @param signal The signal to be emitted. It has to be on the same device as the stream.
1131
 */
1132
void ccv_nnc_stream_context_emit_signal(ccv_nnc_stream_context_t* const stream, ccv_nnc_stream_signal_t* const signal);
1133
/**
1134
 * Emit a signal on a stream directly. It will be managed by the stream. You have to use it immediately after return.
1135
 * @param stream The stream context where the signal will be emitted.
1136
 * @return The new signal emitted on the stream context.
1137
 */
1138
ccv_nnc_stream_signal_t* ccv_nnc_stream_context_emit_signal_new(ccv_nnc_stream_context_t* const stream);
1139
/**
1140
 * Wait a signal on a stream.
1141
 * @param stream The stream context that will be blocked by the signal.
1142
 * @param signal The signal to be waited. It can be on a different device of the stream.
1143
 */
1144
void ccv_nnc_stream_context_wait_signal(const ccv_nnc_stream_context_t* const stream, const ccv_nnc_stream_signal_t* const signal);
1145
/**
1146
 * Get on which stream context this signal is going to be emitted on.
1147
 * @param signal The signal we want to inspect.
1148
 * @return The most recent stream context you called ccv_nnc_stream_context_emit_signal with.
1149
 */
1150
CCV_WARN_UNUSED(ccv_nnc_stream_context_t*) ccv_nnc_stream_signal_get_emitter(const ccv_nnc_stream_signal_t* const signal);
1151
/**
1152
 * Deallocate the signal.
1153
 * @param signal The signal to be destroyed.
1154
 */
1155
void ccv_nnc_stream_signal_free(ccv_nnc_stream_signal_t* const signal);
1156
/**
1157
 * Return number of devices.
1158
 * @param type The type of devices (CCV_NNC_STREAM_CONTEXT_GPU / CCV_NNC_STREAM_CONTEXT_CPU)
1159
 * @return The number of devices.
1160
 */
1161
CCV_WARN_UNUSED(int) ccv_nnc_device_count(const int type);
1162
/**
1163
 * The neighbor discovery function that will be called with the device id.
1164
 */
1165
typedef ccv_nnc_stream_context_t*(*ccv_nnc_stream_context_neighbor_discovery_f)(const int device_id, void* const context);
1166
/**
1167
 * Set the neighbor stream context discovery mechanism. This method exposes how
1168
 * neighbor should be defined per stream context. This method is useful for
1169
 * commands that operate across devices and need to find the correct stream
1170
 * context for these devices. Stream context itself is bound to one device
1171
 * only.
1172
 * @param stream_context The stream context to bind the discovery mechanism to.
1173
 * @param discovery The neighbor discovery function to invoke.
1174
 * @param context The associated context with the neighbor discovery function.
1175
 */
1176
void ccv_nnc_stream_context_set_neighbor_discovery(ccv_nnc_stream_context_t* const stream_context, ccv_nnc_stream_context_neighbor_discovery_f discovery, void* const context);
1177
/**
1178
 * Find a neighbor stream context on a given device id for current stream context.
1179
 * @param stream_context The stream context which we will look for neighbors.
1180
 * @param device_id On which device the stream context may exist.
1181
 * @return 0 if no stream context found. Otherwise, return the stream context on that device.
1182
 */
1183
CCV_WARN_UNUSED(ccv_nnc_stream_context_t*) ccv_nnc_stream_context_find_neighbor(ccv_nnc_stream_context_t* const stream_context, const int device_id);
1184
1185
/** @} */
1186
1187
/** @} */
1188
1189
/**
1190
 * @defgroup level_2 Level-2 API
1191
 * @{
1192
 */
1193
1194
/**
1195
 * @defgroup level_2_essentials Essentials
1196
 * @{
1197
 */
1198
1199
enum {
1200
  CCV_NNC_SHORT_DOT_GRAPH = 0x0, /**< Display a simplified graph. */
1201
  CCV_NNC_LONG_DOT_GRAPH  = 0x1, /**< Display a graph that contains all information. */
1202
};
1203
1204
/**
1205
 * Opaque pointer holds the concrete graph representation.
1206
 */
1207
typedef struct ccv_nnc_graph_s ccv_nnc_graph_t;
1208
1209
/**
1210
 * The opaque on-stack object that holds a reference to an execution node within a graph.
1211
 */
1212
typedef struct {
1213
  int32_t d; // This is int because sometimes I piggy-back on negatives to carry out some internal computations.
1214
  ccv_nnc_graph_t* graph;
1215
} ccv_nnc_graph_exec_t;
1216
1217
82.2k
#define CCV_NO_GRAPH_EXEC(exec) ((exec).graph == 0) /* Nonzero when the exec reference's graph pointer is 0, i.e. it refers to no graph. */
1218
1219
/**
1220
 * Create an empty graph.
1221
 * Note that all graph mutation methods are not thread-safe.
1222
 * You should only operate the graph in serial fashion.
1223
 * @return An opaque ccv_nnc_graph_t pointer.
1224
 */
1225
CCV_WARN_UNUSED(ccv_nnc_graph_t*) ccv_nnc_graph_new(void);
1226
/**
1227
 * Create a node with specific command execution, as well as its inputs & outputs.
1228
 * Underlying, the graph maintains the backing object for the node, and all you get is
1229
 * an on-stack object to index the backing object from the graph.
1230
 * @param graph The concrete graph.
1231
 * @param cmd The wrapped command.
1232
 * @param hint The hint for this command.
1233
 * @param inputs The input tensors array.
1234
 * @param input_size The size of input tensors array.
1235
 * @param outputs The output tensors array.
1236
 * @param output_size The size of output tensors array.
1237
 * @return An on-stack object that references an execution node.
1238
 */
1239
CCV_WARN_UNUSED(ccv_nnc_graph_exec_t) ccv_nnc_graph_exec_new(ccv_nnc_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size);
1240
/**
1241
 * Set the command for an existing execution node.
1242
 * @param graph The concrete graph.
1243
 * @param exec The execution node reference.
1244
 * @param cmd The new wrapped command.
1245
 */
1246
void ccv_nnc_graph_exec_set(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_cmd_t cmd);
1247
/**
1248
 * Return the command on an existing execution node.
1249
 * @param graph The concrete graph.
1250
 * @param exec The execution node reference.
1251
 * @return The wrapped command.
1252
 */
1253
CCV_WARN_UNUSED(ccv_nnc_cmd_t) ccv_nnc_graph_exec_cmd(const ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec);
1254
/**
1255
 * Set hint for an existing execution node.
1256
 * @param graph The concrete graph.
1257
 * @param exec The execution node reference.
1258
 * @param hint The new hint.
1259
 */
1260
void ccv_nnc_graph_exec_set_hint(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_hint_t hint);
1261
/**
1262
 * Set input / output tensors for an existing execution node.
1263
 * @param graph The concrete graph.
1264
 * @param exec The execution node reference.
1265
 * @param inputs The input tensors array.
1266
 * @param input_size The size of input tensors array.
1267
 * @param outputs The output tensors array.
1268
 * @param output_size The size of output tensors array.
1269
 */
1270
void ccv_nnc_graph_exec_set_io(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size);
1271
/**
1272
 * Concatenate input graph nodes with an output graph node to create a new graph.
1273
 * @param graph The concrete graph.
1274
 * @param source The execution node reference to connect.
1275
 * @param destination The execution node reference to connect to.
1276
 * @return Non-zero if cannot concat successfully.
1277
 */
1278
int ccv_nnc_graph_exec_concat(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination);
1279
/**
1280
 * Disconnect input graph nodes from an output graph node in this graph.
1281
 * @param graph The concrete graph.
1282
 * @param source The execution node reference to disconnect.
1283
 * @param destination The execution node reference to disconnect from.
1284
 * @return Non-zero if cannot disjoin successfully.
1285
 */
1286
int ccv_nnc_graph_exec_disjoin(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t source, const ccv_nnc_graph_exec_t destination);
1287
/**
1288
 * Count number of exec in the graph.
1289
 * @param graph The concrete graph.
1290
 * @return The number of execution nodes in the graph.
1291
 */
1292
int ccv_nnc_graph_exec_count(const ccv_nnc_graph_t* const graph);
1293
/**
1294
 * Generate output that can be parsed by GraphViz (DOT language).
1295
 * @param graph The concrete graph.
1296
 * @param flags Either CCV_NNC_SHORT_DOT_GRAPH or CCV_NNC_LONG_DOT_GRAPH
1297
 * @param out The output file stream.
1298
 */
1299
void ccv_nnc_graph_dot(const ccv_nnc_graph_t* const graph, const int flags, FILE* out);
1300
/**
1301
 * Run the autotune function on all execution nodes, and assign back with the optimized commands.
1302
 * @param graph The concrete graph.
1303
 * @param max_workspace_size The maximum allowed extra memory usage.
1304
 * @param flags A reserved field for flags.
1305
 * @param sources The source execution nodes to begin. 0 uses default sources.
1306
 * @param source_size The size of source execution nodes.
1307
 * @param destinations The destination execution nodes which we end. 0 uses default destinations.
1308
 * @param destination_size The size of destination execution nodes.
1309
 */
1310
void ccv_nnc_graph_autotune(ccv_nnc_graph_t* const graph, const size_t max_workspace_size, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size);
1311
/**
1312
 * Make the graph topsorted, thus, do a topological sort so that when running the graph, no additional memory will be allocated.
1313
 * Otherwise when we run the graph, we need to allocate some memory on heap to facilitate.
1314
 * @param graph The concrete graph.
1315
 * @param exec_cvt The execution node assignments will change, and you can give an array to know the changes.
1316
 * @param exec_cvt_size The provided conversion array size.
1317
 */
1318
void ccv_nnc_graph_topsort(ccv_nnc_graph_t* const graph, int* const exec_cvt, const int exec_cvt_size);
1319
1320
/**
1321
 * Opaque pointer holds the graph schedule.
1322
 */
1323
typedef struct ccv_nnc_graph_static_schedule_s ccv_nnc_graph_static_schedule_t;
1324
/**
1325
 * Assuming the graph runs from the beginning to the end. Allocate an internal schedule object that will
1326
 * run the graph efficiently if it runs from the beginning to the end. It will basically call ccv_nnc_graph_static_schedule
1327
 * and save the end result to an internal schedule object for this graph.
1328
 * @param graph The concrete graph.
1329
 * @param stream_type The type of stream context we are going to use.
1330
 * @param max_stream_count The number of stream contexts to be allocated internally.
1331
 */
1332
void ccv_nnc_graph_set_default_static_schedule(ccv_nnc_graph_t* const graph, const int stream_type, const int max_stream_count);
1333
/**
1334
 * Allocate extra streams to make this graph parallel runnable. Note this requires the graph to be topsorted.
1335
 * After this is done, you can schedule a graph either on its default stream, or a new stream with the schedule
1336
 * object.
1337
 * @param graph The concrete graph.
1338
 * @param stream_type The type of stream context we are going to use.
1339
 * @param max_stream_count The number of stream contexts to be allocated internally.
1340
 * @param sources The source execution nodes to begin. 0 uses default sources.
1341
 * @param source_size The size of source execution nodes.
1342
 * @param destinations The destination execution nodes which we end. 0 uses default destinations.
1343
 * @param destination_size The size of destination execution nodes.
1344
 * @return An opaque schedule object that lets the graph know how to run itself efficiently.
1345
 */
1346
CCV_WARN_UNUSED(ccv_nnc_graph_static_schedule_t*) ccv_nnc_graph_static_schedule_new(ccv_nnc_graph_t* const graph, const int stream_type, const int max_stream_count, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size);
1347
/**
1348
 * Free a schedule object for a graph.
1349
 * @param schedule The schedule object returned from ccv_nnc_graph_static_schedule_new.
1350
 */
1351
void ccv_nnc_graph_static_schedule_free(ccv_nnc_graph_static_schedule_t* const schedule);
1352
/**
1353
 * Query the default stream for a given graph.
1354
 * @param graph The concrete graph.
1355
 * @return The default stream context.
1356
 */
1357
CCV_WARN_UNUSED(ccv_nnc_stream_context_t*) ccv_nnc_graph_default_stream(const ccv_nnc_graph_t* const graph);
1358
/**
1359
 * Set default sources for a given graph.
1360
 * @param graph The concrete graph.
1361
 * @param sources The source execution nodes to begin.
1362
 * @param source_size The size of source execution nodes.
1363
 */
1364
void ccv_nnc_graph_set_sources(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const sources, const int source_size);
1365
/**
1366
 * Get the default source execution nodes pointer.
1367
 * @param graph The concrete graph.
1368
 * @return A pointer to an array of default source execution nodes.
1369
 */
1370
ccv_nnc_graph_exec_t* ccv_nnc_graph_sources(const ccv_nnc_graph_t* const graph);
1371
/**
1372
 * Get the number of default source execution nodes.
1373
 * @param graph The concrete graph.
1374
 * @return The number of default source execution nodes.
1375
 */
1376
int ccv_nnc_graph_source_size(const ccv_nnc_graph_t* const graph);
1377
/**
1378
 * Set default destinations for a given graph.
1379
 * @param graph The concrete graph.
1380
 * @param destinations The destination execution nodes which we end.
1381
 * @param destination_size The size of destination execution nodes.
1382
 */
1383
void ccv_nnc_graph_set_destinations(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t* const destinations, const int destination_size);
1384
/**
1385
 * Get the default destination execution nodes pointer.
1386
 * @param graph The concrete graph.
1387
 * @return A pointer to an array of default destination execution nodes.
1388
 */
1389
ccv_nnc_graph_exec_t* ccv_nnc_graph_destinations(const ccv_nnc_graph_t* const graph);
1390
/**
1391
 * Get the number of default destination execution nodes.
1392
 * @param graph The concrete graph.
1393
 * @return The number of default destination execution nodes.
1394
 */
1395
int ccv_nnc_graph_destination_size(const ccv_nnc_graph_t* const graph);
1396
/**
1397
 * This graph, and its relevant auxiliary objects (opaque to user) are deallocated.
1398
 * @param graph The concrete graph.
1399
 */
1400
void ccv_nnc_graph_free(ccv_nnc_graph_t* const graph);
1401
/**
1402
 * Opaque pointer to the tape of tensors. The tape is used by the while loop.
1403
 */
1404
typedef struct ccv_nnc_tensor_tape_s ccv_nnc_tensor_tape_t;
1405
/**
1406
 * Execute a computation graph with all bells and whistles. Need to supply a tensor tape if it contains backward pass
1407
 * for while loop or branches. With tensor tape, the tensors are versioned, so you can "backpropagate through time".
1408
 * @param graph The concrete graph.
1409
 * @param flags A reserved field for flags.
1410
 * @param sources The source execution nodes array.
1411
 * @param source_size The size of source execution nodes array. 0 uses default sources.
1412
 * @param destinations The destination execution nodes array.
1413
 * @param destination_size The size of destination execution nodes array. 0 uses default destinations.
1414
 * @param tensor_tape An opaque tensor tape object to "backpropagate through time".
1415
 * @param stream_context Which stream this graph will be executed upon.
1416
 * @return CCV_NNC_EXEC_SUCCESS if succeed.
1417
 */
1418
int ccv_nnc_graph_run(ccv_nnc_graph_t* const graph, const int flags, const ccv_nnc_graph_exec_t* const sources, const int source_size, const ccv_nnc_graph_exec_t* const destinations, const int destination_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context);
1419
/**
1420
 * Execute a computation graph with all bells and whistles. Need to supply a tensor tape if it contains backward pass
1421
 * for while loop or branches. With tensor tape, the tensors are versioned, so you can "backpropagate through time".
1422
 * Comparing with ccv_nnc_graph_run method, this method doesn't take sources / destinations node, rather, it takes the
1423
 * schedule object.
1424
 * @param graph The concrete graph.
1425
 * @param flags A reserved field for flags.
1426
 * @param schedule The schedule object that specifies the sources / destinations and how to efficiently run this.
1427
 * @param tensor_tape An opaque tensor tape object to "backpropagate through time".
1428
 * @param stream_context Which stream this graph will be executed upon.
1429
 * @return CCV_NNC_EXEC_SUCCESS if succeed.
1430
 */
1431
int ccv_nnc_graph_run_with_schedule(ccv_nnc_graph_t* const graph, const int flags, const ccv_nnc_graph_static_schedule_t* const schedule, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context);
1432
/**
1433
 * Cancel execution of a graph. You need to handle synchronization yourself when calling this method to make
1434
 * sure the graph is currently executing when cancelling. This method will set a flag internally and the
1435
 * graph execution will check that flag when pushing compute onto the computation device and abort if it is cancelled.
1436
 * When you call ccv_nnc_graph_run again, this cancellation won't be in effect and you need to call cancel again.
1437
 * @param graph The concrete graph.
1438
 */
1439
void ccv_nnc_graph_cancel(ccv_nnc_graph_t* const graph);
1440
1441
/** @} */
1442
1443
/**
1444
 * @defgroup level_2_others Others
1445
 * @{
1446
 */
1447
1448
/**
1449
 * Set input / output flags for an existing execution node.
1450
 * This must be called after set_io, set additional flags for tensors related to this exec.
1451
 * @param graph The concrete graph.
1452
 * @param exec The execution node reference.
1453
 * @param input_flags The input flags array.
1454
 * @param input_flag_size the size of input flags array, should be the same as input tensors array (or 0).
1455
 * @param output_flags The output flags array.
1456
 * @param output_flag_size the size of output flags array, should be the same as output tensors array (or 0).
1457
 */
1458
void ccv_nnc_graph_exec_set_io_flags(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const int* const input_flags, const int input_flag_size, const int* const output_flags, const int output_flag_size);
1459
/**
1460
 * Set the pair reference for exec. In backward pass, an execution node's pair node is the forward pass node.
1461
 * @param graph The concrete graph.
1462
 * @param exec The execution node reference.
1463
 * @param pair_exec The pair execution node reference.
1464
 */
1465
void ccv_nnc_graph_exec_pair_with(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const ccv_nnc_graph_exec_t pair_exec);
1466
/**
1467
 * Add tensor pair that can be used to "carry over". (carry over: passing a tensor from current loop to the next loop).
1468
 * @param graph The concrete graph.
1469
 * @param from The tensor we have output in this loop.
1470
 * @param to The tensor we will use as input in the next loop.
1471
 */
1472
void ccv_nnc_graph_add_carry_over(ccv_nnc_graph_t* const graph, const ccv_nnc_tensor_t* const from, const ccv_nnc_tensor_t* const to);
1473
/**
1474
 * Updates are the tensors that are not directly involved in the computation, but whose pointers need to get updated
1475
 * along with this exec, thus need to be "update" to other exec nodes.
1476
 * @param graph The concrete graph.
1477
 * @param exec The execution node reference.
1478
 * @param update The tensor that needs to be updated along with the execution node.
1479
 */
1480
void ccv_nnc_graph_exec_add_as_affected(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_tensor_t* const update);
1481
1482
/** @} */
1483
1484
/** @} */
1485
1486
/**
1487
 * @defgroup level_3 Level-3 API
1488
 * @{
1489
 */
1490
1491
/**
1492
 * @defgroup level_3_essentials Essentials
1493
 * @{
1494
 */
1495
1496
/**
1497
 * Opaque pointer to the symbolic graph object.
1498
 */
1499
typedef struct ccv_nnc_symbolic_graph_s ccv_nnc_symbolic_graph_t;
1500
1501
/**
1502
 * Opaque pointer to an arena of allocated tensors.
1503
 */
1504
typedef struct ccv_nnc_tensor_arena_s ccv_nnc_tensor_arena_t;
1505
1506
/**
1507
 * Opaque pointer to an arena of allocated execs.
1508
 */
1509
typedef struct ccv_nnc_graph_exec_arena_s ccv_nnc_graph_exec_arena_t;
1510
1511
/**
1512
 * On-stack object that references a tensor symbol in the symbolic graph.
1513
 */
1514
typedef struct {
1515
  int32_t d;
1516
  const ccv_nnc_symbolic_graph_t* graph;
1517
} ccv_nnc_tensor_symbol_t;
1518
1519
/**
1520
 * On-stack object that references an execution node symbol in the symbolic graph.
1521
 */
1522
typedef struct {
1523
  int32_t d;
1524
  const ccv_nnc_symbolic_graph_t* graph;
1525
} ccv_nnc_graph_exec_symbol_t;
1526
1527
enum {
1528
  CCV_NNC_TENSOR_SYMBOL_INIT_ZEROS = 0x01, /**< Initialize underlying tensor for the symbol with zeros */
1529
  CCV_NNC_TENSOR_SYMBOL_INIT_ONES = 0x02, /**< Initialize underlying tensor for the symbol with ones */
1530
  CCV_NNC_TENSOR_SYMBOL_TAPE_VAR = 0x04, /**< Mark this as a tape variable (it cannot be folded, will contain flag CCV_TAPE_ALLOC) */
1531
  // The one below is special.
1532
  CCV_NNC_TENSOR_SYMBOL_DEAD = 0x80000000, /**< Mark this tensor symbol as dead, any future usage will cause assertion */
1533
};
1534
1535
147k
#define CCV_NNC_TENSOR_SYMBOL_IS_DEAD(x) ((x) & CCV_NNC_TENSOR_SYMBOL_DEAD) /* Nonzero when the CCV_NNC_TENSOR_SYMBOL_DEAD bit is set in the flags value x. */
1536
1537
enum {
1538
  CCV_NNC_GRAPH_EXEC_DEAD = 0x1, /**< Mark this node as dead. */
1539
  CCV_NNC_GRAPH_EXEC_P_WHILE = 0x10, /**< Mark this node keyword is while */
1540
  CCV_NNC_GRAPH_EXEC_CASE_OF = 0x20, /**< Mark this node keyword is case_of */
1541
  CCV_NNC_GRAPH_EXEC_DISABLE_OPT = 0x10000, /**< Mark this node to avoid optimization pass. */
1542
};
1543
1544
451k
#define CCV_NNC_GRAPH_EXEC_IS_DEAD(x) ((x) & CCV_NNC_GRAPH_EXEC_DEAD) /* Nonzero when the CCV_NNC_GRAPH_EXEC_DEAD bit is set in the flags value x. */
1545
25.4k
#define CCV_NNC_GRAPH_REF(x) ((x)->_heap_graph_ref ? 
(x)->_heap_graph_ref178
:
(x)->_inline_graph_ref25.2k
)
1546
1547
enum {
1548
  CCV_NNC_NO_TENSOR_SYMBOL = -1, /**< Special symbol reference for no tensor symbol. */
1549
  CCV_NNC_WHILE_COUNT_TENSOR_SYMBOL = -2, /**< Special symbol reference for while loop count tensor. */
1550
};
1551
1552
enum {
1553
  CCV_NNC_NO_GRAPH_EXEC_SYMBOL = -1, /**< Special symbol reference for no exec symbol. */
1554
};
1555
1556
1557
enum {
1558
  CCV_NNC_SYMBOL_TENSOR, /**< Identifier for tensor symbol */
1559
  CCV_NNC_SYMBOL_TENSOR_ALIAS, /**< Identifier for tensor alias symbol */
1560
  CCV_NNC_SYMBOL_GRAPH_EXEC, /**< Identifier for exec symbol */
1561
};
1562
1563
22
#define CCV_NNC_IS_WHILE_COUNT_TENSOR_SYMBOL(d) (((uint32_t)(d) & 0xf) == 0xe) /* Compares only the low 4 bits of d against 0xe; CCV_NNC_WHILE_COUNT_TENSOR_SYMBOL (-2) satisfies this. */
1564
1565
/**
1566
 * A data structure to pass in a pair of tensor symbols.
1567
 */
1568
typedef struct {
1569
  ccv_nnc_tensor_symbol_t source; /**< The 'from' tensor symbol. */
1570
  ccv_nnc_tensor_symbol_t destination; /**< The 'to' tensor symbol. */
1571
} ccv_nnc_tensor_symbol_map_t;
1572
1573
/**
1574
 * Create a new empty symbolic graph. It is an opaque data structure that maintains the whole graph of computation in its symbolic form.
1575
 * Note that all graph mutation methods are not thread-safe. You should only operate the graph in serial fashion.
1576
 */
1577
CCV_WARN_UNUSED(ccv_nnc_symbolic_graph_t*) ccv_nnc_symbolic_graph_new(void);
1578
/**
1579
 * Create a tensor symbol (thus, with no actual memory space allocation) in a symbolic graph.
1580
 * @param graph The symbolic graph.
1581
 * @param info The tensor parameters.
1582
 * @param name The name of the tensor symbol, it is optional.
1583
 * @return A tensor symbol reference.
1584
 */
1585
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_param_t info, const char* const name);
1586
/**
1587
 * Create an alias to the tensor symbol as tensor view (thus, pointing to the same memory region, but with a different header info and offset).
1588
 * @param graph The symbolic graph.
1589
 * @param tensor_symbol The tensor symbol we are going to reference to.
1590
 * @param ofs The offset on each of the dimension.
1591
 * @param stride The stride of each dimension.
1592
 * @param info The tensor parameters for the new alias.
1593
 * @param name The name of the tensor symbol alias, it is optional.
1594
 * @return A tensor symbol alias reference.
1595
 */
1596
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_alias_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info, const char* const name);
1597
/**
1598
 * Manually delete a tensor symbol off the symbolic graph.
1599
 * @param graph The symbolic graph.
1600
 * @param tensor The tensor symbol reference.
1601
 */
1602
void ccv_nnc_tensor_symbol_free(ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_tensor_symbol_t tensor);
1603
/**
1604
 * Create a graph execution node (an operation that takes a set of inputs and generates a set of outputs).
1605
 * @param graph The symbolic graph.
1606
 * @param cmd The wrapped command.
1607
 * @param inputs The input tensor symbols array.
1608
 * @param input_size The size of input tensor symbols array.
1609
 * @param outputs The output tensor symbols array.
1610
 * @param output_size The size of output tensor symbols array.
1611
 * @param name The name of this execution node, optional.
1612
 * @return The execution node symbol reference.
1613
 */
1614
ccv_nnc_graph_exec_symbol_t ccv_nnc_graph_exec_symbol_new(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name);
1615
/**
1616
 * ccv_nnc_graph_exec_symbol_new defaults to using `ccv_nnc_hint_auto` to find the best hints for a set of inputs / outputs.
1617
 * However, you can also set your own hints.
1618
 * @param graph The symbolic graph.
1619
 * @param exec The execution node symbol reference.
1620
 * @param hint The hint for the command.
1621
 */
1622
void ccv_nnc_graph_exec_symbol_set_hint(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec, const ccv_nnc_hint_t hint);
1623
/**
1624
 * Manually delete an exec symbol off the symbolic graph.
1625
 * @param graph The symbolic graph.
1626
 * @param symbol The execution node symbol reference.
1627
 */
1628
void ccv_nnc_graph_exec_symbol_free(ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_graph_exec_symbol_t symbol);
1629
enum {
1630
  CCV_NNC_AUTOGEN_ALL_EXECS = 0x1, /**< Automatic concatenation for all execution nodes */
1631
  CCV_NNC_AUTOGEN_SOURCES_AND_DESTINATIONS = 0x2, /**< Automatically find all source and destination nodes. */
1632
};
1633
/**
1634
 * Automatically concatenate these nodes together based on their inputs / outputs.
1635
 * Think of this as generating the execution flow based on input tensors and output tensors.
1636
 * nil for execs and 0 for exec_size means to loop over all the execs on the graph and autogen.
1637
 * @param graph The symbolic graph.
1638
 * @param execs The execution nodes array.
1639
 * @param exec_size The size of execution nodes array.
1640
 * @param flags The flags that determine what operations to perform when concatenating.
1641
 * @return non-zero if cannot figure out.
1642
 */
1643
int ccv_nnc_graph_exec_symbol_autogen(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const execs, const int exec_size, const int flags);
1644
/**
1645
 * Set the default sources for a symbolic graph.
1646
 * @param graph The symbolic graph.
1647
 * @param sources The source execution nodes array.
1648
 * @param source_size The size of source execution nodes array.
1649
 */
1650
void ccv_nnc_symbolic_graph_set_sources(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size);
1651
/**
1652
 * Add one node to the default sources for a symbolic graph.
1653
 * @param graph The symbolic graph.
1654
 * @param source The source execution node.
1655
 */
1656
void ccv_nnc_symbolic_graph_add_source(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t source);
1657
/**
1658
 * Get the pointer to the default sources.
1659
 * @param graph The symbolic graph.
1660
 * @return The pointer to the source execution nodes array.
1661
 */
1662
ccv_nnc_graph_exec_symbol_t* ccv_nnc_symbolic_graph_sources(const ccv_nnc_symbolic_graph_t* const graph);
1663
/**
1664
 * Get the size of the default source nodes array.
1665
 * @param graph The symbolic graph.
1666
 * @return The size of the default source nodes array.
1667
 */
1668
int ccv_nnc_symbolic_graph_source_size(const ccv_nnc_symbolic_graph_t* const graph);
1669
/**
1670
 * Set the default destinations for a symbolic graph.
1671
 * @param graph The symbolic graph.
1672
 * @param destinations The destination execution nodes array.
1673
 * @param destination_size The size of destination execution nodes array.
1674
 */
1675
void ccv_nnc_symbolic_graph_set_destinations(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size);
1676
/**
1677
 * Add one node to the default destinations for a symbolic graph.
1678
 * @param graph The symbolic graph.
1679
 * @param destination The destination execution node.
1680
 */
1681
void ccv_nnc_symbolic_graph_add_destination(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t destination);
1682
/**
1683
 * Get the pointer to the default destinations.
1684
 * @param graph The symbolic graph.
1685
 * @return The pointer to the destination execution nodes array.
1686
 */
1687
ccv_nnc_graph_exec_symbol_t* ccv_nnc_symbolic_graph_destinations(const ccv_nnc_symbolic_graph_t* const graph);
1688
/**
1689
 * Get the size of the default destination nodes array.
1690
 * @param graph The symbolic graph.
1691
 * @return The size of the default destination nodes array.
1692
 */
1693
int ccv_nnc_symbolic_graph_destination_size(const ccv_nnc_symbolic_graph_t* const graph);
1694
/**
1695
 * Generate output that can be parsed by GraphViz (DOT language).
1696
 * @param graph The symbolic graph.
1697
 * @param flags Either CCV_NNC_SHORT_DOT_GRAPH or CCV_NNC_LONG_DOT_GRAPH
1698
 * @param out The output file stream.
1699
 */
1700
void ccv_nnc_symbolic_graph_dot(const ccv_nnc_symbolic_graph_t* const graph, const int flags, FILE* out);
1701
1702
/**
1703
 * The data structure to wrap a tensor symbol and a concrete tensor together.
1704
 */
1705
typedef struct {
1706
  ccv_nnc_tensor_symbol_t symbol;
1707
  const ccv_nnc_tensor_t* tensor;
1708
} ccv_nnc_tensor_bind_t;
1709
1710
typedef struct {
1711
  void* (*alloc)(const int type, const int pinned_mem /* Currently only used to annotate CCV_TENSOR_PINNED_MEM, future can be expanded to generic flags */, const size_t size, void* const arg);
1712
  void (*free)(void* const ptr, void* const arg);
1713
} ccv_nnc_symbolic_graph_compile_allocator_vtab_t;
1714
1715
typedef struct {
1716
  const ccv_nnc_symbolic_graph_compile_allocator_vtab_t* isa;
1717
  struct {
1718
    void* alloc;
1719
    void* free;
1720
  } context;
1721
} ccv_nnc_symbolic_graph_compile_allocator_t;
1722
1723
typedef struct {
1724
  ccv_nnc_symbolic_graph_compile_allocator_t allocator;
1725
} ccv_nnc_symbolic_graph_compile_param_t;
1726
1727
/**
1728
 * Compile a symbolic graph into a graph that can be executed, and a set of tensors (opaque data structure tensor arena) are allocated based on which tensor symbols are the input and which are the outputs. The tensor allocation is done to minimize the required storage.
1729
 * tensor_binds provide custom binding for these tensors. You are still responsible for managing the lifetime of these tensors.
1730
 * outputs marks the tensor symbols that need to be kept until the end of the graph.
1731
 * @param graph The symbolic graph.
1732
 * @param compile_params A ccv_nnc_symbolic_graph_compile_param_t struct that defines compilation parameters.
1733
 * @param tensor_binds The binding array (a tensor symbol and a concrete tensor). We replace everywhere that uses the tensor symbol with the concrete tensor.
1734
 * @param tensor_bind_size The size of the binding array.
1735
 * @param outputs The output tensor symbols whose values we want to keep.
1736
 * @param output_size The size of the output tensor symbols array.
1737
 * @param sources The sources for the graph.
1738
 * @param source_size The size of the sources array. 0 to use default sources.
1739
 * @param destinations The destinations for the graph.
1740
 * @param destination_size The size of the destinations array. 0 to use default destinations.
1741
 * @param graph_ref The pointer to store the generated concrete graph.
1742
 * @param tensor_arena_ref The pointer to store ccv_nnc_tensor_arena_t.
1743
 * @param graph_exec_arena_ref The pointer to store ccv_nnc_graph_exec_arena_t.
1744
 */
1745
void ccv_nnc_symbolic_graph_compile(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_symbolic_graph_compile_param_t compile_params, const ccv_nnc_tensor_bind_t* const tensor_binds, const int tensor_bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size, ccv_nnc_graph_t** const graph_ref, ccv_nnc_tensor_arena_t** const tensor_arena_ref, ccv_nnc_graph_exec_arena_t** const graph_exec_arena_ref);
1746
/**
1747
 * Free the symbolic graph and its associated memory. Note that if you compiled a graph / tensor arena out of this symbolic graph, these won't be free'd.
1748
 * @param graph The symbolic graph.
1749
 */
1750
void ccv_nnc_symbolic_graph_free(ccv_nnc_symbolic_graph_t* const graph);
1751
/**
1752
 * Find corresponding tensor by a symbol from the tensor arena.
1753
 * @param tensor_arena The tensor arena object generated through compilation.
1754
 * @param symbol The tensor symbol reference. Because the tensor symbol reference lives on the stack, it can still be used even after the original symbolic graph is free'd.
1755
 * @return A concrete tensor from the tensor arena.
1756
 */
1757
CCV_WARN_UNUSED(ccv_nnc_tensor_t*) ccv_nnc_tensor_from_symbol(const ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_tensor_symbol_t symbol);
1758
/**
1759
 * Bind a tensor to a symbol. You are still responsible for managing the lifetime of the tensor and making sure it is not freed until everything is done.
1760
 * @param tensor_arena The tensor arena object generated through compilation.
1761
 * @param symbol The tensor symbol reference. Because the tensor symbol reference lives on the stack, it can still be used even after the original symbolic graph is free'd.
1762
 * @param tensor The new tensor to bind to.
1763
 */
1764
void ccv_nnc_tensor_bind_symbol(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_t* const tensor);
1765
/**
1766
 * Clear existing bindings on the tensor arena.
1767
 * @param tensor_arena The tensor arena object generated through compilation to clear bindings.
1768
 */
1769
void ccv_nnc_tensor_arena_clear_bindings(ccv_nnc_tensor_arena_t* const tensor_arena);
1770
/**
1771
 * Free the data buffer of the tensor arena.
1772
 * @param tensor_arena The tensor arena object generated through compilation.
1773
 */
1774
void ccv_nnc_tensor_arena_buffer_free(ccv_nnc_tensor_arena_t* const tensor_arena);
1775
/**
1776
 * Free the opaque tensor arena structure.
1777
 * @param tensor_arena The tensor arena object generated through compilation.
1778
 */
1779
void ccv_nnc_tensor_arena_free(ccv_nnc_tensor_arena_t* const tensor_arena);
1780
/**
1781
 * Find the corresponding graph exec by an exec symbol from the graph exec arena.
1782
 * @param graph_exec_arena The graph execution node arena object generated through compilation.
1783
 * @param symbol The execution node symbol reference. Because the execution node symbol reference lives on the stack, it can still be used even after the original symbolic graph is free'd.
1784
 * @return An execution node reference to the concrete graph.
1785
 */
1786
CCV_WARN_UNUSED(ccv_nnc_graph_exec_t) ccv_nnc_graph_exec_from_symbol(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena, const ccv_nnc_graph_exec_symbol_t symbol);
1787
/**
1788
 * Return the node that can drive all the source nodes from the compilation.
1789
 * @param graph_exec_arena The graph execution node arena object generated through compilation.
1790
 * @return An execution node reference that is the source.
1791
 */
1792
CCV_WARN_UNUSED(ccv_nnc_graph_exec_t) ccv_nnc_graph_exec_source(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena);
1793
/**
1794
 * Return the node that can drain all the destination nodes from the compilation.
1795
 * @param graph_exec_arena The graph execution node arena object generated through compilation.
1796
 * @return An execution node reference that is the destination.
1797
 */
1798
CCV_WARN_UNUSED(ccv_nnc_graph_exec_t) ccv_nnc_graph_exec_destination(const ccv_nnc_graph_exec_arena_t* const graph_exec_arena);
1799
/**
1800
 * Free the opaque graph exec arena structure.
1801
 * @param graph_exec_arena The graph execution node arena object generated through compilation.
1802
 */
1803
void ccv_nnc_graph_exec_arena_free(ccv_nnc_graph_exec_arena_t* const graph_exec_arena);
1804
/**
1805
 * Write symbolic graph to disk, along with some binding tensors.
1806
 * @param graph The symbolic graph.
1807
 * @param tensor_binds The binding array (pair of tensor symbol and concrete tensor).
1808
 * @param tensor_bind_size The size of the binding array.
1809
 * @param fn The file name.
1810
 */
1811
void ccv_nnc_symbolic_graph_write(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_bind_t* const tensor_binds, const int tensor_bind_size, const char* const fn);
1812
/**
1813
 * Read symbolic graph from disk, with some binding tensors.
1814
 * @param fn The file name.
1815
 * @param graph_ref The pointer to store symbolic graph.
1816
 * @param tensor_binds_ref The pointer to store the binding array.
1817
 * @param tensor_bind_size_ref The pointer to store the size of the binding array.
1818
 */
1819
void ccv_nnc_symbolic_graph_read(const char* const fn, ccv_nnc_symbolic_graph_t** const graph_ref, ccv_nnc_tensor_bind_t** const tensor_binds_ref, int* const tensor_bind_size_ref);
1820
1821
/**
1822
 * The format callback function. Note that these are all integer ids. They can be filled to
1823
 * ccv_nnc_graph_exec_symbol_t.d or ccv_nnc_tensor_symbol_t.d.
1824
 * @param graph The symbolic graph.
1825
 * @param node The id for the node. It is unique in the graph.
1826
 * @param name The name for the node. It is either NULL or \0 terminated string.
1827
 * @param cmd The associated command for this node.
1828
 * @param flags The flags that help identify whether it is a sub-graph, and which type it is (P_WHILE or CASE_OF).
1829
 * @param incomings The incoming nodes for execution.
1830
 * @param incoming_size The number of incoming nodes for execution.
1831
 * @param outgoings The outgoing nodes for execution.
1832
 * @param outgoing_size The number of outgoing nodes for execution.
1833
 * @param inputs The input tensor symbols.
1834
 * @param input_size The number of the input tensor symbols.
1835
 * @param outputs The output tensor symbols.
1836
 * @param output_size The number of the output tensor symbols.
1837
 * @param context The context passed through ccv_nnc_symbolic_graph_format.
1838
 */
1839
typedef void(*ccv_nnc_symbolic_graph_format_f)(const ccv_nnc_symbolic_graph_t* const graph, const int node, const char* const name, const ccv_nnc_cmd_t cmd, const int flags, const int* const incomings, const int incoming_size, const int* const outgoings, const int outgoing_size, const int* const inputs, const int input_size, const int* const outputs, const int output_size, void* const context);
1840
/**
1841
 * Provide a hook for upper level to do custom formatting of a given symbolic graph. You can
1842
 * implement logic to format the graph into protobuf, or json, or doing persistence. However, this
1843
 * is not the method for you to visit the graph, and do mutations on it. This function doesn't
1844
 * recurse into sub-graphs. You need to inspect each node to know if these are sub-graphs and
1845
 * handle accordingly.
1846
 * @param graph The symbolic graph.
1847
 * @param sources The sources for the graph.
1848
 * @param source_size The size of the sources array. 0 to use default sources.
1849
 * @param destinations The destinations for the graph.
1850
 * @param destination_size The size of the destinations array. 0 to use default destinations.
1851
 * @param format_fn The format callback to be called on every node.
1852
 * @param context The context that will be passed to the callback.
1853
 */
1854
void ccv_nnc_symbolic_graph_format(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context);
1855
1856
/** @} */
1857
1858
/**
1859
 * @defgroup level_3_others Others
1860
 * @{
1861
 */
1862
1863
/**
1864
 * Return the symbol it alias to.
1865
 * @param graph The symbolic graph.
1866
 * @param tensor_symbol The tensor symbol alias.
1867
 * @return A tensor symbol reference to the original tensor symbol. If this symbol has no reference, return NO_SYMBOL (.graph = 0)
1868
 */
1869
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_alias_to(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor_symbol);
1870
/**
1871
 * Set the tensor symbol parameters.
1872
 * @param graph The symbolic graph.
1873
 * @param tensor The tensor symbol reference.
1874
 * @param info The new tensor parameters.
1875
 * @return non-zero if encountered errors.
1876
 */
1877
int ccv_nnc_tensor_symbol_set(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor, const ccv_nnc_tensor_param_t info);
1878
/**
1879
 * Get the parameters for a tensor symbol.
1880
 * @param graph The symbolic graph.
1881
 * @param tensor The tensor symbol reference.
1882
 * @return The tensor parameters.
1883
 */
1884
CCV_WARN_UNUSED(ccv_nnc_tensor_param_t) ccv_nnc_tensor_symbol_params(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor);
1885
/**
1886
 * Get the name for a tensor symbol.
1887
 * @param graph The symbolic graph.
1888
 * @param tensor The tensor symbol reference.
1889
 * @return The tensor name if available. Otherwise 0. The memory is managed by the graph.
1890
 */
1891
CCV_WARN_UNUSED(const char*) ccv_nnc_tensor_symbol_name(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor);
1892
/**
1893
 * Set the tensor symbol alias parameters.
1894
 * @param graph The symbolic graph.
1895
 * @param tensor The tensor symbol reference.
1896
 * @param ofs The offset on each of the dimension.
1897
 * @param stride The stride of each dimension.
1898
 * @return non-zero if it is not a tensor alias.
1899
 */
1900
int ccv_nnc_tensor_symbol_alias_set(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC]);
1901
/**
1902
 * Get the alias parameters for a tensor symbol.
1903
 * @param graph The symbolic graph.
1904
 * @param tensor The tensor symbol reference.
1905
 * @param ofs The offset on each of the dimension.
1906
 * @param stride The stride of each dimension.
1907
 * @return non-zero if it is not a tensor alias.
1908
 */
1909
int ccv_nnc_tensor_symbol_alias_params(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor, int ofs[CCV_NNC_MAX_DIM_ALLOC], int stride[CCV_NNC_MAX_DIM_ALLOC]);
1910
/**
1911
 * Set the flags for this tensor symbol. The flags are only used for symbol, not for tensor.
1912
 * @param graph The symbolic graph.
1913
 * @param tensor The tensor symbol reference.
1914
 * @param flags A reserved field for flags.
1915
 */
1916
void ccv_nnc_tensor_symbol_set_flags(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor, const int flags);
1917
/**
1918
 * Get all the flags for a tensor symbol.
1919
 * @param graph The symbolic graph.
1920
 * @param tensor The tensor symbol reference.
1921
 */
1922
CCV_WARN_UNUSED(int) ccv_nnc_tensor_symbol_flags(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor);
1923
/**
1924
 * Set the cmd of this exec symbol.
1925
 * @param graph The symbolic graph.
1926
 * @param exec The execution node symbol reference.
1927
 * @param cmd The new wrapped command.
1928
 */
1929
void ccv_nnc_graph_exec_symbol_set(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec, const ccv_nnc_cmd_t cmd);
1930
/**
1931
 * Set the flags for this exec symbol. The flags are only used for symbol. We can only set higher 16-bit.
1932
 * @param graph The symbolic graph.
1933
 * @param exec The execution node symbol reference.
1934
 * @param flags A reserved field for flags.
1935
 */
1936
void ccv_nnc_graph_exec_symbol_set_flags(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec, const int flags);
1937
/**
1938
 * Get the flags for an exec symbol. We can only retrieve the higher 16-bit.
1939
 * @param graph The symbolic graph.
1940
 * @param exec The execution node symbol reference.
1941
 */
1942
CCV_WARN_UNUSED(int) ccv_nnc_graph_exec_symbol_flags(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec);
1943
/**
1944
 * Return the command on this exec symbol.
1945
 * @param graph The symbolic graph.
1946
 * @param exec The execution node symbol reference.
1947
 * @return The wrapped command.
1948
 */
1949
CCV_WARN_UNUSED(ccv_nnc_cmd_t) ccv_nnc_graph_exec_symbol_cmd(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec);
1950
/**
1951
 * Return the name of this exec symbol.
1952
 * @param graph The symbolic graph.
1953
 * @param exec The execution node symbol reference.
1954
 * @return The name for the exec symbol if available. The memory is managed by the graph.
1955
 */
1956
CCV_WARN_UNUSED(const char*) ccv_nnc_graph_exec_symbol_name(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec);
1957
/**
1958
 * Set the inputs / outputs for an exec symbol.
1959
 * @param graph The symbolic graph.
1960
 * @param exec The execution node symbol reference.
1961
 * @param inputs The input tensor symbols array.
1962
 * @param input_size The size of input tensor symbols array.
1963
 * @param outputs The output tensor symbols array.
1964
 * @param output_size The size of output tensor symbols array.
1965
 */
1966
void ccv_nnc_graph_exec_symbol_set_io(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size);
1967
/**
1968
 * Manually concatenate input node with an output graph node.
1969
 * @param graph The symbolic graph.
1970
 * @param source The source execution node symbol to connect.
1971
 * @param destination The destination execution node symbol connect to.
1972
 * @return non-zero if cannot concat successfully.
1973
 */
1974
int ccv_nnc_graph_exec_symbol_concat(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t source, const ccv_nnc_graph_exec_symbol_t destination);
1975
/**
1976
 * Manually disconnect input node with an output graph node for this graph.
1977
 * @param graph The symbolic graph.
1978
 * @param source The source execution node symbol to disconnect.
1979
 * @param destination The destination execution node symbol disconnect to.
1980
 * @return non-zero if cannot disjoin successfully.
1981
 */
1982
int ccv_nnc_graph_exec_symbol_disjoin(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t source, const ccv_nnc_graph_exec_symbol_t destination);
1983
/**
1984
 * Number of exec symbols.
1985
 * @param graph The symbolic graph.
1986
 */
1987
CCV_WARN_UNUSED(int) ccv_nnc_graph_exec_symbol_count(const ccv_nnc_symbolic_graph_t* const graph);
1988
/**
1989
 * Number of active symbols of the given type.
1990
 * @param graph The symbolic graph.
1991
 * @param type The type of symbol to count, can be CCV_NNC_SYMBOL_TENSOR or CCV_NNC_SYMBOL_GRAPH_EXEC (will error out on CCV_NNC_SYMBOL_TENSOR_ALIAS).
1992
 */
1993
CCV_WARN_UNUSED(int) ccv_nnc_symbolic_graph_active_symbol_count(const ccv_nnc_symbolic_graph_t* const graph, const int type);
1994
/**
1995
 * Substitution function. Given an execution node symbol and a command, return a new command.
1996
 */
1997
typedef ccv_nnc_cmd_t(*ccv_nnc_symbolic_graph_subst_f)(const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd);
1998
/**
1999
 * Generate a duplicate of the provided graph.
2000
 * While generating the duplicate, it calls the function pointer to re-process the node type.
2001
 * @param graph The symbolic graph.
2002
 * @param subst The substitution function.
2003
 * @return The duplicated symbolic graph.
2004
 */
2005
CCV_WARN_UNUSED(ccv_nnc_symbolic_graph_t*) ccv_nnc_symbolic_graph_dup(const ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_symbolic_graph_subst_f subst);
2006
/**
2007
 * Number of tensor symbols.
2008
 * @param graph The symbolic graph.
2009
 */
2010
CCV_WARN_UNUSED(int) ccv_nnc_tensor_symbol_count(const ccv_nnc_symbolic_graph_t* const graph);
2011
/**
2012
 * Compute all the tensor shapes within this graph.
2013
 * @param graph The symbolic graph.
2014
 * @param sources The sources for the graph.
2015
 * @param source_size The size of the sources array. 0 to use default sources.
2016
 * @param destinations The destinations for the graph.
2017
 * @param destination_size The size of the destinations array. 0 to use default destinations.
2018
 */
2019
void ccv_nnc_symbolic_graph_tensor_auto(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size);
2020
/**
2021
 * For a given tensor symbol, this method resolves to its local reference inside the given graph.
2022
 * This is related to the sub-graph of symbolic graphs. A tensor symbol in the sub-graph can still have a
2023
 * representation in the parent graph. This method is used to find the local reference in any graph.
2024
 * @param graph The symbolic graph.
2025
 * @param tensor_symbol The tensor symbol we want to resolve.
2026
 * @return A tensor symbol reference in the given graph.
2027
 */
2028
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_resolve(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor_symbol);
2029
/**
2030
 * Pass graph's tensor symbol into its sub graph. We will make the connection that the source tensor
2031
 * symbol in the source symbolic graph is the destination tensor symbol in the destination symbolic graph.
2032
 * The reason to do this inference is because a tensor symbol is local to a symbolic graph under the hood.
2033
 * Although you can use tensor symbols from different graphs directly (it calls this method or the resolve
2034
 * method above when creating an execution node symbol), sometimes you need this method to do it manually.
2035
 * @param src_graph The source symbolic graph.
2036
 * @param dest_graph The destination symbolic graph.
2037
 * @param src_tensor_symbol The tensor symbol in the source symbolic graph.
2038
 * @param dest_tensor_symbol The tensor symbol in the destination symbolic graph that the source tensor symbol maps to.
2039
 */
2040
void ccv_nnc_tensor_symbol_hookup(ccv_nnc_symbolic_graph_t* const src_graph, ccv_nnc_symbolic_graph_t* const dest_graph, const ccv_nnc_tensor_symbol_t src_tensor_symbol, const ccv_nnc_tensor_symbol_t dest_tensor_symbol);
2041
/**
2042
 * Set bypasses for a tensor symbol.
2043
 * For case..of graphs, if the condition doesn't meet, we will skip the execution of a sub-graph.
2044
 * However, in that case, we cannot express easily which output tensor corresponds to which input tensor.
2045
 * This method provides a way to do so.
2046
 * @param graph The symbolic graph.
2047
 * @param symbol_map The pair of tensors array, source is the input tensor, destination is the output tensor.
2048
 * @param symbol_map_size The size of the tensor pairs array.
2049
 */
2050
void ccv_nnc_tensor_symbol_set_bypasses(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_map_t* const symbol_map, const int symbol_map_size);
2051
/**
2052
 * Fetch input / output for an exec symbol. For efficiency consideration, this returns pointer directly.
2053
 * @param graph The symbolic graph.
2054
 * @param symbol The execution node symbol reference.
2055
 * @param inputs The pointer to store input tensor symbols array.
2056
 * @param input_size The pointer to store the size of input tensor symbols array.
2057
 * @param outputs The pointer to store output tensor symbols array.
2058
 * @param output_size The pointer to store the size of output tensor symbols array.
2059
 */
2060
void ccv_nnc_graph_exec_symbol_io(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol, const int** const inputs, int* const input_size, const int** const outputs, int* const output_size);
2061
/**
2062
 * Replace a input / output tensor symbol on an exec symbol.
2063
 * @param graph The symbolic graph.
2064
 * @param symbol The execution node symbol reference.
2065
 * @param old_symbol The old tensor symbol to be replaced.
2066
 * @param new_symbol The new tensor symbol on input / output.
2067
 */
2068
void ccv_nnc_graph_exec_symbol_replace_io(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_tensor_symbol_t old_symbol, const ccv_nnc_tensor_symbol_t new_symbol);
2069
/**
2070
 * Which exec symbol this is connected to. For efficiency consideration, this returns pointer directly.
2071
 * @param graph The symbolic graph.
2072
 * @param symbol The execution node symbol reference.
2073
 * @param tos The pointer to store outgoing indexes of the execution nodes.
2074
 * @param to_size The pointer to store the number of outgoing indexes.
2075
 */
2076
void ccv_nnc_graph_exec_symbol_to(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol, const int** const tos, int* const to_size);
2077
/**
2078
 * Find the size allocated on the opaque tensor arena structure.
2079
 * @param tensor_arena The tensor arena object generated through compilation.
2080
 * @return The total allocated size in bytes.
2081
 */
2082
CCV_WARN_UNUSED(uint64_t) ccv_nnc_tensor_arena_size(const ccv_nnc_tensor_arena_t* const tensor_arena);
2083
/**
2084
 * Query whether a set of sources are the ancestors to a set of destination nodes.
2085
 * @param graph The symbolic graph.
2086
 * @param sources The exec sources to check whether they can reach some of the destinations.
2087
 * @param source_size How many sources in the source list.
2088
 * @param destinations The exec destinations to check whether sources can reach.
2089
 * @param destination_size How many destinations in the destination list.
2090
 * @param bitmask Bit return value, each bit represents a source, and 1 meant it can reach some of the destinations.
2091
 */
2092
void ccv_nnc_symbolic_graph_sources_to_destinations(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size, uint64_t* const bitmask);
2093
/**
2094
 * Re-init the tensor arena with updated symbolic graph. This won't work if the symbolic graph requires
2095
 * larger tensors than what's available. Used properly, this method lets you avoid re-compiling a graph
2096
 * just because some tensor shape changed.
2097
 * @param tensor_arena The tensor arena object generated through compilation.
2098
 * @param graph The updated symbolic graph with different tensor shape.
2099
 * @return 0 if successful, -1 if the tensor arena doesn't have enough space to just re-init.
2100
 */
2101
int ccv_nnc_tensor_arena_reinit(ccv_nnc_tensor_arena_t* const tensor_arena, const ccv_nnc_symbolic_graph_t* const graph);
2102
/**
2103
 * Re-init the graph exec arena with updated symbolic graph. This updates some hyper-parameters of
2104
 * executions to match the updated symbolic graph. Note that this will try to keep the backend / algorithm
2105
 * selection from previous graph if possible (meaning if the command still match).
2106
 * @param graph_exec_arena The graph exec arena object that provides the mapping between symbolic and concrete graph.
2107
 * @param graph The concrete graph generated through compile method.
2108
 * @param symbolic_graph The updated symbolic graph.
2109
 */
2110
void ccv_nnc_graph_exec_reinit(ccv_nnc_graph_exec_arena_t* const graph_exec_arena, ccv_nnc_graph_t* const graph, const ccv_nnc_symbolic_graph_t* const symbolic_graph);
2111
/**
2112
 * Function prototype for tensor symbol creation callback.
2113
 */
2114
typedef void(*ccv_nnc_tensor_symbol_new_hook_f)(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_param_t info, const char* const name);
2115
/**
2116
 * Hook into the call to ccv_nnc_tensor_symbol_new. Calling this method returns the previously provided context.
2117
 * @param graph The symbolic graph.
2118
 * @param hook The function to be called when a new tensor symbol is created.
2119
 * @param context The context associated with the callback function.
2120
 * @param previous_hook Return the previous hook if provided.
2121
 * @return The previous context associated with the previous hook function.
2122
 */
2123
void* ccv_nnc_tensor_symbol_new_hook(ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_tensor_symbol_new_hook_f hook, void* context, ccv_nnc_tensor_symbol_new_hook_f* previous_hook);
2124
/**
2125
 * Function prototype for tensor symbol alias creation callback.
2126
 */
2127
typedef void(*ccv_nnc_tensor_symbol_alias_new_hook_f)(void* context, const ccv_nnc_tensor_symbol_t symbol, const ccv_nnc_tensor_symbol_t from_symbol, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info, const char* const name);
2128
/**
2129
 * Hook into the call to ccv_nnc_tensor_symbol_alias_new. Calling this method returns the previously provided context.
2130
 * @param graph The symbolic graph.
2131
 * @param hook The function to be called when a new tensor symbol alias is created.
2132
 * @param context The context associated with the callback function.
2133
 * @param previous_hook Return the previous hook if provided.
2134
 * @return The previous context associated with the previous hook function.
2135
 */
2136
void* ccv_nnc_tensor_symbol_alias_new_hook(ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_tensor_symbol_alias_new_hook_f hook, void* context, ccv_nnc_tensor_symbol_alias_new_hook_f* previous_hook);
2137
/**
2138
 * Set the pair reference for tensor symbols. Pair reference for tensor symbols has very specific meanings.
2139
 * For a backward pass that involves sub-graphs, the commands in the sub-graph could reference tensor symbols of
2140
 * a different graph (its forward pass graph). That is not allowed (two graphs that have no ancestral relationship
2141
 * cannot share a tensor symbol). So we create a new tensor symbol, but set the pair reference.
2142
 * @param graph The symbolic graph.
2143
 * @param tensor_symbol The tensor symbol in the current graph.
2144
 * @param pair_tensor_symbol The tensor symbol in the pair graph.
2145
 */
2146
void ccv_nnc_tensor_symbol_pair_with(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t tensor_symbol, const ccv_nnc_tensor_symbol_t pair_tensor_symbol);
2147
/**
2148
 * Function prototype for execution node symbol creation callback.
2149
 */
2150
typedef void(*ccv_nnc_graph_exec_symbol_new_hook_f)(void* context, const ccv_nnc_graph_exec_symbol_t symbol, const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const char* const name);
2151
/**
2152
 * Hook into the call to ccv_nnc_graph_exec_symbol_new. Calling this method returns the previously provided context.
2153
 * @param graph The symbolic graph.
2154
 * @param hook The function to be called when a new execution node symbol is created.
2155
 * @param context The context associated with the callback function.
2156
 * @param previous_hook The previous hook function associated with this operation.
2157
 * @return The previous context associated with the previous hook function.
2158
 */
2159
void* ccv_nnc_graph_exec_symbol_new_hook(ccv_nnc_symbolic_graph_t* const graph, ccv_nnc_graph_exec_symbol_new_hook_f hook, void* context, ccv_nnc_graph_exec_symbol_new_hook_f* previous_hook);
2160
/**
2161
 * Set the pair reference for exec. This is very similar to the one for concrete graph. A pair reference
2162
 * of a backward pass execution node is its forward pass counterpart.
2163
 * @param graph The symbolic graph.
2164
 * @param exec_symbol The execution node symbol in the current graph.
2165
 * @param pair_exec_symbol The pairing execution node symbol.
2166
 */
2167
void ccv_nnc_graph_exec_symbol_pair_with(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec_symbol, const ccv_nnc_graph_exec_symbol_t pair_exec_symbol);
2168
2169
/** @} */
2170
2171
/** @} */
2172
2173
/**
2174
 * @defgroup level_3_5 Level-3.5 API
2175
 * @{
2176
 */
2177
2178
/**
2179
 * @defgroup level_3_5_autograd Automatic Differentiation
2180
 * @{
2181
 */
2182
2183
/**
2184
 * Compute the backward graph, assuming the provided symbolic graph only contains the "forward" part from sources to destinations.
2185
 * This effectively is called the "autograd" or automatic differentiation process (specifically, "reverse AD") in other libs.
2186
 * For an expression y = f(x), to compute dx, x is the wrt_symbol, y is the f_symbol.
2187
 * @param graph The symbolic graph.
2188
 * @param f_symbols The tensor symbols array of the result (or loss).
2189
 * @param f_symbol_size The size of the f symbols array.
2190
 * @param wrt_symbols The tensor symbols array of the inputs.
2191
 * @param wrt_symbol_size The size of the wrt symbols array.
2192
 * @param sources The source execution nodes array for the computation.
2193
 * @param source_size The size of the source nodes array.
2194
 * @param destinations The destination execution nodes array for the computation.
2195
 * @param destination_size The size of the destination nodes array.
2196
 */
2197
void ccv_nnc_symbolic_graph_backward(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const f_symbols, const int f_symbol_size, const ccv_nnc_tensor_symbol_t* const wrt_symbols, const int wrt_symbol_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size);
2198
/**
2199
 * Get the symbol that contains the gradient. The list will be flushed if the ccv_nnc_symbolic_graph_backward function is called again.
2200
 * @param graph The symbolic graph.
2201
 * @param symbol The tensor symbol we want to retrieve its gradient (must be one of the wrt symbols or the f symbols).
2202
 * @return A tensor symbol that represents the gradient.
2203
 */
2204
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_for_backward(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t symbol);
2205
/**
2206
 * Get the execution node symbol for a tensor symbol. This is used to retrieve the execution node for a gradient tensor symbol.
2207
 * @param graph The symbolic graph.
2208
 * @param symbol The tensor symbol that represents the gradient (must be one of the wrt symbols).
2209
 * @return An execution node symbol that generates the gradient.
2210
 */
2211
CCV_WARN_UNUSED(ccv_nnc_graph_exec_symbol_t) ccv_nnc_graph_exec_symbol_for_backward(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t symbol);
2212
2213
/** @} */
2214
2215
/**
2216
 * @defgroup level_3_5_while While Loop
2217
 * @{
2218
 */
2219
2220
/**
2221
 * @page symbolic_while Construct a "while" loop in a symbolic graph
2222
 *
2223
 * (This document was written in 2016, since then, Caffe2 added support for While loop (as sub-graph), similar
2224
 * implementation added for ONNX as well.)
2225
 *
2226
 * In NNC, a computation graph cannot allow cycles. Thus, there is no flexible way to express loops.
2227
 *
2228
 * A little survey on this problem:
2229
 *
2230
 * * Caffe2 supports specific type of recurrent neural network.
2231
 *
2232
 * * TensorFlow as it stands, supports while construct. Its while construct is very straightforward, a body and
2233
 *   a condition is provided, you can construct whatever graph as you want.
2234
 *
2235
 * * mxnet supports recurrent neural network by unrolling it into normal non-looped graph.
2236
 *
2237
 * * Theano supports "scan" ops, which is a terminable loop (with loop variant, known as sequence).
2238
 *
2239
 * * CNTK supports this with custom BrainScript. Within BrainScript, you can access the previous state in a
2240
 *   function, therefore, effectively supports calling a method multiple times (looping over).
2241
 *
2242
 * Of above, Caffe2 and mxnet gave up on supporting generic loop for performance reasons. TensorFlow supports
2243
 * generic while loop, with all the trouble it may introduce (see the Nested while loop bug in TensorFlow that
2244
 * was recently fixed). Theano picked a point that seems pretty sweet, although there are limitations. CNTK's BrainScript
2245
 * is a DSL, they can do whatever they want with the drawback now that they need to implement a language runtime.
2246
 * TensorFlow, Theano and CNTK all support auto-differentiation over the while loop with tape (Wengert list).
2247
 *
2248
 * A simple way to support loop is to support conditional jump. In fact, conditional jump is a more generic way
2249
 * of doing loops. However, if you put this into the consideration that fully differentiable computation graph
2250
 * needs to be supported, it is terrible. With conditional jump, it is really hard for you to know which tensor
2251
 * is used where, thus keep track for reverse accumulation (backward propagation). There is no counter or
2252
 * whatsoever, it is pretty hard to trace back on which line is executed how many times. Compounding this with
2253
 * NNC's promise that as long as it shows on the graph can be "parallel" computed, it will be parallel computed,
2254
 * it is close to impossible to track if conditional jump used in its raw form. Certain restrictions must be
2255
 * applied to how to do the loop. The compromise comes from closer examination of NNC's preferences.
2256
 *
2257
 * NNC prefers to have the graph without cycles. It also prefers to be fully differentiable. Another important
2258
 * criterion is that most functions in NNC require SSA (Static Single Assignment) representation. With these in
2259
 * mind, supporting while loop has to be strict.
2260
 *
2261
 * Luckily, there is a well-formalized way of supporting this in literature and practice. Because it is
2262
 * well-formalized, translating this into existing NNC implementation is actually pretty straightforward. We
2263
 * are going to introduce a special version of while loop. In literature that discusses SSA, it may be
2264
 * called parameterized loop. For us, it works like this:
2265
 *
2266
 * To construct a while loop for existing NNC graph, you need to be able to separate the existing graph into
2267
 * two sub-graphs.
2268
 *
2269
 * The while-loop sub-graph (WL sub-graph) contains a set of incoming nodes (I-nodes), Condition false output
2270
 * nodes (CFO-nodes) and end nodes (E-nodes). Each set has its own properties, but in short, all incoming edges
2271
 * to the WL sub-graph connect to one of the I-nodes, but nothing else. All outgoing edges from the WL sub-graph
2272
 * connect to one of the CFO-nodes, but nothing else. A node can be either an I-node, CFO-node or E-node,
2273
 * non-exclusively.
2274
 *
2275
 * There are also 3 types of tensors used for all nodes in WL sub-graph: Input tensors (I-tensors) are tensors
2276
 * that are inputs to some nodes, and will never be outputs. Output tensors (O-tensors) are tensors that are
2277
 * outputs from some nodes, but never be inputs to any nodes. I-tensors can be outputs from some nodes that
2278
 * are outside of WL sub-graph. O-tensors can be inputs to some nodes that are outside of WL sub-graph. Internal
2279
 * tensors (IN-tensors) are not visible outside of WL sub-graph, therefore, they can be both inputs and outputs
2280
 * of some nodes inside the sub-graph. Some tensors can be feedback into the WL sub-graph, given either
2281
 * O-tensors or IN-tensors. A parameter map can be given in these cases to describe which maps to what.
2282
 *
2283
 * The way to drive a WL sub-graph is like this: the WL sub-graph runs until all CFO-nodes are reached. At this
2284
 * point, the while_f condition is checked. If true, we continue until all the end-nodes are reached. At this
2285
 * point, we increase the counter, reconfigure the WL sub-graph with parameter map, and run from I-nodes all
2286
 * over again. When reached all CFO-nodes, the condition is checked again, if false, WL sub-graph terminates,
2287
 * and the graph continues from the nodes that are pointed by CFO-nodes.
2288
 *
2289
 * Given these constraints, doing automatic differentiation is not that hard any more. A WL sub-graph, from
2290
 * the whole graph's point of view, is just a giant command that supports both forward / backward operations, with
2291
 * some extra information passed around in the form of userdata (tape).
2292
 *
2293
 * For WL sub-graph, we can continue to leverage the compile / backward functions that are already written for
2294
 * symbolic graph as well.
2295
 *
2296
 * For compile function, we just need to take care of parameter maps (these need to be converted into binded
2297
 * tensors).
2298
 *
2299
 * For backward function, we need to convert parameter maps from assigner (thus, y = x) to accumulator (x += y).
2300
 *
2301
 * This function will replace the nodes that it affects to one sub-graph node. Thus, how to drive this
2302
 * sub-graph is opaque. Its backward form is opaque as well.
2303
 *
2304
 * There is no connection between its nodes and the outside graph nodes other than the three sets:
2305
 *
2306
 * 1. Incoming nodes, the set of nodes that contains the incoming edges from outside, they cannot have edges
2307
 *    pointed to by inside nodes. The sub-graph computation starts from these incoming nodes;
2308
 *
2309
 * 2. Condition false output nodes, when condition is false, we will break out of this while loop, these
2310
 *    nodes pointing to the outside nodes, but no inside nodes;
2311
 *
2312
 * 3. End nodes, the set of nodes that marks the end of the while body, and after these nodes are executed,
2313
 *    we will return to the incoming nodes. These end nodes shouldn't have any edges pointing to inside nodes
2314
 *    (OK if end nodes are condition true output nodes as well);
2315
 *
2316
 * Since these will become a sub-graph (which, to its owner graph, is just a simple "node"), it will have inputs
2317
 * and outputs. Besides that, the loop body needs to be parameterized to be SSA compliant (see:
2318
 * https://www.cs.cmu.edu/~fp/courses/15411-f13/lectures/06-ssa.pdf). Thus, a list of body parameters needs to
2319
 * be provided.
2320
 */
2321
2322
/**
2323
 * @defgroup level_3_5_while_essentials While Loop Essentials
2324
 * @{
2325
 */
2326
2327
/**
2328
 * The given tensors contain all the common / input / output tensors specified in the sub-graph.
2329
 */
2330
typedef int(*ccv_nnc_graph_while_f)(ccv_nnc_tensor_t* const* const inputs, const int input_size, const void* const data);
2331
/**
2332
 * Create a tensor tape that can be used to record for while loop or case..of.
2333
 * @return A ccv_nnc_tensor_tape_t pointer.
2334
 */
2335
CCV_WARN_UNUSED(ccv_nnc_tensor_tape_t*) ccv_nnc_tensor_tape_new(void);
2336
/**
2337
 * Deallocate the tensor tape and all the memory it allocated.
2338
 * @param tape The tensor tape object.
2339
 */
2340
void ccv_nnc_tensor_tape_free(ccv_nnc_tensor_tape_t* const tape);
2341
/**
2342
 * The API to operate on the symbolic graph is more involved than the concrete graph for while loops.
2343
 * The reason is because symbolic graph operates in SSA form (static single assignment), therefore, the while
2344
 * loops for the symbolic graph have to be parameterized.
2345
 * @param graph The symbolic graph.
2346
 * @param cmd The command identifier, can be either CCV_NNC_GRAPH_FORWARD or CCV_NNC_GRAPH_BACKWARD
2347
 * @param while_graph The sub-graph to run the while loop.
2348
 * @param name The name of the while loop. Optional.
2349
 * @return A while loop execution symbol (backed by a sub-graph) of the given graph.
2350
 */
2351
ccv_nnc_graph_exec_symbol_t ccv_nnc_symbolic_graph_while(ccv_nnc_symbolic_graph_t* const graph, const uint32_t cmd, ccv_nnc_symbolic_graph_t* const while_graph, const char* const name);
2352
/**
2353
 * Set the expression to be evaluated, and at which nodes to be evaluated.
2354
 * @param while_graph The symbolic graph that will run the while loop.
2355
 * @param while_expr The function pointer to the expression.
2356
 * @param while_data A custom data provided to the expression evaluation function.
2357
 * @param inputs The input tensor symbols array to the expression evaluation function.
2358
 * @param input_size The size of the input tensor symbols array.
2359
 * @param breakpoints The execution node symbols at which the while loop will pause, evaluate the expression, and choose to either break out or continue.
2360
 * @param breakpoint_size The size of the execution node symbols array.
2361
 */
2362
void ccv_nnc_symbolic_graph_set_while_expr(ccv_nnc_symbolic_graph_t* const while_graph, const ccv_nnc_graph_while_f while_expr, const void* const while_data, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_graph_exec_symbol_t* const breakpoints, const int breakpoint_size);
2363
/**
2364
 * Set the loop carry parameters when reuse. (parameterized loop, these will be carried over to the next loop).
2365
 * @param while_graph The symbolic graph that will run the while loop.
2366
 * @param symbol_map A pair of tensor symbols array, where the source tensor symbol is the output tensor symbol in this loop, the destination tensor symbol is the input tensor symbol in the next loop.
2367
 * @param symbol_map_size The size of the symbol map array.
2368
 */
2369
void ccv_nnc_symbolic_graph_set_carry_overs(ccv_nnc_symbolic_graph_t* const while_graph, const ccv_nnc_tensor_symbol_map_t* const symbol_map, const int symbol_map_size);
2370
/**
2371
 * Retrieve the special (magical) tensor symbol that retains the while loop counter (thus, dimension of 1x1x1, CCV_64S type).
2372
 * @param while_graph The symbolic graph that will run the while loop.
2373
 * @return A tensor symbol that represents the implicit loop count.
2374
 */
2375
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_for_while_count(const ccv_nnc_symbolic_graph_t* const while_graph);
2376
/**
2377
 * Extract the sub-graph of the while loop from a symbol.
2378
 * @param graph The symbolic graph.
2379
 * @param while_symbol The execution node symbol.
2380
 * @return The sub-graph that represents a while loop.
2381
 */
2382
CCV_WARN_UNUSED(ccv_nnc_symbolic_graph_t*) ccv_nnc_symbolic_graph_from_while_symbol(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t while_symbol);
2383
/**
2384
 * Constructing looped concrete graph. Note that this interface is a little bit simpler than the one for symbolic
2385
 * graph. The reason is that a concrete graph operates on allocated tensors, thus, there is no mapping of tensor
2386
 * symbols between the parent graph and the while graph. (The reason to have a mapping in symbolic graphs is to
2387
 * constrain the variable leaking between the sub graph and parent graph).
2388
 * @param graph The concrete graph.
2389
 * @param cmd The command identifier, can be either CCV_NNC_GRAPH_FORWARD or CCV_NNC_GRAPH_BACKWARD
2390
 * @param while_graph The sub-graph to run the while loop.
2391
 * @return An execution node that represents the sub-graph.
2392
 */
2393
CCV_WARN_UNUSED(ccv_nnc_graph_exec_t) ccv_nnc_graph_while(ccv_nnc_graph_t* const graph, const uint32_t cmd, ccv_nnc_graph_t* const while_graph);
2394
/**
2395
 * Set the evaluated expression for the while loop. The while loop will break out if the expression evaluates to 0.
2396
 * @param while_graph The concrete graph that will run the while loop.
2397
 * @param while_expr The function pointer to the expression.
2398
 * @param while_data A custom data provided to the expression evaluation function.
2399
 * @param inputs The input tensors array to the expression evaluation function.
2400
 * @param input_size The size of the input tensors array.
2401
 * @param breakpoints The execution nodes at which the while loop will pause, evaluate the expression, and choose to either break out or continue.
2402
 * @param breakpoint_size The size of the execution nodes array.
2403
 */
2404
void ccv_nnc_graph_set_while_expr(ccv_nnc_graph_t* const while_graph, const ccv_nnc_graph_while_f while_expr, const void* const while_data, ccv_nnc_tensor_t* const* const inputs, const int input_size, const ccv_nnc_graph_exec_t* const breakpoints, const int breakpoint_size);
2405
/**
2406
 * Get the special tensor for the while loop count. It contains one 64-bit integer value. We keep an implicit count
2407
 * when evaluating the while loop and you can access it with this tensor.
2408
 * @param while_graph The concrete graph that will run the while loop.
2409
 * @return A special tensor that you can retrieve the loop count at .data.i64[0].
2410
 */
2411
CCV_WARN_UNUSED(ccv_nnc_tensor_t) ccv_nnc_tensor_for_while_count(const ccv_nnc_graph_t* const while_graph);
2412
/**
2413
 * Retrieve the sub-graph from an execution node.
2414
 * @param graph The concrete graph.
2415
 * @param exec The execution node that represents the sub-graph.
2416
 * @return The sub-graph.
2417
 */
2418
CCV_WARN_UNUSED(ccv_nnc_graph_t*) ccv_nnc_graph_from_while_exec(const ccv_nnc_graph_t* const graph, ccv_nnc_graph_exec_t exec);
2419
2420
/** @} */
2421
2422
/**
2423
 * @defgroup level_3_5_while_others While Loop Others
2424
 * @{
2425
 */
2426
2427
/**
2428
 * For a given tape on a given graph, update the input / output tensors so new version will be created (if needed).
2429
 * @param tape The tensor tape object.
2430
 * @param graph The concrete graph this tensor tape is executing in.
2431
 * @param input_flags The flags associated with input tensors.
2432
 * @param inputs The input tensors.
2433
 * @param input_size The size of input tensors array.
2434
 * @param output_flags The flags associated with output tensors.
2435
 * @param outputs The output tensors.
2436
 * @param output_size The size of output tensors array.
2437
 */
2438
void ccv_nnc_tensor_tape_io(ccv_nnc_tensor_tape_t* const tape, const ccv_nnc_graph_t* const graph, const int* const input_flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, const int* const output_flags, ccv_nnc_tensor_t* const* const outputs, const int output_size);
2439
/**
2440
 * Retrieve the number we associated with the execution node that is recorded on the tape for a particular run of the graph.
2441
 * @param tape The tensor tape object.
2442
 * @param graph The concrete graph this tensor tape is executing in.
2443
 * @param exec The execution node.
2444
 * @return The number associated with the execution node.
2445
 */
2446
uint64_t ccv_nnc_tensor_tape_numbering(ccv_nnc_tensor_tape_t* const tape, const ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec);
2447
/**
2448
 * Set the number we associated with the execution node that is recorded on the tape for a particular run of the graph.
2449
 * @param tape The tensor tape object.
2450
 * @param graph The concrete graph this tensor tape is executing in.
2451
 * @param exec The execution node.
2452
 * @param numbering The number associated with the execution node.
2453
 */
2454
void ccv_nnc_tensor_tape_set_numbering(ccv_nnc_tensor_tape_t* const tape, ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, const uint64_t numbering);
2455
/**
2456
 * Augmented tensor to run a graph with while loop (An obvious example is dynamic RNN).
2457
 */
2458
typedef struct ccv_nnc_tensor_multiview_s {
2459
  // This is an augmented ccv_nnc_tensor_view_t.
2460
  // Namely, it can point to multiple versions of tensors.
2461
  int type; // This type is CCV_NNC_TENSOR_MULTI_VIEW
2462
  // kind specifies how the multi-version tensors are stored (CCV_NNC_MULTIVIEW_K0N or CCV_NNC_MULTIVIEW_K1N).
2463
  // See the comments on the enum that follows this struct.
2464
  uint8_t kind;
2465
  uint16_t repeat; // The length of the repeat.
2466
  intptr_t anchor; // On which graph this multi-view tensor is wrapped. This helps to determine on which level the multi-view tensor should be unwrapped.
2467
  // If this tensor points to a tensor view, data.u8 - offset is the real pointer start.
2468
  off_t offset;
2469
  struct ccv_nnc_tensor_multiview_s* p; // If this is wrapped with another multiview tensor, this points to the parent one.
2470
  ccv_nnc_tensor_t* it; // Current tensor (tensor in use), this is updated along with the graph computation.
2471
  // This is useful because by just traversing `it`, I can get the latest up-to-date reference to this multi-view tensor.
  // NOTE(review): the original comment said `tv`, presumably an older name of `it` -- confirm.
2472
  ccv_array_t* sp; // Synchronized tensor views. This corresponds to ccv_nnc_tensor_synchronize_to_multiview method, that records all the tensors registered for updates.
2473
  ccv_nnc_tensor_t* _inline_data[4]; // Inline storage for the tensor list; CCV_NNC_MULTIVIEW_DATA falls back to this when _heap_data is not set.
2474
  ccv_nnc_tensor_t** _heap_data; // Heap storage for the tensor list; when non-NULL, CCV_NNC_MULTIVIEW_DATA returns this instead of _inline_data.
2475
} ccv_nnc_tensor_multiview_t;
2476
3.40k
/** Select the tensor list of a multiview: the heap-allocated array when it is set, otherwise the inline storage. */
#define CCV_NNC_MULTIVIEW_DATA(x) ((x)->_heap_data ? (x)->_heap_data : (x)->_inline_data)
2477
234
/* The whole expansion is parenthesized so the cast cannot be split by a neighboring operator (e.g. sizeof or a postfix expression). */
#define CCV_NNC_MULTIVIEW_PHI ((intptr_t)0x1) /**< Denote this is a phi multi-view tensor. */
2478
2479
/** How a multiview tensor cycles through its list of tensors as the loop count advances. */
enum {
2480
  CCV_NNC_MULTIVIEW_K0N = 0, /**< All of them are repeated. */
2481
  CCV_NNC_MULTIVIEW_K1N = 1, /**< The first one is used once, then the rest (from the second one on) repeat. (0111111...) */
2482
};
2483
/** True when the multiview is of kind CCV_NNC_MULTIVIEW_K0N and its repeat is 1. */
#define CCV_NNC_MULTIVIEW_K01(x) ((x)->kind == CCV_NNC_MULTIVIEW_K0N && (x)->repeat == 1)
2484
/**
2485
 * Setup a tensor multiview with a given set of tensors.
2486
 * A multiview tensor points to a list of tensors, and its access depends on the loop count.
2487
 * For example, if we have a multiview tensor with list of [a, b, c, d], and kind is 1N, repeat is 3.
2488
 * For loop count 0, 1, 2, 3, 4, 5, the corresponding tensors used will be a, b, c, d, b, c. If kind
2489
 * is 0N, and repeat is 4, it will be a, b, c, d, a, b.
2490
 * @param data[] The pointer to the list of tensors the multiview object can point to.
2491
 * @param kind Can be either CCV_NNC_MULTIVIEW_K0N or CCV_NNC_MULTIVIEW_K1N, basically whether to keep the initial tensor.
2492
 * @param repeat The length of the repeat.
2493
 * @param graph Which graph this multiview object attaches to.
2494
 * @param tensor_multiview The tensor multiview object to be updated.
2495
 */
2496
void ccv_nnc_tensor_multiview(ccv_nnc_tensor_t* data[], const uint8_t kind, const uint16_t repeat, const ccv_nnc_graph_t* const graph, ccv_nnc_tensor_multiview_t* const tensor_multiview);
2497
/**
2498
 * Since tensor_multiview will never be allocated with *_new method, the *_free method simply frees anything that is dynamically allocated afterwards (such as the reference items).
2499
 * @param tensor_multiview The tensor multiview object to be deallocated.
2500
 */
2501
void ccv_nnc_tensor_multiview_free(const ccv_nnc_tensor_multiview_t tensor_multiview);
2502
/**
2503
 * Setup a tensor as a reference to a tensor multiview, thus, when the tensor multiview's `it` (current tensor) updates, the tensor reference's data.u8 will get updated as well (pointing to the same memory region as `it`).
2504
 * @param tensor_multiview The tensor multiview object.
2505
 * @param tensor The tensor that will be updated along with the multiview object.
2506
 */
2507
void ccv_nnc_tensor_synchronize_to_multiview(ccv_nnc_tensor_multiview_t* const tensor_multiview, ccv_nnc_tensor_t* const tensor);
2508
/**
2509
 * Send broadcast to subscribers of the multiview, call this in the beginning of exec.
2510
 * @param tensor_multiview The tensor multiview object.
2511
 */
2512
void ccv_nnc_tensor_multiview_synchronize(ccv_nnc_tensor_multiview_t* const tensor_multiview);
2513
2514
/** @} */
2515
2516
/** @} */
2517
2518
/**
2519
 * @defgroup level_3_5_case_of Branching
2520
 * @{
2521
 */
2522
2523
/**
2524
 * @page symbolic_switch Construct "switch" control structure in symbolic graph
2525
 *
2526
 * Here I use the keyword case_of. To provide a "switch" control structure within NNC has some nice properties
2527
 * even though you can simulate this with a while loop technically.
2528
 *
2529
 * 1. More optimal memory allocation: with "switch" control structure, memory can be multiplexed for each code
2530
 *    path because they are mutually exclusive.
2531
 *
2532
 * 2. No tape should be used within each branch: if we simulate with a "while" loop, any results from within
2533
 *    the "switch" statement has to be kept on the tape, which is inefficient because you don't need any tape
2534
 *    for the "switch" statement other than to record which path is taken.
2535
 *
2536
 * The particular "switch" control structure provided here is a multi-way structured "switch". Each branch is a
2537
 * sub-graph, so it is well-scoped. A node branches out based on the case_of condition return value to either of
2538
 * the branches (numbering from 0 to n, -1 means no path taken). If no path is taken, the output tensors will be
2539
 * assigned with the default tensors and continue. Otherwise the computation within the sub-graph will be
2540
 * carried out and the output tensors will be assigned with the tensors specified within that sub-graph and
2541
 * continue.
2542
 *
2543
 * If we want to consider speculative execution in the future, we need to revisit our memory allocation scheme.
2544
 */
2545
2546
/**
2547
 * Function prototype to evaluate a branch expression.
2548
 */
2549
typedef int(*ccv_nnc_graph_case_of_f)(ccv_nnc_tensor_t* const* const inputs, const int input_size, const void* const data);
2550
/**
2551
 * Create a new case..of execution node symbol.
2552
 * @param graph The symbolic graph.
2553
 * @param cmd The command identifier, can be either CCV_NNC_GRAPH_FORWARD or CCV_NNC_GRAPH_BACKWARD
2554
 * @param inputs The input tensor symbols array for the expression.
2555
 * @param input_size The size of the input tensor symbols array.
2556
 * @param symbol_map The pair of tensor symbols array where the source is the input tensor symbol and the destination is the output tensor symbol.
2557
 * @param symbol_map_size The size of symbol map array.
2558
 * @param name The name of the case..of graph. Optional.
2559
 * @return An execution node symbol that represents the case..of graph.
2560
 */
2561
CCV_WARN_UNUSED(ccv_nnc_graph_exec_symbol_t) ccv_nnc_symbolic_graph_case_of_new(ccv_nnc_symbolic_graph_t* const graph, const uint32_t cmd, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_tensor_symbol_map_t* const symbol_map, const int symbol_map_size, const char* const name);
2562
/**
2563
 * Set the expression to be evaluated when choosing which sub-graph to branch to.
2564
 * @param graph The symbolic graph.
2565
 * @param exec The execution node symbol that represents the case..of graph.
2566
 * @param case_of The function pointer to evaluate.
2567
 * @param case_of_data The data associated with the function pointer.
2568
 */
2569
void ccv_nnc_symbolic_graph_set_case_of_expr(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t exec, ccv_nnc_graph_case_of_f case_of, const void* case_of_data);
2570
/**
2571
 * Set a sub-graph as one of the branches for the case..of graph.
2572
 * @param graph The symbolic graph.
2573
 * @param symbol The execution node symbol that represents the case..of graph.
2574
 * @param case_graph The sub-graph for one of the branches.
2575
 * @param case_of The index assigned to this sub-graph (expression returns this index to determine which sub-graph to execute).
2576
 * @param symbol_map The pair of tensor symbols array where the source is the output tensor symbol of the sub-graph, and the destination is the output tensor symbol of the execution node symbol.
2577
 * @param symbol_map_size The size of the symbol map array.
2578
 */
2579
void ccv_nnc_symbolic_graph_set_case_of(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol, ccv_nnc_symbolic_graph_t* const case_graph, const int case_of, const ccv_nnc_tensor_symbol_map_t* const symbol_map, const int symbol_map_size);
2580
/**
2581
 * Create a new case..of execution node.
2582
 * @param graph The concrete graph.
2583
 * @param cmd The command identifier, can be either CCV_NNC_GRAPH_FORWARD or CCV_NNC_GRAPH_BACKWARD
2584
 * @param inputs The input tensors array supplied to the expression.
2585
 * @param input_size The size of the input tensors array.
2586
 * @param outputs The output tensors array.
2587
 * @param output_size The size of the output tensors array.
2588
 * @return An execution node that represents the case..of graph.
2589
 */
2590
CCV_WARN_UNUSED(ccv_nnc_graph_exec_t) ccv_nnc_graph_case_of_new(ccv_nnc_graph_t* const graph, const uint32_t cmd, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size);
2591
/**
2592
 * Set the expression to be evaluated when choosing which sub-graph to branch to.
2593
 * @param graph The concrete graph.
2594
 * @param exec The execution node that represents the case..of graph.
2595
 * @param case_of The function pointer to evaluate.
2596
 * @param case_of_data The data associated with the function pointer.
2597
 * @param offset An integer added to the expression output to help choose the index. Thus, real index = expression index + offset.
2598
 */
2599
void ccv_nnc_graph_set_case_of_expr(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_graph_case_of_f case_of, const void* case_of_data, const int offset);
2600
/**
2601
 * Set a sub-graph as one of the branches for the case..of graph.
2602
 * @param graph The concrete graph.
2603
 * @param exec The execution node that represents the case..of graph.
2604
 * @param case_graph The sub-graph for one of the branches.
2605
 * @param case_of The index assigned to this sub-graph (expression returns this index + offset to determine which sub-graph to execute).
2606
 */
2607
void ccv_nnc_graph_set_case_of(ccv_nnc_graph_t* const graph, const ccv_nnc_graph_exec_t exec, ccv_nnc_graph_t* const case_graph, const int case_of);
2608
2609
/** @} */
2610
2611
/**
2612
 * @defgroup level_3_5_minimizer Gradient-based Optimization
2613
 * @{
2614
 */
2615
2616
/**
2617
 * This is the comparable part to Caffe's solver or TensorFlow's optimizer. It took a step further than just
2618
 * computing the gradient: it also applies the gradient to update parameters to minimize the loss.
2619
 * @param graph The symbolic graph.
2620
 * @param minimizer The wrapped command that represents a particular optimization strategy.
2621
 * @param losses The tensor symbols array of losses.
2622
 * @param loss_size The size of the loss symbols array.
2623
 * @param parameters The parameter tensor symbols to optimize.
2624
 * @param parameter_size The size of parameter symbols array.
2625
 * @param inputs The additional input symbols we compute gradient against.
2626
 * @param input_size The size of the additional input symbols array.
2627
 * @param sources The source execution nodes array.
2628
 * @param source_size The size of source nodes array.
2629
 * @param destinations The destinations execution nodes array.
2630
 * @param destination_size The size of destination nodes array.
2631
 * @param gradients The tensor symbols that represent the gradient for update, should be the same size as the parameters array + input array size. This can be 0 (optional).
2632
 * @param updated_parameters The tensor symbols that represent the updated parameters, should be the same size as the parameters array.
2633
 * @param saved_aux The tensor symbols that are helpful for particular optimization strategy.
2634
 * @param graph_exec_symbols The execution node symbols for the updates, should be the same size as the parameters array.
2635
 */
2636
void ccv_nnc_symbolic_graph_minimize(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_cmd_t minimizer, const ccv_nnc_tensor_symbol_t* const losses, const int loss_size, const ccv_nnc_tensor_symbol_t* const parameters, const int parameter_size, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size, ccv_nnc_tensor_symbol_t* const gradients, ccv_nnc_tensor_symbol_t* const updated_parameters, ccv_nnc_tensor_symbol_map_t* const saved_aux, ccv_nnc_graph_exec_symbol_t* const graph_exec_symbols);
2637
/**
2638
 * The number of extra saved aux per parameter only depends on the commands. For example, SGD with momentum requires 1 aux (for momentum).
2639
 * Others require more.
2640
 * @param minimizer The wrapped command that represents a particular optimization strategy.
2641
 * @return the number of saved aux per parameter.
2642
 */
2643
CCV_WARN_UNUSED(int) ccv_nnc_minimizer_saved_aux_size(const ccv_nnc_cmd_t minimizer);
2644
2645
/** @} */
2646
2647
/**
2648
 * @defgroup level_3_5_simplify Graph Simplification
2649
 * @{
2650
 */
2651
2652
/**
2653
 * @page symbolic_simplify Symbolic graph simplification
2654
 *
2655
 * We make a distinction between graph simplifications and optimizations (autotune).
2656
 *
2657
 * Simplification: rewrite the graph and the resulting graph will have fewer nodes. This is done on the symbolic
2658
 * graph only. Passes that are "simplification" include pruning, common sub-expression eliminations, constant
2659
 * folding etc.
2660
 *
2661
 * Optimization (autotune): graph optimization can have more objectives. The most obvious objective is to reduce
2662
 * computation time. For symbolic graph, passes that reduce computation time include data layout optimizations,
2663
 * auto parallel etc (in normal optimization implementations, they have a cost model to guide the optimization.
2664
 * NNC's implementation uses a cost database that profiles the time cost on the device to guide the optimization.
2665
 * We call it autotune to distinguish with the normal optimization passes because we need device profile data).
2666
 * There could be other objectives, for example, in many deep learning applications, reducing memory footprint
2667
 * can be desirable. However, as always in computer science, memory and time is a typical trade-off. Memory
2668
 * optimization almost always results in longer computation time, and the objective is to trade between these two
2669
 * with a bias term (in other frameworks such as TensorFlow, the memory optimizer uses a list of "cheap ops" to
2670
 * bias between the time and memory footprint).
2671
 *
2672
 * For graph optimizations, it can happen on both the symbolic graph level as well as the concrete graph level.
2673
 * For NNC, symbolic graph is already very explicit (data layout, device allocation and data transfer between
2674
 * devices / nodes, even the command backend can all be specified on the symbolic graph), however, some
2675
 * information is unknown until it is compiled down to concrete graph (tensor addresses, tensor initialization
2676
 * etc.), and since graph optimizations need all the information to optimize, keeping the flexibility to do
2677
 * optimization on both symbolic and concrete graph level seems reasonable.
2678
 */
2679
2680
enum {
2681
  /**
2682
   * If two commands generated the same outputs, all the places where the newer output is used will be replaced by
2683
   * the old output. Later on the graph pruning stage, the command that generates the newer output will be
2684
   * eliminated.
2685
   */
2686
  CCV_NNC_SIMPLIFY_COMMON_SUBEXPRESSION_ELIMINATION,
2687
  /**
2688
   * For the given outputs, eliminate unused input tensors, and then eliminate graph execs that don't contribute
2689
   * to the outputs.
2690
   */
2691
  CCV_NNC_SIMPLIFY_GRAPH_PRUNING,
2692
  /**
2693
   * For CCV_NNC_DATA_TRANSFER, if the input / output is the same (on the same device, no alias), we can skip.
2694
   * Similarly, if it is on the same device, but alias of some, for some cases we can skip as well (if neither
2695
   * are carry overs, bypasses etc.)
2696
   */
2697
  CCV_NNC_SIMPLIFY_DATA_TRANSFER_OPT,
2698
  /**
2699
   * Combine a few smaller ops into bigger one. For now, this functionality is limited. I can only address ops
2700
   * that are sequential.
2701
   */
2702
  CCV_NNC_SIMPLIFY_OPS_FUSION,
2703
  // CCV_NNC_SIMPLIFY_CONSTANT_FOLDING, // This currently is not supported, because we don't have efficient way to express constant in symbolic graph.
2704
};
2705
/**
2706
 * Simplify a graph with given list of passes, in that particular order.
2707
 * Note, when a graph is simplified, its sources / destinations are changed as well.
2708
 * @param graph The symbolic graph.
2709
 * @param passes The array of passes we are going to apply.
2710
 * @param pass_size The size of the passes array.
2711
 * @param binds The tensor symbols we may bind to an input later (it doesn't prevent pruning any execution nodes).
2712
 * @param bind_size The size of the bind array.
2713
 * @param outputs The output tensor symbols we want to retain (we are going to prune any execution nodes that are not related to these outputs).
2714
 * @param output_size The size of the output array.
2715
 * @param sources The source execution node symbols array.
2716
 * @param source_size The size of source node symbols array.
2717
 * @param destinations The destinations execution node symbols array.
2718
 * @param destination_size The size of destination node symbols array.
2719
 */
2720
void ccv_nnc_symbolic_graph_simplify(ccv_nnc_symbolic_graph_t* const graph, const int* const passes, const int pass_size, const ccv_nnc_tensor_symbol_t* const binds, const int bind_size, const ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size);
2721
2722
/** @} */
2723
2724
/**
2725
 * @defgroup level_3_5_parallel Automatic Graph Parallelization
2726
 * @{
2727
 */
2728
2729
enum {
2730
  /**
2731
   * Op for reducer / allreducer. Currently only supports sum.
2732
   */
2733
  CCV_NNC_PARALLEL_REDUCE_OP_SUM,
2734
};
2735
2736
/**
2737
 * Turn the existing graph to be capable of running on several devices with different data inputs in parallel.
2738
 * With this method, additional tensor symbols will be created that run on different devices. That having
2739
 * been said, there are concepts of "broadcast" and "reduce". "broadcast" tensor symbols will be copied to
2740
 * different devices, while "reduce" tensors will be summed from different devices to the default device.
2741
 * "allreducer" concept is simpler. The allreduce operation will be performed on these tensors and then
2742
 * be used on different devices again.
2743
 *
2744
 * Limitations: right now, the way to reduce / allreduce tensors only supports "sum". The data parallel
2745
 * only supports GPU, thus, the nodes that will be duplicated are GPU computations and GPU memory backed
2746
 * tensors. Also, right now, the tensors to be broadcasted / allreduced / reduced should have no aliases.
2747
 *
2748
 * @param graph The symbolic graph.
2749
 * @param parallel Number of devices we want to run on. 0 will use all devices available. 1 will skip.
2750
 * @param broadcasts The tensor symbols to be broadcasted.
2751
 * @param broadcast_size The size of the broadcast tensor symbols array.
2752
 * @param allreducers The tensor symbols to be allreduced.
2753
 * @param allreducer_size The size of the allreducer tensor symbols array.
2754
 * @param allreducer_outs Return the tensor symbols for allreducers before they are allreduced. Optional, 0
2755
 *        means I don't care about this.
2756
 * @param reducers The tensor symbols to be reduced.
2757
 * @param reducer_size The size of the reducer tensor symbols array.
2758
 * @param reducer_outs Return the tensor symbols for reducers after they are reduced. Optional, 0 means
2759
 *        I don't care about this.
2760
 * @param reduce_op_type The reduce op for reducer / allreducer.
2761
 * @param sources The source execution node symbols array.
2762
 * @param source_size The size of source node symbols array.
2763
 * @param destinations The destinations execution node symbols array.
2764
 * @param destination_size The size of destination node symbols array.
2765
 */
2766
void ccv_nnc_symbolic_graph_data_parallel(ccv_nnc_symbolic_graph_t* const graph, const int parallel, const ccv_nnc_tensor_symbol_t* const broadcasts, const int broadcast_size, const ccv_nnc_tensor_symbol_t* const allreducers, const int allreducer_size, ccv_nnc_tensor_symbol_t* const allreducer_outs, const ccv_nnc_tensor_symbol_t* const reducers, const int reducer_size, ccv_nnc_tensor_symbol_t* const reducer_outs, const int reduce_op_type, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size);
2767
/**
2768
 * Get the symbol that is on a device other than the default one. The list will be flushed if the
2769
 * ccv_nnc_symbolic_graph_data_parallel function is called again.
2770
 * @param graph The symbolic graph.
2771
 * @param symbol The tensor symbol we want to retrieve its counterpart on a different device.
2772
 * @param device_id The device numeric id for this symbol.
2773
 * @return A tensor symbol that is on a different device.
2774
 */
2775
CCV_WARN_UNUSED(ccv_nnc_tensor_symbol_t) ccv_nnc_tensor_symbol_copy(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t symbol, const int device_id);
2776
/**
2777
 * Set corresponding symbol for this symbol on another device. Thus, someone else can query this
2778
 * later with ccv_nnc_tensor_symbol_copy
2779
 * @param graph The symbolic graph.
2780
 * @param symbol The tensor symbol we want to set its counterpart on a different device.
2781
 * @param device_id The device numeric id for this symbol.
2782
 * @param copy The tensor symbol counterpart on a different device.
2783
 */
2784
void ccv_nnc_tensor_symbol_set_copy(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t symbol, const int device_id, const ccv_nnc_tensor_symbol_t copy);
2785
/**
2786
 * Get the execution node that is on a device other than the default one. The list will be flushed
2787
 * if the ccv_nnc_symbolic_graph_data_parallel function is called again.
2788
 * @param graph The symbolic graph.
2789
 * @param symbol The execution node we want to retrieve its counterpart on a different device.
2790
 * @param device_id The device numeric id for this symbol.
2791
 * @return An execution node that is on a different device.
2792
 */
2793
CCV_WARN_UNUSED(ccv_nnc_graph_exec_symbol_t) ccv_nnc_graph_exec_symbol_copy(const ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol, const int device_id);
2794
/**
2795
 * Set corresponding symbol for this symbol on another device. Thus, someone else can query this
2796
 * later with ccv_nnc_graph_exec_symbol_copy
2797
 * @param graph The symbolic graph.
2798
 * @param symbol The execution node we want to set its counterpart on a different device.
2799
 * @param device_id The device numeric id for this symbol.
2800
 * @param copy The execution node counterpart on a different device.
2801
 */
2802
void ccv_nnc_graph_exec_symbol_set_copy(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t symbol, const int device_id, const ccv_nnc_graph_exec_symbol_t copy);
2803
2804
/** @} */
2805
2806
/**
2807
 * @defgroup level_3_5_memory_compression Memory Compression
2808
 * @{
2809
 */
2810
2811
/**
2812
 * Apply LSSC memory compression algorithm to the convolution activations. This will compress the activation
2813
 * layer for convolution, therefore, save the overall memory usage during training time.
2814
 *
2815
 * @param graph The symbolic graph.
2816
 * @param sources The source execution node symbols array.
2817
 * @param source_size The size of source node symbols array.
2818
 * @param destinations The destinations execution node symbols array.
2819
 * @param destination_size The size of destination node symbols array.
2820
 */
2821
void ccv_nnc_symbolic_graph_memory_compression(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size);
2822
2823
/** @} */
2824
2825
/**
2826
 * @defgroup level_3_5_memory_reduction Memory Reduction
2827
 * @{
2828
 */
2829
2830
/**
2831
 * Investigate memory reduction opportunities on the graph. Right now, we are looking at datatype
2832
 * conversions that result in a larger datatype, and these larger ones are kept during the backward pass.
2833
 * For these cases, we will keep the smaller one instead, and reconvert to larger datatype prior
2834
 * to the backward pass.
2835
 *
2836
 * @param graph The symbolic graph.
2837
 * @param sources The source execution node symbols array.
2838
 * @param source_size The size of source node symbols array.
2839
 * @param destinations The destinations execution node symbols array.
2840
 * @param destination_size The size of destination node symbols array.
2841
 */
2842
void ccv_nnc_symbolic_graph_memory_reduction(ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_graph_exec_symbol_t* const sources, const int source_size, const ccv_nnc_graph_exec_symbol_t* const destinations, const int destination_size);
2843
2844
/** @} */
2845
2846
/** @} */
2847
2848
/**
2849
 * @defgroup level_4 Level-4 API
2850
 * @{
2851
 */
2852
2853
/**
2854
 * Opaque pointer to the dynamic graph structure.
2855
 */
2856
typedef struct ccv_nnc_dynamic_graph_s ccv_nnc_dynamic_graph_t;
2857
2858
/**
2859
 * Masquerade this as if it is an on-stack variable, there is a heap allocation but managed by the dynamic graph.
2860
 * The fact that ccv_nnc_tensor_variable_t is a pointer is an implementation detail. It should be treated as an
2861
 * opaque type throughout. We may later extend this to be some on-stack information or even just a uid.
2862
 */
2863
typedef struct ccv_nnc_tensor_variable_s* ccv_nnc_tensor_variable_t;
2864
2865
/**
2866
 * Create a dynamic graph.
2867
 * @return A newly created dynamic graph.
2868
 */
2869
CCV_WARN_UNUSED(ccv_nnc_dynamic_graph_t*) ccv_nnc_dynamic_graph_new(void);
2870
2871
/** @cond ALL */
2872
// Get a new tensor variable.
2873
CCV_WARN_UNUSED(ccv_nnc_tensor_variable_t) ccv_nnc_tensor_variable_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info);
2874
16.5k
#define CCV_NNC_TENSOR_VARIABLE_NEW_X_1(graph) ccv_nnc_tensor_variable_new_impl(graph, ccv_nnc_tensor_auto)
2875
14.7k
#define CCV_NNC_TENSOR_VARIABLE_NEW_X_SEL(_1, _2, _FX, ...) _FX
2876
// Make it so that this method can be called either with no extra argument or with a tensor_param.
2877
31.3k
#define ccv_nnc_tensor_variable_new(graph, ...) CCV_NNC_TENSOR_VARIABLE_NEW_X_SEL(graph, ##__VA_ARGS__, ccv_nnc_tensor_variable_new_impl, 
CCV_NNC_TENSOR_VARIABLE_NEW_X_116.5k
)(graph, ##
__VA_ARGS__8.33k
)
2878
CCV_WARN_UNUSED(ccv_nnc_tensor_variable_t) ccv_nnc_tensor_constant_new_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_param_t info);
2879
#define CCV_NNC_TENSOR_CONSTANT_NEW_X_1(graph) ccv_nnc_tensor_constant_new_impl(graph, ccv_nnc_tensor_auto)
2880
37
#define CCV_NNC_TENSOR_CONSTANT_NEW_X_SEL(_1, _2, _FX, ...) _FX
2881
// Make it so that this method can be called either with no extra argument or with a tensor_param.
2882
37
#define ccv_nnc_tensor_constant_new(graph, ...) CCV_NNC_TENSOR_CONSTANT_NEW_X_SEL(graph, ##__VA_ARGS__, ccv_nnc_tensor_constant_new_impl, CCV_NNC_TENSOR_CONSTANT_NEW_X_1)(graph, ##
__VA_ARGS__5
)
2883
/** @endcond */
2884
2885
/**
2886
 * Create a new tensor variable that is an alias of a given tensor variable. You can alias any tensor
2887
 * variable that itself is not an alias. You can also alias an alias, with some conditions: The tensor
2888
 * variable itself can be alias, but it needs to be contiguous as well. For example, a vector is
2889
 * contiguous. If both conditions are satisfied, you can alias an alias.
2890
 * @param graph The dynamic graph.
2891
 * @param tensor_variable The tensor variable we are going to alias from.
2892
 * @param ofs The offset on each of the dimension.
2893
 * @param stride The stride of each dimension. If all 0, it matches the dimension of the tensor_variable.
2894
 * @param info The tensor parameters for the new alias.
2895
 * @return New tensor variable that is an alias.
2896
 */
2897
CCV_WARN_UNUSED(ccv_nnc_tensor_variable_t) ccv_nnc_tensor_variable_alias_new(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_tensor_param_t info);
2898
/**
2899
 * Get the parameters for a tensor variable.
2900
 * @param graph The dynamic graph.
2901
 * @param tensor_variable The tensor variable reference.
2902
 * @return The tensor parameters.
2903
 */
2904
CCV_WARN_UNUSED(ccv_nnc_tensor_param_t) ccv_nnc_tensor_variable_params(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable);
2905
/**
2906
 * Get the parameters for a tensor variable alias.
2907
 * @param graph The dynamic graph.
2908
 * @param tensor_variable The tensor variable reference.
2909
 * @param ofs The offset on each of the dimension.
2910
 * @param stride The stride of each dimension.
2911
 * @return non-zero if it is not a tensor alias.
2912
 */
2913
int ccv_nnc_tensor_variable_alias_params(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, int ofs[CCV_NNC_MAX_DIM_ALLOC], int stride[CCV_NNC_MAX_DIM_ALLOC]);
2914
2915
/** @cond ALL */
2916
/**
2917
 * Get the underlying tensor for the tensor variable. The tensor allocation may be performed when calling this
2918
 * method. If the tensor cannot be allocated (because no shape specified), return 0.
2919
 * @param graph The dynamic graph.
2920
 * @param tensor_variable The tensor variable to get the underlying tensor.
2921
 * @param stream_context Which stream this command will be executed upon.
2922
 * @return The underlying tensor.
2923
 */
2924
CCV_WARN_UNUSED(ccv_nnc_tensor_t*) ccv_nnc_tensor_from_variable_impl(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_stream_context_t* const stream_context);
2925
8.55k
#define CCV_NNC_TENSOR_FROM_VARIABLE_X_1(graph, tensor_variable) ccv_nnc_tensor_from_variable_impl(graph, tensor_variable, 0)
2926
60.4k
#define CCV_NNC_TENSOR_FROM_VARIABLE_X_SEL(_1, _2, _3, _FX, ...) _FX
2927
// Make it so that this method can be called either with or without a stream_context.
2928
69.0k
#define ccv_nnc_tensor_from_variable(graph, tensor_variable, ...) CCV_NNC_TENSOR_FROM_VARIABLE_X_SEL
(graph, tensor_variable, ##__VA_ARGS__, ccv_nnc_tensor_from_variable_impl, 46.0k
CCV_NNC_TENSOR_FROM_VARIABLE_X_18.55k
)(graph, tensor_variable, ##__VA_ARGS__)
2929
/** @endcond */
2930
/**
2931
 * Query whether a given tensor variable is a constant (no gradient).
2932
 * @param graph The dynamic graph.
2933
 * @param tensor_variable The tensor variable to query whether it is a constant.
2934
 */
2935
CCV_WARN_UNUSED(int) ccv_nnc_tensor_variable_is_constant(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable);
2936
/**
2937
 * Set a tensor on the tensor variable. Tensor variable doesn't take over the life-cycle management of the tensor
2938
 * (in similar way as the tensor binds).
2939
 * @param graph The dynamic graph.
2940
 * @param tensor_variable The tensor variable to set.
2941
 * @param tensor The tensor that is going to be associated with the tensor variable.
2942
 */
2943
void ccv_nnc_tensor_variable_set(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_t* const tensor);
2944
/**
2945
 * Detach the tensor variable from current graph. It acts as if computed between
2946
 * ``ccv_nnc_dynamic_graph_set_no_grad``. Thus, there are a few requirements for this:
2947
 * 1. It cannot be an alias when detach. You have to detach the original, not the alias.
2948
 * 2. When detaching a variable, it could impact correctness when computing gradients. This cuts off backprop, acting as if the
2949
 *    detached variable is a constant (it will be marked as is).
2950
 * After this call, the tensor variable will be marked as constant and you can query that through ``ccv_nnc_tensor_variable_is_constant``.
2951
 * Why this method rather than making this variable as constant to begin with? First, a constant
2952
 * cannot be the output. Second, you may not wrap your computation between no grad, or not all inputs
2953
 * are constants, resulting in a tensor variable that is on a graph. This method is helpful to rescue from
2954
 * that situation.
2955
 * @param graph The dynamic graph.
2956
 * @param tensor_variable The tensor variable to be detached.
2957
 */
2958
void ccv_nnc_tensor_variable_detach(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable);
2959
/**
2960
 * A destructor function to be called when a tensor variable will be freed in the sense that no
2961
 * backward computation needs it any more.
2962
 * Thus, we pass in tensor rather than tensor variable for the destructor.
2963
 */
2964
typedef void (*ccv_nnc_tensor_variable_destructor_f)(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_t* const tensor, void* const context);
2965
/**
2966
 * Hook into a tensor variable such that when it is actually freed (destroyed), the callback will receive
2967
 * the update.
2968
 * @param graph The dynamic graph.
2969
 * @param tensor_variable The tensor variable to observe when it is destroyed.
2970
 * @param func The callback function.
2971
 * @param context The context to be passed along to the callback function.
2972
 **/
2973
void ccv_nnc_tensor_variable_destructor_hook(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable, ccv_nnc_tensor_variable_destructor_f func, void* const context);
2974
/**
2975
 * Check whether the given tensor variables have effects on another set of tensor variables.
2976
 * @param graph The dynamic graph.
2977
 * @param source_variables The tensor variables to check whether it has effect to another set of variables.
2978
 * @param source_variable_size The size of source tensor variables.
2979
 * @param destination_variables Whether the source variables have effect on this list of variables.
2980
 * @param destination_variable_size The size of destination tensor variables.
2981
 * @param bitmask Bit return value, each bit represents a source tensor variable, and 1 meant it can reach some of the destinations.
2982
 */
2983
void ccv_nnc_dynamic_graph_has_effect_to_tensor_variables(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t* const source_variables, const int source_variable_size, const ccv_nnc_tensor_variable_t* const destination_variables, const int destination_variable_size, uint64_t* const bitmask);
2984
/**
2985
 * Execute a command with given tensor variables, the output is in the output tensor variables.
2986
 * @param graph The dynamic graph.
2987
 * @param cmd The wrapped command.
2988
 * @param hint The hint associated with the command.
2989
 * @param flags A reserved field for flags.
2990
 * @param inputs The input tensor variables array.
2991
 * @param input_size The size of the input tensor variables array.
2992
 * @param outputs The output tensor variables array.
2993
 * @param output_size The size of the output tensor variables array.
2994
 * @param parallel The parallel parameter, how many concurrent computations we need to execute.
2995
 * @param stream_context Which stream this command will be executed upon.
2996
 */
2997
int ccv_nnc_dynamic_graph_exec(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, const int parallel, ccv_nnc_stream_context_t* const stream_context);
2998
/**
2999
 * Compute the gradient of given tensor, with respect to the f. Thus, df / dt.
3000
 * @param dynamic_graph The dynamic graph.
3001
 * @param f_variables The output losses.
3002
 * @param f_variable_size The size of output losses array.
3003
 * @param df_optionals The custom gradients for f. If not provided, will default to 1.
3004
 * @param inputs The input variables.
3005
 * @param input_size The size of the input variables array.
3006
 * @param outputs The gradients with respect to the inputs. If the gradient already have value exist, it will be
3007
 *        accumulated into the final value.
3008
 * @param output_size The size of the outputs array. Should be equal to the input_size.
3009
 * @param stream_context Which stream this computation will be executed upon.
3010
 */
3011
void ccv_nnc_dynamic_graph_backward(ccv_nnc_dynamic_graph_t* const dynamic_graph, const ccv_nnc_tensor_variable_t* const f_variables, const int f_variable_size, const ccv_nnc_tensor_variable_t* const df_optionals, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context);
3012
/**
3013
 * Apply gradients to the set of parameters to update them with appropriate minimizer.
3014
 * @param dynamic_graph The dynamic graph.
3015
 * @param minimizer The wrapped command that represents a particular optimization strategy.
3016
 * @param gradients The computed gradients to be applied.
3017
 * @param gradient_size The size of gradients array.
3018
 * @param parameters The parameters to update.
3019
 * @param parameter_size The size of parameters array, should be the same length as gradients.
3020
 * @param saved_aux The aux variables to facilitate the minimizer. See ccv_nnc_minimizer_saved_aux_size.
3021
 * @param parallel The parallel parameter, how many concurrent computations we need to execute.
3022
 * @param stream_context Which stream this computation will be executed upon.
3023
 */
3024
void ccv_nnc_dynamic_graph_apply_gradients(ccv_nnc_dynamic_graph_t* const dynamic_graph, const ccv_nnc_cmd_t minimizer, const ccv_nnc_tensor_variable_t* const gradients, const int gradient_size, ccv_nnc_tensor_variable_t* const parameters, const int parameter_size, ccv_nnc_tensor_variable_t* const saved_aux, const int parallel, ccv_nnc_stream_context_t* const stream_context);
3025
/**
3026
 * Apply one step of minimization (most likely, a gradient descent) to the parameters with a given loss (or
3027
 * losses).
3028
 * @param dynamic_graph The dynamic graph.
3029
 * @param minimizer The wrapped command that represents a particular optimization strategy.
3030
 * @param losses The losses we are trying to minimize.
3031
 * @param loss_size The size of the losses array.
3032
 * @param dloss_optionals The custom gradient for losses. If not provided, will default to 1.
3033
 * @param parameters The parameters to update.
3034
 * @param parameter_size The size of parameters array.
3035
 * @param saved_aux The aux variables to facilitate the minimizer. See ccv_nnc_minimizer_saved_aux_size.
3036
 * @param parallel The parallel parameter, how many concurrent computations we need to execute.
3037
 * @param stream_context Which stream this computation will be executed upon.
3038
 */
3039
void ccv_nnc_dynamic_graph_minimize(ccv_nnc_dynamic_graph_t* const dynamic_graph, const ccv_nnc_cmd_t minimizer, const ccv_nnc_tensor_variable_t* const losses, const int loss_size, const ccv_nnc_tensor_variable_t* const dloss_optionals, ccv_nnc_tensor_variable_t* const parameters, const int parameter_size, ccv_nnc_tensor_variable_t* const saved_aux, const int parallel, ccv_nnc_stream_context_t* const stream_context);
3040
/**
3041
 * Read more in Level-5 API section.
3042
 */
3043
typedef struct ccv_cnnp_model_s ccv_cnnp_model_t;
3044
/**
3045
 * Evaluate a CNNP model on the dynamic graph with set of inputs / outputs.
3046
 * @param dynamic_graph The dynamic graph.
3047
 * @param model The CNNP model to be evaluated against. Note that ccv_nnc_dynamic_graph_backward /
3048
 *              ccv_nnc_dynamic_graph_apply_gradients / ccv_nnc_dynamic_graph_minimize all work with this
3049
 *              model. It takes over the life-cycle of the model, and now you don't need to free it any more.
3050
 * @param is_test Whether we are in test mode or not.
3051
 * @param inputs The input variables.
3052
 * @param input_size The size of the input variables array.
3053
 * @param outputs The output variables.
3054
 * @param output_size The size of the outputs array.
3055
 * @param tensor_tape An opaque tensor tape object to "backpropagate through time".
3056
 * @param stream_context Which stream this computation will be executed upon.
3057
 */
3058
void ccv_nnc_dynamic_graph_evaluate(ccv_nnc_dynamic_graph_t* const dynamic_graph, ccv_cnnp_model_t* const model, const int is_test, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context);
3059
/**
3060
 * Dry run a CNNP model on the dynamic graph with set of inputs up until the actual execution.
3061
 * @param dynamic_graph The dynamic graph.
3062
 * @param model The CNNP model to be evaluated against. Note that ccv_nnc_dynamic_graph_backward /
3063
 *              ccv_nnc_dynamic_graph_apply_gradients / ccv_nnc_dynamic_graph_minimize all work with this
3064
 *              model. It takes over the life-cycle of the model, and now you don't need to free it any more.
3065
 * @param is_test Whether we are in test mode or not.
3066
 * @param inputs The input variables.
3067
 * @param input_size The size of the input variables array.
3068
 * @param stream_context Which stream this computation will be executed upon.
3069
 */
3070
void ccv_nnc_dynamic_graph_dry_run(ccv_nnc_dynamic_graph_t* const dynamic_graph, ccv_cnnp_model_t* const model, const int is_test, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_stream_context_t* const stream_context);
3071
/**
3072
 * Set the maximum operator-level concurrency. This is a soft-limit, e.g. if you have operations on
3073
 * different devices, they are concurrent.
3074
 * @param graph The dynamic graph.
3075
 * @param max_stream_count The maximum concurrency if the dynamic graph schedules internal streams. 0 is no limit.
3076
 */
3077
void ccv_nnc_dynamic_graph_set_max_concurrency(ccv_nnc_dynamic_graph_t* const graph, const int max_stream_count);
3078
/**
3079
 * Enable or disable gradient computation on a dynamic graph.
3080
 * @param dynamic_graph The dynamic graph.
3081
 * @param no_grad If it is 1, disable gradient computation on the dynamic graph.
3082
 * @return 0 if it turned, otherwise it is not turned.
3083
 */
3084
int ccv_nnc_dynamic_graph_set_no_grad(ccv_nnc_dynamic_graph_t* const dynamic_graph, const int no_grad);
3085
/**
3086
 * Dynamic graph will retain the memory it allocated for efficient reuse. Triggering this method
3087
 * intentionally will force this memory to be collected. This is helpful if you know the existing
3088
 * allocation won't be enough for the future use.
3089
 * @param dynamic_graph The dynamic graph.
3090
 */
3091
void ccv_nnc_dynamic_graph_gc(ccv_nnc_dynamic_graph_t* const dynamic_graph);
3092
/**
3093
 * Dispose a tensor variable. You cannot do any computation against this tensor variable afterwards.
3094
 * @param graph The dynamic graph.
3095
 * @param tensor_variable The tensor variable to be disposed.
3096
 */
3097
void ccv_nnc_tensor_variable_free(ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_tensor_variable_t tensor_variable);
3098
/**
3099
 * Free the dynamic graph.
3100
 * @param graph The dynamic graph.
3101
 */
3102
void ccv_nnc_dynamic_graph_free(ccv_nnc_dynamic_graph_t* const graph);
3103
/**
3104
 * Generate output that can be parsed by GraphViz (DOT language).
3105
 * @param graph The dynamic graph.
3106
 * @param flags Either CCV_NNC_SHORT_DOT_GRAPH or CCV_NNC_LONG_DOT_GRAPH
3107
 * @param out The output file stream.
3108
 */
3109
void ccv_nnc_dynamic_graph_dot(const ccv_nnc_dynamic_graph_t* const graph, const int flags, FILE* out);
3110
/**
3111
 * Count how many ops we kept for gradient computation purpose. This method is useful when we
3112
 * want to assert at end of some train loop, we shouldn't have any gradient computation left.
3113
 * @param graph The dynamic graph.
3114
 * @param type The type of variables to trace. CCV_NNC_SYMBOL_TENSOR / CCV_NNC_SYMBOL_GRAPH_EXEC
3115
 * @return How many gradient computations we kept.
3116
 */
3117
CCV_WARN_UNUSED(int) ccv_nnc_dynamic_graph_bookkeeping_count(const ccv_nnc_dynamic_graph_t* const graph, const int type);
3118
/**
3119
 * Provide a hook for upper level to do custom formatting of a given dynamic graph for whatever
3120
 * inside. You can implement logic to format the graph into protobuf, or json. However, this
3121
 * is not the method for you to visit the graph, and do mutations on it. If ops are not needed for
3122
 * gradient computation, likely these are not kept on the dynamic graph at all. You probably will
3123
 * get an empty graph. What's still available can be checked with the ccv_nnc_dynamic_graph_bookkeeping_count.
3124
 * @param graph The dynamic graph.
3125
 * @param format_fn The format callback to be called on every node.
3126
 * @param context The context that will be passed to the callback.
3127
 */
3128
void ccv_nnc_dynamic_graph_format(const ccv_nnc_dynamic_graph_t* const graph, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context);
3129
3130
/** @} */
3131
3132
/**
3133
 * @defgroup level_5 Level-5 API
3134
 * @{
3135
 */
3136
3137
/**
3138
 * @page dataframe What is "dataframe" in ML?
3139
 *
3140
 * A large part of machine learning consists of going through data, processing it into a shape / form that makes sense,
3141
 * and passing that into the model to train. Deep learning frameworks such as TensorFlow or PyTorch provide some
3142
 * dataset APIs for this purpose. It is convenient for these frameworks because by being Python, people can use
3143
 * Pandas to process the data. In Pandas, this is called Dataframe, which again, imitates R language.
3144
 *
3145
 * Another interesting observation comes from recent (2018) release of Create ML framework from Apple. It provides
3146
 * a very close to Pandas style data process API (MLDataTable) but in Swift. This implementation is important because
3147
 * it provides a survey point other than Python.
3148
 *
3149
 * Compared to Python, Swift is a more strongly typed language. Though both are high-level, they both have pretty good
3150
 * string support (of course!), operator overloading, and polymorphism. String support makes column naming natural,
3151
 * Operator overloading makes conditioning and filtering easier, and polymorphism makes column type representation
3152
 * straight-forward. These, unfortunately, are the challenges I need to face when implementing in C, with an eye
3153
 * towards similar ideas later being implemented in a high-level language built on top of this one.
3154
 *
3155
 * It seems I haven't answered the most crucial question yet: what's special about these data process APIs? It is
3156
 * easier to answer this to first see what Pandas or MLDataTable does.
3157
 *
3158
 * * They both represent data as tables. Each column represents different type of the data (time, nd-array, scalar
3159
 *   or string). As such, they both have API to add / remove / rename columns, and load tabular data from disk.
3160
 *
3161
 * * They both provide API to filter (remove / add) rows, and derive new column from existing columns.
3162
 *
3163
 * * Pandas provides more API for data alignment (merge columns from different tables into one table), and compute
3164
 *   statistics (group rows by some criteria, and compute min / max / std / mean within that group).
3165
 *
3166
 * * MLDataTable provides API for batching data (random split), which is covered in TensorFlow / PyTorch's Dataset API
3167
 *   as well.
3168
 *
3169
 * It turns out when you have a noisy dataset, these functionalities are useful to remove unwanted data quickly.
3170
 * If you have a relatively clean dataset, it also allows you to prepare data in a more elegant way. For NNC,
3171
 * the interesting requirements are:
3172
 *
3173
 * 1. Represents scalars, tensors, string as columns; columns can be named.
3174
 *
3175
 * 2. New columns can be derived, from existing ones.
3176
 *
3177
 * 3. Rows can be filtered, grouped, and statistics can be computed.
3178
 *
3179
 * 4. Columns can be aligned, with some given indexes.
3180
 *
3181
 * 5. All these can be done efficiently, on a scale of hundreds of Gigabytes data.
3182
 */
3183
3184
/**
3185
 * @defgroup level_5_dataframe Dataframe API
3186
 * @{
3187
 */
3188
3189
/**
3190
 * A data enumeration function to supply data for given row indexes.
3191
 */
3192
typedef void (*ccv_cnnp_column_data_enum_f)(const int column_idx, const int* const row_idxs, const int row_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context);
3193
/**
3194
 * A destructor for data.
3195
 */
3196
typedef void (*ccv_cnnp_column_data_deinit_f)(void* const data, void* const context);
3197
/**
3198
 * A destructor for context.
3199
 */
3200
typedef void (*ccv_cnnp_column_data_context_deinit_f)(void* const context);
3201
/**
3202
 * Column data.
3203
 */
3204
typedef struct {
3205
  int stream_type; /**< The type of stream context for this column. Each column is only compatible with one stream type. */
3206
  char* name; /**< The name of the column. */
3207
  ccv_cnnp_column_data_enum_f data_enum; /**< The data enumeration function for this column. */
3208
  ccv_cnnp_column_data_deinit_f data_deinit; /**< The deinit function that will be used to destroy the data. */
3209
  void* context; /**< The context that goes along with this column. */
3210
  ccv_cnnp_column_data_context_deinit_f context_deinit; /**< The deinit function that will be used to destroy the context. */
3211
} ccv_cnnp_column_data_t;
3212
/**
3213
 * An opaque structure pointing to the dataframe object.
3214
 */
3215
typedef struct ccv_cnnp_dataframe_s ccv_cnnp_dataframe_t;
3216
/**
3217
 * Create a dataframe object with given column data.
3218
 * @param column_data The column data that can be loaded.
3219
 * @param column_size The size of column data array.
3220
 * @param row_count The number of rows in this dataframe.
3221
 */
3222
CCV_WARN_UNUSED(ccv_cnnp_dataframe_t*) ccv_cnnp_dataframe_new(const ccv_cnnp_column_data_t* const column_data, const int column_size, const int row_count);
3223
/**
3224
 * Add a new column to the dataframe.
3225
 * @param dataframe The dataframe object to add column to.
3226
 * @param data_enum The data provider function for the new column.
3227
 * @param stream_type The type of stream context for this derived column.
3228
 * @param data_deinit The deinit function will be used to destroy the derived data.
3229
 * @param context The context that can be used to generate new column.
3230
 * @param context_deinit The deinit function will be used to destroy the context.
3231
 * @param name The name of the newly added column.
3232
 * @return The new column index.
3233
 */
3234
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_add(ccv_cnnp_dataframe_t* const dataframe, ccv_cnnp_column_data_enum_f data_enum, const int stream_type, ccv_cnnp_column_data_deinit_f data_deinit, void* const context, ccv_cnnp_column_data_context_deinit_f context_deinit, const char* name);
3235
/**
3236
 * A map function that takes the data from multiple columns and derives new data out of it.
3237
 */
3238
typedef void (*ccv_cnnp_column_data_map_f)(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context);
3239
/**
3240
 * Derive a new column out of existing columns in the dataframe.
3241
 * @param dataframe The dataframe object that contains existing columns.
3242
 * @param map The map function used to derive new column from existing columns.
3243
 * @param stream_type The type of stream context for this derived column.
3244
 * @param data_deinit The deinit function will be used to destroy the derived data.
3245
 * @param column_idxs The columns that will be used to derive new column.
3246
 * @param column_idx_size The size of existing columns array.
3247
 * @param context The context that can be used to generate new column.
3248
 * @param context_deinit The deinit function will be used to destroy the context.
3249
 * @param name The name of the new column.
3250
 * @return The new column index.
3251
 */
3252
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_map(ccv_cnnp_dataframe_t* const dataframe, ccv_cnnp_column_data_map_f map, const int stream_type, ccv_cnnp_column_data_deinit_f data_deinit, const int* const column_idxs, const int column_idx_size, void* const context, ccv_cnnp_column_data_context_deinit_f context_deinit, const char* name);
3253
/**
3254
 * Shuffle an existing dataframe.
3255
 * @param dataframe The dataframe that is about to be shuffled.
3256
 */
3257
void ccv_cnnp_dataframe_shuffle(ccv_cnnp_dataframe_t* const dataframe);
3258
/**
3259
 * Query row count of the dataframe.
3260
 * @param dataframe The dataframe we want to query row count.
3261
 * @return The row count of the dataframe.
3262
 */
3263
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_row_count(ccv_cnnp_dataframe_t* const dataframe);
3264
/**
3265
 * Query the column name of a given column on the dataframe.
3266
 * @param dataframe The dataframe we want to query the column name.
3267
 * @param column_idx The index of a column.
3268
 * @return The name of the column.
3269
 */
3270
CCV_WARN_UNUSED(const char*) ccv_cnnp_dataframe_column_name(ccv_cnnp_dataframe_t* const dataframe, const int column_idx);
3271
/**
3272
 * A sampling function that takes multiple rows of one column, and samples them down to one row.
3273
 */
3274
typedef void (*ccv_cnnp_column_data_sample_f)(void* const* const input_data, const int batch_size, void** const output_data, void* const context, ccv_nnc_stream_context_t* const stream_context);
3275
/**
3276
 * Sample a dataframe by batch size. Thus, n rows are sampled to 1 row per sample function on
3277
 * one specific column. This will also sample the multi-column dataframe down to 1 column
3278
 * by selecting the one column to sample.
3279
 * @param dataframe The dataframe that is about to be sampled.
3280
 * @param sample The sample function used to sample n rows into 1.
3281
 * @param data_deinit The deinit function will be used to destroy the derived data.
3282
 * @param column_idx The column we selected to sample.
3283
 * @param batch_size How many rows will be sampled to 1 row from the original data.
3284
 * @param context The context that can be used in sample function.
3285
 * @param context_deinit The deinit function will be used to destroy the context.
3286
 * @return The sampled dataframe.
3287
 */
3288
CCV_WARN_UNUSED(ccv_cnnp_dataframe_t*) ccv_cnnp_dataframe_sample_new(ccv_cnnp_dataframe_t* const dataframe, ccv_cnnp_column_data_sample_f sample, ccv_cnnp_column_data_deinit_f data_deinit, const int column_idx, const int batch_size, void* const context, ccv_cnnp_column_data_context_deinit_f context_deinit);
3289
/**
3290
 * Extract a value out of a struct, assuming the data points to a struct. This method extracts the
3291
 * n-offset value of that struct. For example, if you have struct { ccv_nnc_tensor_t* a; ccv_nnc_tensor_t* b; } S;
3292
 * if you want to extract the b tensor to a different column, you can call this function with
3293
 * offsetof(S, b).
3294
 * @param dataframe The dataframe object to be extracted.
3295
 * @param column_idx The column that we want to extract value of.
3296
 * @param offset The offset. For example, offsetof(S, b).
3297
 * @param name The name of the new column.
3298
 * @return The new column that contains the extracted value.
3299
 */
3300
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_extract_value(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const off_t offset, const char* name);
3301
/**
3302
 * Make a tuple out of columns specified. Thus, the new derived column will contain a tuple
3303
 * with data from all the columns specified here. Tuple here represented as void* tuple[], an
3304
 * array of void* pointers.
3305
 * @param dataframe The dataframe that will contain the new column.
3306
 * @param column_idxs The columns to be tupled.
3307
 * @param column_idx_size The number of columns.
3308
 * @param name The name of the new column.
3309
 * @return The derived column with the tuple.
3310
 */
3311
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_make_tuple(ccv_cnnp_dataframe_t* const dataframe, const int* const column_idxs, const int column_idx_size, const char* name);
3312
/**
3313
 * The size of the tuple. It is equal to the number of columns we specified. The behavior of
3314
 * calling this method on a column that is not a tuple is undefined.
3315
 * @param dataframe The dataframe that contains the tuple column.
3316
 * @param column_idx The tuple column we are going to inspect.
3317
 * @return The tuple size of the column.
3318
 */
3319
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_tuple_size(const ccv_cnnp_dataframe_t* const dataframe, const int column_idx);
3320
/**
3321
 * Extract a data out of a tuple.
3322
 * @param dataframe The dataframe that will contain the new column.
3323
 * @param column_idx The column that is a tuple.
3324
 * @param index The index into the tuple.
3325
 * @param name The name of the new column.
3326
 * @return The derived column with the extracted value.
3327
 */
3328
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_extract_tuple(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const int index, const char* name);
3329
/**
3330
 * The opaque pointer to the iterator.
3331
 */
3332
typedef struct ccv_cnnp_dataframe_iter_s ccv_cnnp_dataframe_iter_t;
3333
/**
3334
 * Get a new iterator of the dataframe.
3335
 * @param dataframe The dataframe object to iterate through.
3336
 * @param column_idxs The columns that will be iterated.
3337
 * @param column_idx_size The size of columns array.
3338
 * @return The opaque iterator object.
3339
 */
3340
CCV_WARN_UNUSED(ccv_cnnp_dataframe_iter_t*) ccv_cnnp_dataframe_iter_new(ccv_cnnp_dataframe_t* const dataframe, const int* const column_idxs, const int column_idx_size);
3341
/**
3342
 * Get the next item from the iterator.
3343
 * @param iter The iterator to go through.
3344
 * @param data_ref The output for the data.
3345
 * @param column_idx_size The size of the data_ref array.
3346
 * @param stream_context The stream context to extract data asynchronously.
3347
 * @return 0 if the iteration is successful, -1 if there is no more row. -2 if it is already ended.
3348
 */
3349
int ccv_cnnp_dataframe_iter_next(ccv_cnnp_dataframe_iter_t* const iter, void** const data_ref, const int column_idx_size, ccv_nnc_stream_context_t* const stream_context);
3350
/**
3351
 * Assuming iterator is on the same row, peek into potentially different column index.
3352
 * @param iter The iterator to go through.
3353
 * @param data_ref The output for the data.
3354
 * @param offset The offset for which column in this iterator to peek at.
3355
 * @param data_ref_size How many columns in this iterator to peek at.
3356
 * @param stream_context The stream context to extract data asynchronously.
3357
 */
3358
void ccv_cnnp_dataframe_iter_peek(ccv_cnnp_dataframe_iter_t* const iter, void** const data_ref, const int offset, const int data_ref_size, ccv_nnc_stream_context_t* const stream_context);
3359
/**
3360
 * Prefetch next item on the iterator with the given stream context. You can call this method multiple times
3361
 * to prefetch multiple items ahead of time.
3362
 * @param iter The iterator to go through.
3363
 * @param prefetch_count How much ahead we should advance for.
3364
 * @param stream_context The stream context to extract data asynchronously.
3365
 * @return 0 if the prefetch is successful, -1 if it is ended.
3366
 */
3367
int ccv_cnnp_dataframe_iter_prefetch(ccv_cnnp_dataframe_iter_t* const iter, const int prefetch_count, ccv_nnc_stream_context_t* const stream_context);
3368
/**
3369
 * Set the cursor of the iterator. When set to 0, the iterator effectively restarts.
3370
 * @param iter The iterator to go through.
3371
 * @param idx The index of the cursor.
3372
 * @return 0 if it is successful, -1 if it is not (exceed the range).
3373
 */
3374
int ccv_cnnp_dataframe_iter_set_cursor(ccv_cnnp_dataframe_iter_t* const iter, const int idx);
3375
/**
3376
 * Free the dataframe iterator object.
3377
 * @param iter The dataframe iterator to be freed.
3378
 */
3379
void ccv_cnnp_dataframe_iter_free(ccv_cnnp_dataframe_iter_t* const iter);
3380
/**
3381
 * Free the dataframe object.
3382
 * @param dataframe The dataframe object to be freed.
3383
 */
3384
void ccv_cnnp_dataframe_free(ccv_cnnp_dataframe_t* const dataframe);
3385
3386
/** @} */
3387
3388
/**
3389
 * @defgroup level_5_dataframe_add_ons Dataframe Add-ons
3390
 * @{
3391
 */
3392
3393
/**
3394
 * Turn a ccv_array_t to a dataframe object.
3395
 * @param array The array we want to turn into a dataframe object.
3396
 * @return The new dataframe object.
3397
 */
3398
CCV_WARN_UNUSED(ccv_cnnp_dataframe_t*) ccv_cnnp_dataframe_from_array_new(ccv_array_t* const array);
3399
/**
3400
 * Derive a new column that copies a tensor array from given column to the derived column on GPU.
3401
 * @param dataframe The dataframe object that gets the derived column.
3402
 * @param column_idx The original column that contains the tensor array on CPU.
3403
 * @param tensor_offset Only copy as outputs[i] = inputs[i + tensor_offset].
3404
 * @param tensor_size How many tensors in the tensor array.
3405
 * @param device_id The device we want to copy the tensors to.
3406
 * @param name The name of the new column.
3407
 * @return The index of the newly derived column.
3408
 */
3409
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_copy_to_gpu(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const int tensor_offset, const int tensor_size, const int device_id, const char* name);
3410
/**
3411
 * Derive a new column by executing a generic command.
3412
 * @param dataframe The dataframe object that gets the derived column.
3413
 * @param column_idx The original column that contains the tensor array.
3414
 * @param cmd The command for this operation.
3415
 * @param hint The hint to run the command.
3416
 * @param flags The flags with the command.
3417
 * @param input_offset Use inputs[i + input_offset] to inputs[i + input_offset + input_size - 1] as the inputs
3418
 * @param input_size How many tensors in the input array.
3419
 * @param output_params The parameters for the outputs.
3420
 * @param output_size How many tensors in the output array.
3421
 * @param stream_type The type of stream context we are going to use.
3422
 * @param name The name of the new column.
3423
 * @return The index of the newly derived column.
3424
 */
3425
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_cmd_exec(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const int input_offset, const int input_size, const ccv_nnc_tensor_param_t* const output_params, const int output_size, const int stream_type, const char* name);
3426
/**
3427
 * Add a new column that contains some tensors. This will add a new column in which each row is the tensor specified
3428
 * by the parameters. It comes in handy when you want to have some auxiliary tensors along with each row.
3429
 * @param dataframe The dataframe object that gets the new column.
3430
 * @param params The parameters for the tensors.
3431
 * @param name The name of the new column.
3432
 * @return The index of the newly added column.
3433
 */
3434
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_add_aux(ccv_cnnp_dataframe_t* const dataframe, const ccv_nnc_tensor_param_t params, const char* name);
3435
/**
3436
 * Read image off a said column. That column should contain the filename (as char array). The new column
3437
 * will contain the ccv_dense_matrix_t / ccv_nnc_tensor_t (both are toll-free bridging) of the image.
3438
 * @param dataframe The dataframe object that loads the images.
3439
 * @param column_idx The column which contains the filename.
3440
 * @param structof The offset to the filename (as char array) from that column. For example, the column
3441
 *        could be a struct and filename could be one of the field. In that case, you can pass offsetof(S, filename)
3442
 * @param name The name of the new column.
3443
 * @return The index of the newly derived column.
3444
 */
3445
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_read_image(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const off_t structof, const char* name);
3446
/**
3447
 * The structure to describe how to apply random jitter to the image.
3448
 */
3449
typedef struct {
3450
  float contrast; /**< The random contrast, the final contrast will be between [1 / (1 + contrast), 1 + contrast] */
3451
  float saturation; /**< The saturation, the final saturation will be between [1 / (1 + saturation), 1 + saturation] */
3452
  float brightness; /**< The brightness, the final brightness will be between [1 / (1 + brightness), 1 + brightness] */
3453
  float lighting; /**< AlexNet style PCA based image jitter */
3454
  float aspect_ratio; /**< Stretch aspect ratio between [1 / (1 + aspect_ratio), 1 + aspect_ratio] */
3455
  int symmetric; /**< Apply random flip on x-axis (around y-axis). */
3456
  int seed; /**< The seed for random generator. */
3457
  int center_crop; /**< Enable crop to the center (otherwise do random crop). */
3458
  struct {
3459
    int min; /**< The minimal dimension of resize */
3460
    int max; /**< The maximal dimension of resize. The final resize can be computed from min + (max - min) * random_unit */
3461
    int roundup; /**< The dimensions on both height / width are a multiple of the roundup value. */
3462
  } resize;
3463
  struct {
3464
    int rows; /**< The height of the final image. */
3465
    int cols; /**< The width of the final image. */
3466
  } size;
3467
  struct {
3468
    int x; /**< The extra random offset on x-axis. */
3469
    int y; /**< The extra random offset on y-axis. */
3470
  } offset;
3471
  struct {
3472
    float mean[3]; /**< Normalize the image with mean. */
3473
    float std[3];/**< Normalize the image with std. pixel = (pixel - mean) / std */
3474
  } normalize;
3475
} ccv_cnnp_random_jitter_t;
3476
/**
3477
 * Apply random jitter on an image to generate a new image.
3478
 * @param dataframe The dataframe object that contains the original image.
3479
 * @param column_idx The column which contains the original image.
3480
 * @param datatype The final datatype of the image. We only support CCV_32F right now.
3481
 * @param random_jitter The random jitter parameters to be applied to.
3482
 * @param name The name of the new column.
3483
 * @return The index of the newly derived column.
3484
 */
3485
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_image_random_jitter(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const int datatype, const ccv_cnnp_random_jitter_t random_jitter, const char* name);
3486
/**
3487
 * Generate a one-hot tensor off the label from a struct.
3488
 * @param dataframe The dataframe object that contains the label.
3489
 * @param column_idx The column which contains the label (as int).
3490
 * @param structof The offset to the label (as int) from that column. For example, the column
3491
 *        could be a struct and label could be one of the fields. You can pass offsetof(S, label)
3492
 * @param range The range of the label, from [0...range - 1]
3493
 * @param onval The value when it hit.
3494
 * @param offval The value for the others.
3495
 * @param datatype The datatype of the tensor.
3496
 * @param format The format of the tensor.
3497
 * @param name The name of the new column.
3498
 * @return The index of the newly derived column.
3499
 */
3500
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_one_hot(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const off_t structof, const int range, const float onval, const float offval, const int datatype, const int format, const char* name);
3501
/**
3502
 * Generate a scalar tensor (a tensor with one value) off a value from a struct.
3503
 * @param dataframe The dataframe object that contains the value.
3504
 * @param column_idx The column which contains the value (as datatype).
3505
 * @param structof The offset to the value (as from_dt) from that column. For example, the column
3506
 *        could be a struct and the value could be one of the fields. You can pass offsetof(S, value)
3507
 * @param from_dt The datatype of the value.
3508
 * @param to_dt The datatype of the tensor.
3509
 * @param format The format of the tensor.
3510
 * @param name The name of the new column.
3511
 * @return The index of the newly derived column.
3512
 */
3513
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_copy_scalar(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const off_t structof, const int from_dt, const int to_dt, const int format, const char* name);
3514
/**
3515
 * Generate vector with ones up to a given length, the rest will be zeros. When applied to batched lengths
3516
 * array, this will generate a matrix of these vectors, squared. The derived column will be a tuple of vectors
3517
 * for the given number of columns.
3518
 * @param dataframe The dataframe object that will contain the matrix.
3519
 * @param column_idxs The columns which contain the sequence lengths (a 1d tensor).
3520
 * @param column_idx_size The number of columns. The derived column will be a tuple of vectors.
3521
 * @param variable_size The size of the final vector can vary, depending on the max length of current batch.
3522
 * @param max_length The absolute max length for inputs.
3523
 * @param name The name of the new column.
3524
 * @return The index of the newly derived column.
3525
 */
3526
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_one_squared(ccv_cnnp_dataframe_t* const dataframe,  const int* const column_idxs, const int column_idx_size, const int variable_size, const int max_length, const char* name);
3527
/**
3528
 * Truncate a given matrix (as a list of vectors) to the given size provided by another vector. The truncated
3529
 * column will be a tuple of vectors for the given columns.
3530
 * @param dataframe The dataframe object that will contain the matrix.
3531
 * @param vec_idxs The columns of the given matrix to be truncated.
3532
 * @param vec_idx_size The number of columns for vec_idxs.
3533
 * @param len_idxs The columns of the given sizes as a vector.
3534
 * @param len_idx_size The number of columns for len_idxs.
3535
 * @param name The name of the new column.
3536
 * @return The index of the newly derived column.
3537
 */
3538
CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_truncate(ccv_cnnp_dataframe_t* const dataframe, const int* const vec_idxs, const int vec_idx_size, const int* len_idxs, const int len_idx_size, const char* name);
3539
/**
3540
 * Combine multiple tensors in a column into one tensor. This method can take multiple columns, which
3541
 * will result a tuple of tensors. Each tensor in the tuple is a batched one from a given column.
3542
 * @param dataframe The dataframe contains the columns of tensors to be batched.
3543
 * @param column_idxs The columns that contain the tensors.
3544
 * @param column_idx_size The number of columns that contain the tensors.
3545
 * @param batch_count How many tensors in one column to be batched together.
3546
 * @param group_count We can generate many groups of batched tensor. For example, if you have column A, B, C, each
3547
 *        have different tensors. If group_count is 1, the result tuple will be (A_b, B_b, C_b). If group count is
3548
 *        2, the result tuple will be (A_b1, B_b1, C_b1, A_b2, B_b2, C_b2). A_b1 etc. will still contain the same
3549
 *        number of batch_count tensors.
3550
 * @param format The result format of the tensor. We support a simple transformation NCHW <=> NHWC with the source tensor.
3551
 * @return The newly created dataframe with the 0-th column is the tuple of batched tensors.
3552
 */
3553
CCV_WARN_UNUSED(ccv_cnnp_dataframe_t*) ccv_cnnp_dataframe_combine_new(ccv_cnnp_dataframe_t* const dataframe, const int* const column_idxs, const int column_idx_size, const int batch_count, const int group_count, const int format);
3554
3555
/** @} */
3556
3557
/**
3558
 * @page dataframe_csv Why to support comma-separated-values files in dataframe?
3559
 *
3560
 * C can be used to write parsers, and they usually can be fast. But most of them can be buggy, with bugs that can either crash, be
3561
 * exploited, or simply produce incorrect results. There really isn't much motivation for me to start writing a parser, even for something as simple as
3562
 * CSV files.
3563
 *
3564
 * However, it was brought to my attention that a full-speed (defined by saturating the PCIx4 for SSD) implementation would
3565
 * be beneficial. I have also started to use nnc in many places where it is handy to load a csv file and generate some tensors out
3566
 * of it.
3567
 *
3568
 * This implementation plans to use a variant of the two-pass approach documented in
3569
 * https://www.microsoft.com/en-us/research/uploads/prod/2019/04/chunker-sigmod19.pdf while first implemented in
3570
 * https://github.com/wiseio/paratext. It is differentiated from these two in these particular ways:
3571
 *
3572
 * 1. The first pass will not only find the quotes and even / odd CRLF, but also collect statistics on how many lines assuming
3573
 *    the first CRLF is within quote / outside of the quote;
3574
 *
3575
 * 2. The second pass will do a copy into a continuous page that mirrors the original csv file, but null-terminate each column, and
3576
 *    assign the start pointer for each.
3577
 *
3578
 * The speculative approach, while interesting, can be challenging to implement on many-core systems, and the worst-case
3579
 * scenario is indeed worse.
3580
 *
3581
 * The implementation itself follows https://tools.ietf.org/html/rfc4180, with only customization of delimiters (so it can support
3582
 * tab-separated-values) and quotes (so you can choose between " and '). Escaping only supports double-quotes for whatever quote
3583
 * symbol you elect.
3584
 */
3585
3586
/**
3587
 * @defgroup level_5_dataframe_csv Dataframe for Comma-Separated-Values Files
3588
 * @{
3589
 */
3590
enum {
3591
  /* It is a file pointer. */
3592
  CCV_CNNP_DATAFRAME_CSV_FILE = 0,
3593
  /* It is a pointer to a memory. */
3594
  CCV_CNNP_DATAFRAME_CSV_MEMORY = 1,
3595
};
3596
3597
/**
3598
 * Create a dataframe object that reads a CSV file. This will eagerly load the file into memory, parse each row / column
3599
 * into null-terminated strings, you can later convert these into numerics if needed. Each column will be a column indexed
3600
 * from 0 to column_size - 1. If there are syntax errors, the parser will make guesses and continue to parse to its best knowledge.
3601
 * If it cannot, we will return null for the object. We support CRLF, LF, and LFCR termination.
3602
 * @param input The FILE handle for on-disk file, or the pointer to the region of the memory we are going to use.
3603
 * @param type The type of either `CCV_CNNP_DATAFRAME_CSV_FILE` or `CCV_CNNP_DATAFRAME_CSV_MEMORY`
3604
 * @param len The length of the memory region, if it is `CCV_CNNP_DATAFRAME_CSV_MEMORY`.
3605
 * @param delim The delim, it is ',' by default (if you provided '\0')
3606
 * @param quote The quote for escape strings, it is '"' by default (if you provided '\0')
3607
 * @param include_header whether to parse the header separately. 1 means we treat the first line as header.
3608
 * @param column_size The number of columns in the resulting dataframe.
3609
 * @return A dataframe that can represent the csv file. nullptr if failed.
3610
 */
3611
CCV_WARN_UNUSED(ccv_cnnp_dataframe_t*) ccv_cnnp_dataframe_from_csv_new(void* const input, const int type, const size_t len, const char delim, const char quote, const int include_header, int* const column_size);
3612
3613
/** @} */
3614
3615
/**
3616
 * @page model Models, layers, and Keras
3617
 *
3618
 * With Keras API in mind, this model implementation essentially is a light-weight way to group neural network layers
3619
 * together. This is a rare case in NNC (or ccv in general) where Object-Oriented programming makes sense. I borrowed
3620
 * heavily from Objective-C / C++ to implement this Object-Oriented interface.
3621
 *
3622
 * Now back to elaboration of the Model interface. It is specifically designed with Keras in mind, asking the question:
3623
 * If we are going to build Keras high-level API in any language (Ruby, Python, Swift, Julia), what would the underlying
3624
 * C interface look like? Here is your answer (hint: it looks very much like just Python Keras API).
3625
 *
3626
 * A model consists of a set of inputs and outputs. This sounds very much like what "Command" is in Level-1 APIs,
3627
 * however, they are different: a model is stateful. For example, a convolution command takes 3 inputs: image, kernel
3628
 * weight and bias, has 1 output: image. A convolution model takes 1 input: image, and 1 output: image. kernel weight
3629
 * and bias are internal states to the model (in Keras, it is called "layer" for convolution, and model means a set of
3630
 * layers. In NNC, that kind of differentiation feels superficial, therefore, a layer is a model).
3631
 *
3632
 * A model can be combined, and a new model can be a combination of other models.
3633
 *
3634
 * The simplest composed model is the sequential model. A sequential model is a model that consists of a sequence of models
3635
 * that each contain one input and one output. The output of the earlier model feeds into the later one, thus, a sequential
3636
 * evaluation path.
3637
 */
3638
3639
/**
3640
 * @defgroup level_5_model Model API
3641
 * @{
3642
 */
3643
3644
/**
3645
 * model type is an abstract type, you won't interact with a naked model ever.
3646
 */
3647
typedef struct ccv_cnnp_model_s ccv_cnnp_model_t;
3648
/**
3649
 * With this type, now in NNC, we have 4 types that represent a "tensor":
3650
 *
3651
 * 1. ccv_nnc_tensor_t / ccv_nnc_tensor_view_t / ccv_nnc_tensor_multiview_t: a concrete tensor with memory allocated.
3652
 *
3653
 * 2. ccv_nnc_tensor_symbol_t: a symbol representation of a tensor, with its data layout, device affinity, and type
3654
 *                             specified.
3655
 *
3656
 * 3. ccv_nnc_tensor_variable_t: in dynamic graph, this represents a concrete tensor with memory allocated, but also
3657
 *                               associated with a recorded execution.
3658
 *
3659
 * 4. ccv_cnnp_model_io_t: this is the most flexible one. No data layout, device affinity or type specified. It can even
3660
 *                         represent a list of tensors rather than just one. This is a handle used by model API to
3661
 *                         associate model inputs / outputs.
3662
 */
3663
typedef struct ccv_cnnp_model_io_s* ccv_cnnp_model_io_t;
3664
/**
3665
 * Create a naked input.
3666
 * @return A ccv_cnnp_model_io_t that represents an input.
3667
 */
3668
CCV_WARN_UNUSED(ccv_cnnp_model_io_t) ccv_cnnp_input(void);
3669
/**
3670
 * This method mimics Keras callable for model (thus, override __call__ method in Python class).
3671
 * @param model A model that we can apply a set of inputs to get one output.
3672
 * @param inputs The set of inputs.
3673
 * @param input_size The size of inputs array.
3674
 * @return A ccv_cnnp_model_io_t that represents the output of the given model.
3675
 */
3676
CCV_WARN_UNUSED(ccv_cnnp_model_io_t) ccv_cnnp_model_apply(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t* const inputs, const int input_size);
3677
/**
3678
 * This method adds non-functional dependencies for a model IO. "Non-functional dependencies" means
3679
 * their outputs are not used for this IO, however, their existence establishes a partial ordering
3680
 * for the execution. In that way, they act as "inputs" but not functional.
3681
 * @param model_io A model IO for which we will add additional non-functional dependencies.
3682
 * @param dependencies The set of dependencies.
3683
 * @param dependency_size The size of dependencies array.
3684
 */
3685
void ccv_cnnp_model_add_dependencies(ccv_cnnp_model_io_t model_io, const ccv_cnnp_model_io_t* const dependencies, const int dependency_size);
3686
enum {
3687
  /* Select only weights, no bias terms. */
3688
  CCV_CNNP_PARAMETER_SELECT_WEIGHT = 0,
3689
  /* Select bias terms, no weights. */
3690
  CCV_CNNP_PARAMETER_SELECT_BIAS = 1,
3691
};
3692
/**
3693
 * This method exposes a parameter of a model as a potential input for another model. Since
3694
 * it is a ccv_cnnp_model_io_t, it can also be used by other methods.
3695
 * @param model A model that we can extract parameters out.
3696
 * @param selector The selector for a parameter. ALL_PARAMETERS means all parameters, or you can select CCV_CNNP_PARAMETER_SELECT_WEIGHT or CCV_CNNP_PARAMETER_SELECT_BIAS.
3697
 * @param index The index into a parameter. ALL_PARAMETERS means all parameters.
3698
 */
3699
CCV_WARN_UNUSED(ccv_cnnp_model_io_t) ccv_cnnp_model_parameters(ccv_cnnp_model_t* const model, const int selector, const int index);
3700
/**
3701
 * A notification function such that a model can be notified.
3702
 * This is useful to broadcast a message to all models that are sub-models of another model.
3703
 */
3704
typedef void (*ccv_cnnp_model_notify_f)(const ccv_cnnp_model_t* const model, const int tag, void* const payload, void* const context);
3705
/**
3706
 * Hook into a model such that when there is a notification, the callback will receive it.
3707
 * @param model A model that can be notified.
3708
 * @param func The callback function.
3709
 * @param context The context to be passed along to the callback function.
3710
 **/
3711
void ccv_cnnp_model_notify_hook(ccv_cnnp_model_t* const model, ccv_cnnp_model_notify_f func, void* const context);
3712
/**
3713
 * Notify a model and its sub-models with a tag and a payload. This will be triggered
3714
 * synchronously.
3715
 * @param model A model that will be notified.
3716
 * @param tag An integer to help identify what kind of notification.
3717
 * @param payload A payload pointer that you can carry arbitrary information.
3718
 */
3719
void ccv_cnnp_model_notify(const ccv_cnnp_model_t* const model, const int tag, void* const payload);
3720
/**
3721
 * This method name is deceiving. It returns a composed model, not a naked model.
3722
 * This composed model takes a set of inputs, and runs through various other models to arrive at
3723
 * the set of outputs.
3724
 * @param inputs The set of inputs.
3725
 * @param input_size The size of inputs array.
3726
 * @param outputs The set of outputs.
3727
 * @param output_size The size of outputs array.
3728
 * @param is_trainable Whether the parameters of this model can be trained. -1 means inherit from parent.
3729
 * @param name The unique name of the model.
3730
 * @return A composed model that takes inputs, and generates the outputs.
3731
 */
3732
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_model_new(const ccv_cnnp_model_io_t* const inputs, const int input_size, const ccv_cnnp_model_io_t* const outputs, const int output_size, const int is_trainable, const char* const name);
3733
/**
3734
 * This method returns a sequential model, which is composed from a sequence of models.
3735
 * @param models The list of models, that takes one input, and emit one output, feeding into the subsequent one.
3736
 * @param model_size The size of the list.
3737
 * @param is_trainable Whether the parameters of this model can be trained.
3738
 * @param name The unique name of the model.
3739
 * @return A composed model that applies these models one by one in sequence.
3740
 */
3741
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_sequential_new(ccv_cnnp_model_t* const* const models, const int model_size, const int is_trainable, const char* const name);
3742
/**
3743
 * A model generation function to be called for dynamic models.
3744
 */
3745
typedef ccv_cnnp_model_t* (*ccv_cnnp_model_dynamic_f)(const ccv_nnc_tensor_param_t* const inputs, const int input_size, void* const context);
3746
/**
3747
 * This method returns a model that will be recreated if it is recompiled. Put it this way, you can call
3748
 * ccv_cnnp_model_compile multiple times with different inputs and input size, however, the model will
3749
 * only be recompiled to some extent. For example, if you called ccv_cnnp_reshape, the shape is determined
3750
 * at the moment you create that model, recompilation won't change it. There are two ways to work around this:
3751
 * 1. Use models that don't have explicit shape specified, for example, ccv_cnnp_dense, and avoid models
3752
 *    that are not as flexible, such as ccv_cnnp_reshape, or ccv_cnnp_cmd_exec.
3753
 * 2. Create with ccv_cnnp_dynamic_new such that the model will be recreated again whenever it is recompiled.
3754
 * @param func The function to be called to create the model.
3755
 * @param context The context used along to create the model.
3756
 * @param name The unique name of the model.
3757
 * @return A model object that is yet to be created until build.
3758
 */
3759
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_dynamic_new(ccv_cnnp_model_dynamic_f func, void* const context, const char* const name);
3760
/**
3761
 * Prepare the model to be trained, the input specifies the batch size etc.
3762
 * Input size technically is not needed, here is a safety check.
3763
 * @param model The model to be compiled.
3764
 * @param inputs The tensor parameters for the model's inputs, that can be used to derive all tensor shapes.
3765
 * @param input_size The size of the inputs array.
3766
 * @param minimizer The wrapped command that represents a particular optimization strategy.
3767
 * @param loss The wrapped command that computes the loss function.
3768
 */
3769
void ccv_cnnp_model_compile(ccv_cnnp_model_t* const model, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_cmd_t minimizer, const ccv_nnc_cmd_t loss);
3770
/**
3771
 * Absorb a new model into the existing model. This requires the new model has exactly the same parameters
3772
 * but other dimensionalities can change. The new model must not be compiled yet; its life-cycle management
3773
 * will be taken over by the existing model. You don't need to free it separately.
3774
 * @param model The existing model.
3775
 * @param init The new model.
3776
 * @param inputs The tensor parameters for the model's inputs, that can be used to derive all tensor shapes.
3777
 * @param input_size The size of the inputs array.
3778
 */
3779
void ccv_cnnp_model_absorb(ccv_cnnp_model_t* const model, ccv_cnnp_model_t* const init, const ccv_nnc_tensor_param_t* const inputs, const int input_size);
3780
/**
3781
 * Create a copy of an existing model.
3782
 * @param model The existing model.
3783
 * @param is_trainable Whether the parameters of this model can be trained.
3784
 * @return The new model that is exactly the same copy of the old one.
3785
 */
3786
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_model_copy(const ccv_cnnp_model_t* const model, const int is_trainable);
3787
/**
3788
 * Get the output size of the model.
3789
 * @param model The existing model.
3790
 * @return The output size of the model.
3791
 */
3792
CCV_WARN_UNUSED(int) ccv_cnnp_model_output_size(const ccv_cnnp_model_t* const model);
3793
/**
3794
 * Get whether the model is trainable.
3795
 * @param model The existing model.
3796
 * @return Whether the model is trainable, -1 is inherited from its parent.
3797
 */
3798
CCV_WARN_UNUSED(int) ccv_cnnp_model_is_trainable(const ccv_cnnp_model_t* const model);
3799
/**
3800
 * Compute the shape of the output tensor after the model is applied to the input.
3801
 * This can only be called after the model is compiled with proper input parameters.
3802
 * @param model The model to compute the output shapes.
3803
 * @param outputs The computed tensor parameters in the output.
3804
 * @param output_size The size of the output array, it has to match the model's output.
3805
 */
3806
void ccv_cnnp_model_tensor_auto(ccv_cnnp_model_t* const model, ccv_nnc_tensor_param_t* const outputs, const int output_size);
3807
/**
3808
 * Generate output that can be parsed by GraphViz (DOT language).
3809
 * @param model The composed model.
3810
 * @param flags Either CCV_NNC_SHORT_DOT_GRAPH or CCV_NNC_LONG_DOT_GRAPH
3811
 * @param outs The output file streams.
3812
 * @param out_size The size of output file stream array.
3813
 */
3814
void ccv_cnnp_model_dot(const ccv_cnnp_model_t* const model, const int flags, FILE** const outs, const int out_size);
3815
/**
3816
 * Provide a hook for upper level to do custom formatting of a given model. You can implement logic
3817
 * to format the model into protobuf, or json. This is only useful after model is compiled.
3818
 * @param model The composed model.
3819
 * @param format_fn The format callback to be called on every node.
3820
 * @param context The context that will be passed to the callback.
3821
 */
3822
void ccv_cnnp_model_format(const ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_format_f format_fn, void* const context);
3823
/**
3824
 * Fit a model to a given input / output. This is a combination of running ccv_cnnp_model_evaluate /
3825
 * ccv_cnnp_model_backward / ccv_cnnp_model_apply_gradients. The difference is that when calling
3826
 * individual functions, the graph is compiled piece by piece, thus, is less efficient than calling
3827
 * ccv_cnnp_model_fit directly. However, having the separate functions makes this implementation much
3828
 * more versatile, for example, you can accumulate gradients for multiple batches, or use custom gradients
3829
 * etc.
3830
 * @param model The composed model.
3831
 * @param inputs The input tensors.
3832
 * @param input_size The size of the input tensors array.
3833
 * @param fits The target tensors.
3834
 * @param fit_size The size of the target tensors array.
3835
 * @param outputs The actual outputs from the model.
3836
 * @param output_size The size of the outputs array.
3837
 * @param tensor_tape An opaque tensor tape object to "backpropagate through time".
3838
 * @param stream_context The stream where the fit can be executed upon.
3839
 */
3840
void ccv_cnnp_model_fit(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const fits, const int fit_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context);
3841
enum {
3842
  /**
3843
   * Don't disable any outgrad.
3844
   */
3845
  CCV_CNNP_DISABLE_OUTGRAD_NONE = (uint64_t)0,
3846
  /**
3847
   * Disable all inputs' outgrads.
3848
   */
3849
  CCV_CNNP_DISABLE_OUTGRAD_ALL = (uint64_t)(int64_t)-1,
3850
};
3851
/**
3852
 * The parameters for how evaluation should behave.
3853
 */
3854
typedef struct {
3855
  int requires_grad; /**< Whether we need to keep intermediate results for gradient computations. */
3856
  int is_test; /**< Whether we evaluate it as test, or just as forward pass of the training process. */
3857
  uint64_t disable_outgrad; /**< Whether we can compute outflow gradients when calling ccv_cnnp_model_backward later, this is a bitmask, you can mark for which input the outgrad is disabled. */
3858
} ccv_cnnp_evaluate_param_t;
3859
/**
3860
 * Evaluate model with output.
3861
 * @param model The composed model.
3862
 * @param params The parameters for how evaluation should behave.
3863
 * @param inputs The input tensors.
3864
 * @param input_size The size of the input tensors array.
3865
 * @param outputs The actual outputs from the model.
3866
 * @param output_size The size of the outputs array.
3867
 * @param tensor_tape An opaque tensor tape object to "backpropagate through time".
3868
 * @param stream_context The stream where the evaluation can be executed upon.
3869
 */
3870
void ccv_cnnp_model_evaluate(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context);
3871
/**
3872
 * Dryrun the model with inputs / outputs. This runs the evaluation loop up until the actual execution.
3873
 * @param model The composed model.
3874
 * @param params The parameters for how evaluation should behave.
3875
 * @param inputs The input tensors.
3876
 * @param input_size The size of the input tensors array.
3877
 * @param outputs The actual outputs from the model.
3878
 * @param output_size The size of the outputs array.
3879
 */
3880
void ccv_cnnp_model_dry_run(ccv_cnnp_model_t* const model, const ccv_cnnp_evaluate_param_t params, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size);
3881
/**
3882
 * Based on the input gradients, compute the output gradients (w.r.t. the inputs). This also adds parameter gradients.
3883
 * @param model The composed model.
3884
 * @param ingrads The input gradients.
3885
 * @param ingrad_size The size of the input gradients array.
3886
 * @param outgrads The output gradients (w.r.t. the inputs).
3887
 * @param outgrad_size The size of the output gradients array.
3888
 * @param tensor_tape An opaque tensor tape object to "backpropagate through time".
3889
 * @param stream_context The stream where the gradient computation can be executed upon.
3890
 */
3891
void ccv_cnnp_model_backward(ccv_cnnp_model_t* const model, ccv_nnc_tensor_t* const* const ingrads, const int ingrad_size, ccv_nnc_tensor_t* const* const outgrads, const int outgrad_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context);
3892
/**
3893
 * Apply the computed gradients to the parameter tensors.
3894
 * @param model The composed model.
3895
 * @param stream_context The stream where the gradient computation can be executed upon.
3896
 */
3897
void ccv_cnnp_model_apply_gradients(ccv_cnnp_model_t* const model, ccv_nnc_stream_context_t* const stream_context);
3898
/**
3899
 * Cancel execution of a model, whether it is forward / backward or gradient application pass. You need to make
3900
 * sure the model is currently executing when cancelling. This method will set a flag internally and the
3901
 * execution will check that flag when pushing compute to the computation device and abort if it is cancelled.
3902
 * When you call other model execution method again, this cancellation won't be in effect and you need to call
3903
 * cancel again.
3904
 * @param model The composed model.
3905
 */
3906
void ccv_cnnp_model_cancel(ccv_cnnp_model_t* const model);
3907
/**
3908
 * Set flags for the exec symbols created by the model. See CCV_NNC_GRAPH_EXEC_* for details.
3909
 * Note that practically right now, only DISABLE_OPT is useful.
3910
 * @param model The composed model before apply / evaluate.
3911
 * @param flags The flags to set on all exec symbols potentially associated with this model.
3912
 */
3913
void ccv_cnnp_model_set_flags(ccv_cnnp_model_t* const model, const int flags);
3914
/**
3915
 * Get flags for the exec symbols created by the model. See CCV_NNC_GRAPH_EXEC_* for details.
3916
 * Note that practically right now, only DISABLE_OPT is useful.
3917
 * @param model The composed model before apply / evaluate.
3918
 */
3919
CCV_WARN_UNUSED(int) ccv_cnnp_model_flags(ccv_cnnp_model_t* const model);
3920
enum {
3921
  /**
3922
   * This is the default flag, if the model is not initialized, will attempt to read from the disk.
3923
   * Otherwise, will persist existing parameters to disk.
3924
   */
3925
  CCV_CNNP_MODEL_CHECKPOINT_READ_WRITE,
3926
  /**
3927
   * Only read parameters out of disk, even if it is already initialized.
3928
   */
3929
  CCV_CNNP_MODEL_CHECKPOINT_READ_ONLY,
3930
  /**
3931
   * Only write parameters to disk.
3932
   */
3933
  CCV_CNNP_MODEL_CHECKPOINT_WRITE_ONLY,
3934
};
3935
/**
3936
 * Write model's tensors to a SQLite database with a given name. Note that we specifically say
3937
 * "model's tensors" because it doesn't persist the model's structure. Hence, you shouldn't
3938
 * expect us to take a name to then have a fully functional model restored from there. You still
3939
 * need to construct the model. This method only writes the tensors (weights and other internal ones)
3940
 * to disk.
3941
 * @param model The model.
3942
 * @param handle The SQLite handle.
3943
 * @param name The name to find the tensors related to the model in the database.
3944
 * @param options The IO options that can do data encode / decode before persistence.
3945
 * @return CCV_IO_FINAL for success, otherwise error.
3946
 */
3947
int ccv_cnnp_model_write(const ccv_cnnp_model_t* const model, void* const handle, const char* const name, const ccv_nnc_tensor_io_option_t* const options);
3948
/**
3949
 * Write model's tensors to a SQLite database implicitly with "" name. This is a convenience method
3950
 * to ccv_cnnp_model_write particularly useful at training time.
3951
 * @param model The composed model.
3952
 * @param fn The file name.
3953
 * @param options The IO options that can do data encode / decode before persistence.
3954
 */
3955
void ccv_cnnp_model_write_to_file(ccv_cnnp_model_t* const model, const char* const fn, const ccv_nnc_tensor_io_option_t* const options);
3956
/**
3957
 * The prototype for the writer function when exporting parameters out.
3958
 * @param tensor The tensor to be written to disk.
3959
 * @param sql The sql to be executed.
3960
 * @param handle The custom handle that you passed in from ``ccv_cnnp_model_write`` method.
3961
 * @param options The IO options that can do data encode / decode before persistence.
3962
 * @param name The name given to a particular parameter.
3963
 */
3964
typedef int (*ccv_cnnp_model_io_writer_f)(const ccv_nnc_tensor_t* const tensor, const char* const sql, void* const handle, const char* const name, const ccv_nnc_tensor_io_option_t* const options);
3965
/**
3966
 * The prototype for the reader function to load parameters.
3967
 * @param handle The custom handle that you passed in from ``ccv_cnnp_model_read`` method.
3968
 * @param name The name given to a particular parameter.
3969
 * @param options The IO options that can do data encode / decode before persistence.
3970
 * @param params The recommended tensor params.
3971
 * @param tensor_out The tensor to be loaded.
3972
 */
3973
typedef int (*ccv_cnnp_model_io_reader_f)(void* const handle, const char* const name, const ccv_nnc_tensor_io_option_t* const options, const ccv_nnc_tensor_param_t params, ccv_nnc_tensor_t** const tensor_out);
3974
/**
3975
 * Set IO interceptor for loading weights from / to the model to replace the default SQLite reader / writer.
3976
 * @param model The model.
3977
 * @param reader The reader function for loading weights.
3978
 * @param writer The writer function for exporting weights out.
3979
 */
3980
void ccv_cnnp_model_set_io(ccv_cnnp_model_t* const model, ccv_cnnp_model_io_reader_f reader, ccv_cnnp_model_io_writer_f writer);
3981
/**
3982
 * Read model's tensors from a SQLite database with a given name.
3983
 * @param handle The SQLite handle.
3984
 * @param name The name to find the tensors related to the model in the database.
3985
 * @param options The IO options that can do data encode / decode before persistence.
3986
 * @param model_out The model which you want to restore the tensors. It should have the same
3987
 *                  structure as the one in write to.
3988
 * @return CCV_IO_FINAL for success, otherwise error.
3989
 */
3990
int ccv_cnnp_model_read(void* const handle, const char* const name, const ccv_nnc_tensor_io_option_t* const options, const ccv_cnnp_model_t* const model_out);
3991
/**
3992
 * Read model's tensors from a SQLite database implicitly with "" name. This is a convenience method
3993
 * to ccv_cnnp_model_read particularly useful at training time.
3994
 * @param fn The file name.
3995
 * @param options The IO options that can do data encode / decode before persistence.
3996
 * @param model_out The model which you want to restore the tensors. It should have the same
3997
 *                  structure as the one in write to.
3998
 */
3999
void ccv_cnnp_model_read_from_file(const char* const fn, const ccv_nnc_tensor_io_option_t* const options, const ccv_cnnp_model_t* const model_out);
4000
/**
4001
 * Apply data parallel to the composed model. This method has to be called before we call either
4002
 * evaluate or fit and after the model is compiled.
4003
 * @param model The composed model.
4004
 * @param parallel Number of devices we want to run on. 0 will use all devices available. 1 will skip.
4005
 */
4006
void ccv_cnnp_model_set_data_parallel(ccv_cnnp_model_t* const model, const int parallel);
4007
/**
4008
 * Set the maximum operator-level concurrency. This is a soft-limit, e.g. if you have operations on
4009
 * different devices, they are concurrent.
4010
 * @param model The composed model.
4011
 * @param max_stream_count The maximum concurrency if the model schedules internal streams. 0 is no limit.
4012
 */
4013
void ccv_cnnp_model_set_max_concurrency(ccv_cnnp_model_t* const model, const int max_stream_count);
4014
/**
4015
 * Apply memory compression to the composed model. The memory compression technique can reduce memory
4016
 * usage by up to 75% compared with raw mixed-precision model during training time.
4017
 * @param model The composed model.
4018
 * @param memory_compression Whether to enable the memory compression (1 - enable, 0 - disable (default))
4019
 */
4020
void ccv_cnnp_model_set_memory_compression(ccv_cnnp_model_t* const model, const int memory_compression);
4021
/**
4022
 * Apply memory reduction to the composed model. The memory reduction technique can reduce memory
4023
 * usage losslessly. Right now, the supported memory reduction technique is to redo datatype conversion.
4024
 * @param model The composed model.
4025
 * @param memory_reduction Whether to enable the memory reduction (1 - enable, 0 - disable (default))
4026
 */
4027
void ccv_cnnp_model_set_memory_reduction(ccv_cnnp_model_t* const model, const int memory_reduction);
4028
/**
4029
 * Set the computations in this model to be gradient checkpointing. This can be strategically applied
4030
 * to models within the higher-level composed models such that these models can effectively save 0
4031
 * gradients during backprop with the cost of running forward pass twice.
4032
 * @param model The model that will turn on gradient checkpointing.
4033
 * @param gradient_checkpointing Whether to enable gradient checkpointing (1 - enable, 0 - disable (default))
4034
 */
4035
void ccv_cnnp_model_set_gradient_checkpointing(ccv_cnnp_model_t* const model, const int gradient_checkpointing);
4036
/**
4037
 * Get whether gradient checkpointing is enabled or not for this model.
4038
 * @param model The model that will turn on gradient checkpointing.
4039
 */
4040
int ccv_cnnp_model_gradient_checkpointing(ccv_cnnp_model_t* const model);
4041
/**
4042
 * Set compile parameters on the model so it compiles the graph with the said parameters.
4043
 * @param model The composed model.
4044
 * @param compile_params A ccv_nnc_symbolic_graph_compile_param_t struct defines compilation parameters.
4045
 */
4046
void ccv_cnnp_model_set_compile_params(ccv_cnnp_model_t* const model, const ccv_nnc_symbolic_graph_compile_param_t compile_params);
4047
/**
4048
 * This method sets the max workspace size. If the graph is already compiled, it will re-run
4049
 * autotune to use the new workspace size to find the best algorithm.
4050
 * @param model The composed model.
4051
 * @param workspace_size The size in bytes that we can use as workspace (scratch memory).
4052
 */
4053
void ccv_cnnp_model_set_workspace_size(ccv_cnnp_model_t* const model, size_t workspace_size);
4054
/**
4055
 * This method returns the current max workspace size.
4056
 * @param model The composed model.
4057
 */
4058
size_t ccv_cnnp_model_workspace_size(ccv_cnnp_model_t* const model);
4059
/**
4060
 * Set a parameter that is specified by the parameter span. This will override whatever value in that
4061
 * parameter. The given tensor should match the dimension of the parameter. It doesn't matter whether
4062
 * the given tensor is on CPU or GPU, it will be copied over. This method is limited, it can only set
4063
 * tensor once the model is compiled.
4064
 * @param model The composed model.
4065
 * @param parameter The parameter that is used to specify which parameter to override.
4066
 * @param tensor The tensor that contains the value we want to copy over.
4067
 */
4068
void ccv_cnnp_model_set_parameter(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, const ccv_nnc_tensor_t* const tensor);
4069
/**
4070
 * Copy a parameter that is specified by the parameter span out of a model. This will override the value
4071
 * in the tensor you provided. The given tensor should match the dimension of the parameter and should
4072
 * already be allocated. It doesn't matter whether the given tensor is on CPU or GPU.
4073
 * @param model The composed model.
4074
 * @param parameter The parameter that is used to specify which parameter to copy from.
4075
 * @param tensor The tensor that receives value.
4076
 */
4077
void ccv_cnnp_model_parameter_copy(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter, ccv_nnc_tensor_t* const tensor);
4078
/**
4079
 * Get the ccv_nnc_tensor_param_t for a particular parameter of a model.
4080
 * @param model The composed model.
4081
 * @param parameter The parameter that is used to specify which parameter to retrieve ccv_nnc_tensor_param_t.
4082
 * @return The ccv_nnc_tensor_param_t structure that specifies a tensor shape.
4083
 */
4084
CCV_WARN_UNUSED(ccv_nnc_tensor_param_t) ccv_cnnp_model_parameter_tensor_params(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter);
4085
/**
4086
 * Get the internal name for a particular parameter of a model.
4087
 * @param model The composed model.
4088
 * @param parameter The parameter that is used to specify which parameter to retrieve the internal name for.
4089
 * @return The name string for internal name, its life-cycle is managed by the model, and valid until the next invocation of the model either another call or free.
4090
 */
4091
CCV_WARN_UNUSED(const char*) ccv_cnnp_model_parameter_name(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameter);
4092
/**
4093
 * This method returns the number of parameters for this particular model. Note that this is only available after
4094
 * model is compiled.
4095
 * @param model A model that is compiled.
4096
 * @return The number of parameters.
4097
 */
4098
CCV_WARN_UNUSED(int) ccv_cnnp_model_parameter_count(ccv_cnnp_model_t* const model);
4099
/**
4100
 * This method returns the total byte size of parameters for this particular model. Note that this is only available after
4101
 * model is compiled.
4102
 * @param model A model that is compiled.
4103
 * @return The total byte size of parameters.
4104
 */
4105
CCV_WARN_UNUSED(uint64_t) ccv_cnnp_model_parameters_size(ccv_cnnp_model_t* const model);
4106
/**
4107
 * This method moves parameters of this particular model to the designated device. It invalidates the parameters
4108
 * on the given model, and they must be moved back if the model needs to be used later.
4109
 * You can consider this as a counterpart for ccv_cnnp_model_parameter_copy, but operates on the whole model.
4110
 * @param model A model that is compiled.
4111
 * @param names The name associated with the tensor parameter.
4112
 * @param tensors The tensor associated with this parameter.
4113
 * @param count The size of the array provided for names and tensors, this should match ccv_cnnp_model_parameter_count call.
4114
 * @param type Either CCV_TENSOR_GPU_MEMORY or CCV_TENSOR_CPU_MEMORY.
4115
 * @return 1 for success.
4116
 */
4117
CCV_WARN_UNUSED(int) ccv_cnnp_model_parameters_move(ccv_cnnp_model_t* const model, char** const names, ccv_nnc_tensor_t** const tensors, const int count, const int type);
4118
/**
4119
 * This method moves or copies parameters from the array to this particular model to designated device.
4120
 * If it is a move, it invalidates the parameters in the array and leaves a "skeleton" tensor.
4121
 * You can consider this as a counterpart for ccv_cnnp_model_set_parameter, but operates on the whole model.
4122
 * @param model A model that is compiled.
4123
 * @param names The name associated with the tensor parameter.
4124
 * @param tensors The tensor associated with this parameter.
4125
 * @param count The size of the array provided for names and tensors, this should match ccv_cnnp_model_parameter_count call.
4126
 * @param invalidates Whether to invalidate the original tensor (1 - to invalidate, use move semantics if possible).
4127
 */
4128
void ccv_cnnp_model_set_parameters_from_key_values(ccv_cnnp_model_t* const model, char* const* const names, ccv_nnc_tensor_t** const tensors, const int count, const int invalidates);
4129
/**
4130
 * Use this to loop over and if the parameter matches, return 1.
4131
 */
4132
typedef int (*ccv_cnnp_model_parameters_filter_f)(const ccv_cnnp_model_t* const model, const char* const name, void* const context);
4133
/**
4134
 * Loop over a compiled model to find a parameter to either write or modify.
4135
 * @param model A model that is compiled.
4136
 * @param filter The callback that determines whether this parameter matches.
4137
 * @param context The context to be passed along with the callback.
4138
 * @return an array of ccv_cnnp_model_io_t.
4139
 */
4140
CCV_WARN_UNUSED(ccv_array_t*) ccv_cnnp_model_parameters_filter(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f filter, void* const context);
4141
/**
4142
 * Loop over a compiled model to find a parameter to either write or modify.
4143
 * @param model A model that is compiled.
4144
 * @param first The callback that determines whether a parameter is found.
4145
 * @param context The context to be passed along with the callback.
4146
 * @return a ccv_cnnp_model_io_t or 0 if not found.
4147
 */
4148
CCV_WARN_UNUSED(ccv_cnnp_model_io_t) ccv_cnnp_model_parameter_first(ccv_cnnp_model_t* const model, ccv_cnnp_model_parameters_filter_f first, void* const context);
4149
/**
4150
 * Loop over a compiled model to find a parameter that is not initialized.
4151
 * @param model A model that is compiled.
4152
 * @return a ccv_cnnp_model_io_t or 0 if not found.
4153
 */
4154
CCV_WARN_UNUSED(ccv_cnnp_model_io_t) ccv_cnnp_model_parameter_first_uninit(ccv_cnnp_model_t* const model);
4155
/**
4156
 * Set parameters from another model. This will override whatever values in these parameters. The
4157
 * given parameters from another model should match the dimension of the parameter. It doesn't matter
4158
 * whether the given tensor is on CPU or GPU. This method can only set when both models are compiled.
4159
 * @param model The composed model to be set on parameters.
4160
 * @param parameters The parameters to be overridden.
4161
 * @param from_model The model to copy parameters from.
4162
 * @param from_parameters The parameters to be copied from.
4163
 */
4164
void ccv_cnnp_model_set_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters);
4165
4166
/**
4167
 * Callback type for the renamer accepted by ccv_cnnp_model_share_parameters. It receives a parameter
 * name from from_model and can write the name to match against into updated_name.
 * @param context The context passed to the share method.
4168
 * @param source_name The name of the parameter from from_model.
4169
 * @param updated_name The name of the parameter from the model. You can update the value.
4170
 * @param provided_size The size of the updated_name buffer.
4171
 * @return 0 on success, -1 on failure.
4172
 */
4173
typedef int(*ccv_cnnp_model_parameters_renamer_f)(void* const context, const char* const source_name, char* const updated_name, const size_t provided_size);
4174
/**
4175
 * Share parameters between two models. This is a very specific setup to enable memory optimization
4176
 * by sharing parameter weights between two models. The models can be different as long as the weights
4177
 * match. The model is responsible to keep from_model alive / from destroyed. There is no refcount.
4178
 * Besides using the parameters to identify, you can also use the given block to provide name match.
4179
 * @param model The composed model to be set on parameters.
4180
 * @param parameters The parameters to be overridden.
4181
 * @param from_model The model to copy parameters from.
4182
 * @param from_parameters The parameters to be shared from.
4183
 * @param renamer The provided rename function that can get the new name from the from_parameters.
4184
 * @param context The context for renamer function.
4185
 */
4186
void ccv_cnnp_model_share_parameters(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters, ccv_cnnp_model_parameters_renamer_f renamer, void* const context);
4187
/**
4188
 * Process parameters such as exponential averaging.
4189
 * parameters = zip(from_parameters, to_parameters).map { cmd(to_parameter, from_parameter) }
4190
 * The order is selected in such a way because many of our commands only support inplace op if the first
4191
 * parameter matches.
4192
 * @param model The composed model to have parameters zip mapped.
4193
 * @param parameters The parameters to be written (and read).
4194
 * @param cmd The command to apply on the parameters.
4195
 * @param hint The hint supplied to the cmd.
4196
 * @param flags The flags supplied to the cmd.
4197
 * @param aux_ins Additional inputs supplied to the cmd.
4198
 * @param aux_in_size The size of additional inputs supplied to the cmd.
4199
 * @param aux_outs Additional outputs supplied to the cmd.
4200
 * @param aux_out_size The size of additional outputs supplied to the cmd.
4201
 * @param stream_context The stream context to be associated with.
4202
 * @param from_model The other composed model to have parameters zipped.
4203
 * @param from_parameters The parameters to be read.
4204
 */
4205
void ccv_cnnp_model_parameters_zip_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context, const ccv_cnnp_model_t* const from_model, const ccv_cnnp_model_io_t from_parameters);
4206
/**
4207
 * Process parameters such as clipping. parameters = parameters.map { cmd(parameter) }
4208
 * @param model The composed model to have parameters mapped.
4209
 * @param parameters The parameters to be mapped.
4210
 * @param cmd The command to apply on the parameters.
4211
 * @param hint The hint supplied to the cmd.
4212
 * @param flags The flags supplied to the cmd.
4213
 * @param aux_ins Additional inputs supplied to the cmd.
4214
 * @param aux_in_size The size of additional inputs supplied to the cmd.
4215
 * @param aux_outs Additional outputs supplied to the cmd.
4216
 * @param aux_out_size The size of additional outputs supplied to the cmd.
4217
 * @param stream_context The stream context to be associated with.
4218
 */
4219
void ccv_cnnp_model_parameters_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context);
4220
/**
4221
 * Process parameter gradients such as normalization. parameters.grad = parameters.apply { cmd(parameter.grad) }
4222
 * @param model The composed model to have parameters mapped.
4223
 * @param parameters The parameters to be mapped.
4224
 * @param cmd The command to apply on the parameters.
4225
 * @param hint The hint supplied to the cmd.
4226
 * @param flags The flags supplied to the cmd.
4227
 * @param aux_ins Additional inputs supplied to the cmd.
4228
 * @param aux_in_size The size of additional inputs supplied to the cmd.
4229
 * @param aux_outs Additional outputs supplied to the cmd.
4230
 * @param aux_out_size The size of additional outputs supplied to the cmd.
4231
 * @param stream_context The stream context to be associated with.
4232
 */
4233
void ccv_cnnp_model_parameter_gradients_map(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const aux_ins, const int aux_in_size, ccv_nnc_tensor_t* const* const aux_outs, const int aux_out_size, ccv_nnc_stream_context_t* const stream_context);
4234
/**
4235
 * If possible, move the parameter(s) to unified memory.
4236
 * @param model The composed model to have parameters mapped.
4237
 * @param parameters The parameters to be mapped.
4238
 * @param stream_context The stream context to be associated with.
4239
 */
4240
void ccv_cnnp_model_parameters_to_unified_memory(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, ccv_nnc_stream_context_t* const stream_context);
4241
/**
4242
 * Set a new minimizer for the model. This is useful when you need to update the learning rate for stochastic
4243
 * gradient descent for example. This method can be called any time during the training process (after
4244
 * compilation).
4245
 * @param model The composed model.
4246
 * @param minimizer The wrapped command that represents a new optimization strategy.
4247
 * @param reset Reset all previous states of minimizers. This only makes sense if both parameters and parameter_size are 0.
4248
 * @param parameters The parameters to apply the minimizer on. 0 means all.
4249
 * @param parameter_size The number of parameter spans.
4250
 */
4251
void ccv_cnnp_model_set_minimizer(ccv_cnnp_model_t* const model, const ccv_nnc_cmd_t minimizer, const int reset, const ccv_cnnp_model_io_t* const parameters, const int parameter_size);
4252
/**
4253
 * Retrieve the default minimizer for the model. This is set either when you call model compile or
4254
 * ccv_cnnp_model_set_minimizer with no parameter spans.
4255
 * @param model The composed model.
4256
 * @return The minimizer command.
4257
 */
4258
CCV_WARN_UNUSED(ccv_nnc_cmd_t) ccv_cnnp_model_minimizer(ccv_cnnp_model_t* const model);
4259
/**
4260
 * Get the default stream from a compiled model. If the model is not compiled, the default stream is
4261
 * 0.
4262
 * @param model The composed model.
4263
 * @return The default stream for this model.
4264
 */
4265
CCV_WARN_UNUSED(ccv_nnc_stream_context_t*) ccv_cnnp_model_default_stream(const ccv_cnnp_model_t* const model);
4266
/**
4267
 * Get the allocated memory size (excluding workspace) from a compiled model. If the model is not compiled,
4268
 * the size is 0.
4269
 * @param model The composed model.
4270
 * @return The number of bytes for memory allocated.
4271
 */
4272
CCV_WARN_UNUSED(uint64_t) ccv_cnnp_model_memory_size(const ccv_cnnp_model_t* const model);
4273
/**
4274
 * Free a given model.
4275
 * @param model The composed model.
4276
 */
4277
void ccv_cnnp_model_free(ccv_cnnp_model_t* const model);
4278
4279
/** @} */
4280
4281
/**
4282
 * @defgroup level_5_model_add_ons Model Add-ons
4283
 * @{
4284
 */
4285
4286
/**
4287
 * Process parameter gradients with normalization. Exactly the same as PyTorch's clip_grad_norm_
4288
 * @param model The composed model to have parameters mapped.
4289
 * @param parameters The parameters to be mapped.
4290
 * @param norm_type Currently only 2 is supported.
4291
 * @param max_norm The max value for norm.
4292
 * @param stream_context The stream context to be associated with.
4293
 */
4294
void ccv_cnnp_model_parameters_clip_grad_norm(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int norm_type, float max_norm, ccv_nnc_stream_context_t* const stream_context);
4295
/**
4296
 * Process parameter gradients to check if any is nan.
4297
 * @param model The composed model to have parameters mapped.
4298
 * @param parameters The parameters to be mapped.
4299
 * @param stream_context The stream context to be associated with.
4300
 * @return 1 if it has any nan, 0 otherwise.
4301
 */
4302
int ccv_cnnp_model_parameter_gradients_isnan(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, ccv_nnc_stream_context_t* const stream_context);
4303
4304
/**
 * Kinds of tensor parameter. These values are what the type field of ccv_cnnp_cmd_exec_io_t and
 * ccv_cnnp_tensor_symbol_param_t holds.
 */
enum {
4305
  CCV_CNNP_IO, /**< The parameter is a ccv_cnnp_io_t. */
4306
  CCV_CNNP_NO_TENSOR, /**< The parameter is not used. */
4307
  CCV_CNNP_TENSOR_NOT_OUTPUT, /**< This parameter indicates this is a tensor parameter, but it is not an output reflected as ccv_cnnp_io_t. */
4308
  CCV_CNNP_INIT_SHARED_TENSOR, /**< The parameter is a provided tensor for initialization. */
4309
  CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE, /**< The parameter is a provided tensor that can be updated. */
4310
};
4311
4312
/**
 * Initializer callback that is handed to a ccv_cnnp_cmd_exec_init_state_f together with its own context.
 * NOTE(review): presumably runs cmd (with hint / flags and the optional input) to fill the tensor behind
 * output_symbol -- confirm against the implementation.
 */
typedef void(*ccv_cnnp_state_initializer_f)(void* const context, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const input, const ccv_nnc_tensor_symbol_t output_symbol);
4313
/**
 * The function to init state for a tensor symbol. It receives the initializer callback, the initializer's
 * context, and the context stored in ccv_cnnp_cmd_exec_io_init_state_t.
 */
typedef void(*ccv_cnnp_cmd_exec_init_state_f)(const ccv_nnc_tensor_symbol_t tensor_symbol, const ccv_cnnp_state_initializer_f initializer, void* const initializer_context, void* const context);
4314
/** The function to release the context stored in ccv_cnnp_cmd_exec_io_init_state_t. */
typedef void(*ccv_cnnp_cmd_exec_init_state_deinit_f)(void* const context);
4315
/** The function to make a copy of the context stored in ccv_cnnp_cmd_exec_io_init_state_t; returns the copy. */
typedef void*(*ccv_cnnp_cmd_exec_init_state_copy_f)(void* const context);
4316
4317
/**
 * The state used to initialize one tensor: its tensor parameter, a context, and the callbacks that
 * init from, copy, and release that context.
 */
typedef struct {
4318
  ccv_nnc_tensor_param_t info; /**< The tensor parameter for this one. */
4319
  void* context; /**< The context for which we initialize tensor. */
4320
  ccv_cnnp_cmd_exec_init_state_f init; /**< The function to init state for a tensor. */
4321
  ccv_cnnp_cmd_exec_init_state_copy_f copy; /**< The function to make a copy of the context. */
4322
  ccv_cnnp_cmd_exec_init_state_deinit_f deinit; /**< The function to release the context. */
4323
} ccv_cnnp_cmd_exec_io_init_state_t;
4324
4325
/**
 * Describes one input of ccv_cnnp_cmd_exec: what kind of parameter it is and, if needed, how to
 * initialize it.
 */
typedef struct {
4326
  int type; /**< The type of the parameter, could be CCV_CNNP_IO, CCV_CNNP_NO_TENSOR, CCV_CNNP_INIT_SHARED_TENSOR, or CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE. */
4327
  ccv_cnnp_cmd_exec_io_init_state_t init_state; /**< The set of state to initialize the given tensor. */
4328
} ccv_cnnp_cmd_exec_io_t;
4329
/**
4330
 * A generic model based on the command. If the tensors are labeled as ccv_cnnp_io_t, it will participate
4331
 * as the input / output of the model. If it is a init tensor, the model will use this tensor for that parameter.
4332
 * Moreover, if it is marked as parameter, that tensor will be differentiated against when you call
4333
 * ccv_cnnp_model_fit. This model however doesn't take over ownership of the tensor. You should manage the life
4334
 * cycle of the given tensor and it is your responsibility to make sure they outlive the model. Also, all inputs and
4335
 * outputs marked as init tensors will be shared if you reuse this model in other places.
4336
 * @param cmd The command to generate this model.
4337
 * @param hint The hint to run the command.
4338
 * @param flags The flags with the command.
4339
 * @param inputs A list of ccv_cnnp_cmd_exec_io_t identifying each input as either an init tensor or a ccv_cnnp_io_t.
4340
 * @param input_size The size of input list.
4341
 * @param outputs A list of types identifying each output as ccv_cnnp_io_t or a none tensor.
4342
 * @param output_size The size of the outputs. There is no need to give ccv_cnnp_tensor_param_t for outputs because
4343
 *        all of them are CCV_CNNP_IO type.
4344
 * @param is_trainable Whether the parameters of this model can be trained.
4345
 * @param name The unique name of the model.
4346
 * @return A model based on the given command.
4347
 */
4348
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_cmd_exec(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_cnnp_cmd_exec_io_t* const inputs, const int input_size, const int* const outputs, const int output_size, const int is_trainable, const char* const name);
4349
/**
4350
 * Copy a tensor as initialization for the given parameter.
4351
 * @param tensor The tensor to copy from.
4352
 * @return An init_state that can be passed to ccv_cnnp_cmd_exec_io_t.
4353
 */
4354
CCV_WARN_UNUSED(ccv_cnnp_cmd_exec_io_init_state_t) ccv_cnnp_cmd_exec_io_copy(const ccv_nnc_tensor_t* const tensor);
4355
/**
4356
 * Initialize a given parameter with the command.
4357
 * @param cmd The command to call when need to initialize.
4358
 * @param hint The hint to accompany the command.
4359
 * @param flags The flags to accompany the command.
4360
 * @param params The tensor configuration.
4361
 * @return An init_state that can be passed to ccv_cnnp_cmd_exec_io_t.
4362
 */
4363
CCV_WARN_UNUSED(ccv_cnnp_cmd_exec_io_init_state_t) ccv_cnnp_cmd_exec_io_set_by(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const ccv_nnc_tensor_param_t params);
4364
4365
/**
 * Labels one tensor symbol for ccv_cnnp_graph: what kind of parameter it is and, if needed, how to
 * initialize it.
 */
typedef struct {
4366
  ccv_nnc_tensor_symbol_t symbol; /**< The tensor symbol this refers to. */
4367
  int type; /**< The type of the parameter, could be CCV_CNNP_IO, CCV_CNNP_INIT_SHARED_TENSOR, or CCV_CNNP_INIT_SHARED_TENSOR_AS_TRAINABLE. */
4368
  ccv_cnnp_cmd_exec_io_init_state_t init_state; /**< The set of state to initialize the given tensor. */
4369
} ccv_cnnp_tensor_symbol_param_t;
4370
/**
4371
 * A generic model based on the symbolic graph we provided. A list of tensor symbols are labeled whether it
4372
 * is ccv_cnnp_io_t or not (we identify whether this is an input or output based on whether it is in the graph).
4373
 * If it is not, we init it with a given tensor. If it is marked as parameter, that tensor will be differentiated
4374
 * against when you call ccv_cnnp_model_fit. The model doesn't take ownership over the init tensors. You are
4375
 * responsible to make sure the init tensors outlive the model until the initialization has occurred. Also, these
4376
 * tensors will be shared if the model is reused.
4377
 * @param graph The symbolic graph that is our blueprint for this model.
4378
 * @param tensor_symbol_params The list of tensor symbol parameters that labels a given symbol.
4379
 * @param tensor_symbol_param_size The size of the list.
4380
 * @param inputs The inputs to this graph. We can figure out which ones are inputs, but this gives us the order.
4381
 * @param input_size The size of the input list.
4382
 * @param outputs The outputs from this graph. We can figure out which ones are outputs, but this gives us the order.
4383
 * @param output_size The size of the output list.
4384
 * @param is_trainable Whether the parameters of this model can be trained.
4385
 * @param name The unique name of the model.
4386
 * @return A model based on the given symbolic graph.
4387
 */
4388
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_graph(const ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_tensor_symbol_param_t* const tensor_symbol_params, const int tensor_symbol_param_size, ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size, const int is_trainable, const char* const name);
4389
/**
4390
 * Sum multiple input tensors together.
4391
 * @param name The unique name of the model.
4392
 * @return A model that can be applied with multiple inputs, and generate output that is a sum of the inputs.
4393
 */
4394
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_sum(const char* const name);
4395
/**
4396
 * Concatenate input tensors together.
4397
 * @param axis Along this axis, we concatenate tensors together. Other dimensions need to be exactly the same.
4398
 * @param name The unique name of the model.
4399
 * @return A model that can be applied with multiple inputs, and generate output that is a concatenation of the inputs.
4400
 */
4401
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_concat(const int axis, const char* const name);
4402
/**
4403
 * Chunk the input tensor into n pieces.
4404
 * @param n How many pieces we chunk the tensor into.
4405
 * @param axis Along this axis, we chunk the tensor. Other dimensions need to be exactly the same.
4406
 * @param name The unique name of the model.
4407
 * @return A model that can be applied with one input, and generate outputs that are chunks of the input.
4408
 */
4409
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_chunk(const int n, const int axis, const char* const name);
4410
/**
4411
 * A convolution model.
4412
 * @param groups The number of kernel groups in the model.
4413
 * @param filters The total number of filters in the model (filters = groups * per group filters).
4414
 * @param kdim The dimensions of the kernel.
4415
 * @param dilation The dilation factor on each dimension.
4416
 * @param no_bias Whether it has a bias term or not.
4417
 * @param hint The hint for alignment.
4418
 * @param format The format for weights. If 0, it will have the same format as the input.
4419
 * @param is_trainable Whether the parameters of this model can be trained.
4420
 * @param name The unique name of the model.
4421
 * @return A convolution model.
4422
 */
4423
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_convolution(const int groups, const int filters, const int kdim[CCV_NNC_MAX_DIM_ALLOC], const int dilation[CCV_NNC_MAX_DIM_ALLOC], const int no_bias, ccv_nnc_hint_t hint, const int format, const int is_trainable, const char* const name);
4424
/**
4425
 * A convolution transpose model.
4426
 * @param groups The number of kernel groups in the model.
4427
 * @param filters The total number of filters in the model (filters = groups * per group filters).
4428
 * @param kdim The dimensions of the kernel.
4429
 * @param dilation The dilation factor on each dimension.
4430
 * @param output_padding The padding helps to resolve shape ambiguity when this is inverse of convolution.
4431
 * @param no_bias Whether it has a bias term or not.
4432
 * @param hint The hint for alignment.
4433
 * @param format The format for weights. If 0, it will have the same format as the input.
4434
 * @param is_trainable Whether the parameters of this model can be trained.
4435
 * @param name The unique name of the model.
4436
 * @return A convolution transpose model.
4437
 */
4438
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_convolution_transpose(const int groups, const int filters, const int kdim[CCV_NNC_MAX_DIM_ALLOC], const int dilation[CCV_NNC_MAX_DIM_ALLOC], const int output_padding, const int no_bias, ccv_nnc_hint_t hint, const int format, const int is_trainable, const char* const name);
4439
/**
4440
 * A dense layer model.
4441
 * @param count The output dimension.
4442
 * @param no_bias Whether it has a bias term or not.
4443
 * @param flags The flags to disable / enable certain features.
4444
 * @param is_trainable Whether the parameters of this model can be trained.
4445
 * @param name The unique name of the model.
4446
 * @return A dense layer model.
4447
 */
4448
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_dense(const int count, const int no_bias, const int flags, const int is_trainable, const char* const name);
4449
/**
4450
 * A batch norm layer model.
4451
 * @param momentum The momentum in batch norm parameter.
4452
 * @param epsilon The epsilon in batch norm parameter.
4453
 * @param is_trainable Whether the parameters of this model can be trained.
4454
 * @param name The unique name of the model.
4455
 * @return A batch norm layer model.
4456
 */
4457
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_batch_norm(const float momentum, const float epsilon, const int is_trainable, const char* const name);
4458
/**
4459
 * A RELU activation layer model.
4460
 * @param name The unique name of the model.
4461
 * @return A RELU activation layer model.
4462
 */
4463
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_relu(const char* const name);
4464
/**
4465
 * A sigmoid activation layer model.
4466
 * @param name The unique name of the model.
4467
 * @return A sigmoid activation layer model.
4468
 */
4469
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_sigmoid(const char* const name);
4470
/**
4471
 * A tanh activation layer model.
4472
 * @param name The unique name of the model.
4473
 * @return A tanh activation layer model.
4474
 */
4475
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_tanh(const char* const name);
4476
/**
4477
 * A swish activation layer model.
4478
 * @param beta The beta coefficient in swish: x * sigmoid(beta * x).
4479
 * @param name The unique name of the model.
4480
 * @return A swish activation layer model.
4481
 */
4482
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_swish(const float beta, const char* const name);
4483
/**
4484
 * A GELU activation layer model.
4485
 * @param tanh Whether enable fast approximate GELU.
4486
 * @param name The unique name of the model.
4487
 * @return A GELU activation layer model.
4488
 */
4489
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_gelu(const int tanh, const char* const name);
4490
/**
4491
 * A leaky ReLU activation layer model.
4492
 * @param negative_slope The coefficient to be applied when it is negative.
4493
 * @param name The unique name of the model.
4494
 * @return A leaky ReLU activation layer model.
4495
 */
4496
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_leaky_relu(const float negative_slope, const char* const name);
4497
/**
4498
 * A softmax activation layer model.
4499
 * @param name The unique name of the model.
4500
 * @return A softmax activation layer model.
4501
 */
4502
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_softmax(const char* const name);
4503
/**
4504
 * A max pool model.
4505
 * @param kdim The pooling window dimension.
4506
 * @param hint The hint for alignment.
4507
 * @param name The unique name of the model.
4508
 * @return A max pool model.
4509
 */
4510
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_max_pool(const int kdim[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_hint_t hint, const char* const name);
4511
/**
4512
 * An average pool model.
4513
 * @param kdim The pooling window dimension.
4514
 * @param hint The hint for alignment.
4515
 * @param name The unique name of the model.
4516
 * @return An average pool model.
4517
 */
4518
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_average_pool(const int kdim[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_hint_t hint, const char* const name);
4519
/**
4520
 * Reshape an input into a different dimension.
4521
 * @param format Change the layout format for a given input, 0 is not to change.
4522
 * @param dim The new dimension for the input.
4523
 * @param ofs The offset on each of the dimension.
4524
 * @param stride The line size of each dimension.
4525
 * @param name The unique name of the model.
4526
 * @return A reshape layer model.
4527
 */
4528
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_reshape(const int format, const int dim[CCV_NNC_MAX_DIM_ALLOC], const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC], const char* const name);
4529
/**
4530
 * Pad the input with extra elements at the beginning or the end of each dimension. Padding should be > 0.
4531
 * @param type Two types of padding supported: zero and replication.
4532
 * @param begin How many elements to add at the beginning of each dimension.
4533
 * @param end How many elements to add at the end of each dimension.
4534
 * @param name The unique name of the model.
4535
 * @return A pad layer model.
4536
 */
4537
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_pad(const int type, const int begin[CCV_NNC_MAX_DIM_ALLOC], const int end[CCV_NNC_MAX_DIM_ALLOC], const char* const name);
4538
/**
4539
 * Identity op that simply copies from input to output without using any data transfer / format conversion methods.
4540
 * @param name The unique name of the model.
4541
 * @return An identity layer model.
4542
 */
4543
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_identity(const char* const name);
4544
/**
4545
 * Permute the input. For example, [2, 0, 1] means moving dimension 2 to 0, dimension 0 to 1, dimension 1 to 2.
4546
 * @param index For each output dimension, the index of the input dimension it is taken from.
4547
 * @param name The unique name of the model.
4548
 * @return A permute layer model.
4549
 */
4550
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_permute(const int index[CCV_NNC_MAX_DIM_ALLOC], const char* const name);
4551
/**
4552
 * Extract one of the multi-outputs. This is useful because ccv_cnnp_model_io_t can contain multiple outputs, this
4553
 * helps to extract one of them out to be used later.
4554
 * @param index The index to the output you want to extract.
4555
 * @param name The unique name of the model.
4556
 * @return A model that can extract one output.
4557
 */
4558
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_extract(const int index, const char* const name);
4559
/**
4560
 * Flatten an input tensor into a one dimensional array.
4561
 * @param name The unique name of the model.
4562
 * @return A flatten layer model.
4563
 */
4564
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_flatten(const char* const name);
4565
/**
4566
 * A layer norm model.
4567
 * @param epsilon The epsilon in layer norm parameter.
4568
 * @param axis The feature axes over which the norm is computed.
4569
 * @param axis_count How many axes we count as feature.
4570
 * @param elementwise_affine Whether it contains scale / bias.
4571
 * @param is_trainable Whether the parameters of this model can be trained.
4572
 * @param name The unique name of the model.
4573
 * @return A layer norm model.
4574
 */
4575
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_layer_norm(const float epsilon, const int axis[CCV_NNC_MAX_DIM_ALLOC], const int axis_count, const int elementwise_affine, const int is_trainable, const char* const name);
4576
/**
4577
 * A group norm model.
4578
 * @param group_axis The feature axis over which to compute the norm.
4579
 * @param groups How many groups per axis channel.
4580
 * @param epsilon The epsilon in group norm parameter.
4581
 * @param reduce_axis The other axes to be reduced.
4582
 * @param axis_count The number of other axes to be reduced.
4583
 * @param elementwise_affine Whether it contains scale / bias.
4584
 * @param is_trainable Whether the parameters of this model can be trained.
4585
 * @param name The unique name of the model.
4586
 * @return A group norm model.
4587
 */
4588
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_group_norm(const int group_axis, const int groups, const float epsilon, const int reduce_axis[CCV_NNC_MAX_DIM_ALLOC], const int axis_count, const int elementwise_affine, const int is_trainable, const char* const name);
4589
/**
4590
 * A rmsnorm model.
4591
 * @param epsilon The epsilon in rmsnorm parameter.
4592
 * @param axis The feature axes over which to compute the norm.
4593
 * @param axis_count How many axes we count as features.
4594
 * @param elementwise_affine Whether it contains scale.
4595
 * @param is_trainable Whether the parameters of this model can be trained.
4596
 * @param name The unique name of the model.
4597
 * @return A rmsnorm model.
4598
 */
4599
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_rmsnorm(const float epsilon, const int axis[CCV_NNC_MAX_DIM_ALLOC], const int axis_count, const int elementwise_affine, const int is_trainable, const char* const name);
4600
/**
4601
 * Add two input tensors together. Different from sum because this supports broadcasting.
4602
 * @param p The weight for the first input.
4603
 * @param q The weight for the second input.
4604
 * @param name The unique name of the model.
4605
 * @return A model that can be applied with two inputs, and generate output that is the weighted sum of the inputs.
4606
 */
4607
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_add(const float p, const float q, const char* const name);
4608
/**
4609
 * Multiply two input tensors together.
4610
 * @param p The weight for the output.
4611
 * @param name The unique name of the model.
4612
 * @return A model that can be applied with two inputs, and generate output that is a product of the inputs.
4613
 */
4614
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_mul(const float p, const char* const name);
4615
/**
4616
 * A scalar multiplication model. Y = aX where a is a scalar.
4617
 * @param a The scalar parameter.
4618
 * @param name The unique name of the model.
4619
 * @return A scalar multiplication model.
4620
 */
4621
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_scalar_mul(const float a, const char* const name);
4622
/**
4623
 * Divide two input tensors together.
4624
 * @param reciprocal Only take one tensor input, effectively compute 1 / input.
4625
 * @param name The unique name of the model.
4626
 * @return A model that can be applied with two inputs, and generate output that is a division of the inputs.
4627
 */
4628
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_div(const int reciprocal, const char* const name);
4629
/**
4630
 * Square root of the input tensor.
4631
 * @param name The unique name of the model.
4632
 * @return A model that can be applied with one input, and generate output that is the square root of the input.
4633
 */
4634
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_sqrt(const char* const name);
4635
/**
4636
 * Natural logarithm of the input tensor.
4637
 * @param name The unique name of the model.
4638
 * @return A model that can be applied with one input, and generate output that is the natural logarithm of the input.
4639
 */
4640
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_log(const char* const name);
4641
/**
4642
 * Raise the input tensor to a constant exponent element-wise.
4643
 * @param exponent The exponent in y = x ^ exponent.
4644
 * @param name The unique name of the model.
4645
 * @return A model that can be applied with one input, and generate output that is the element-wise power.
4646
 */
4647
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_pow(const float exponent, const char* const name);
4648
/**
4649
 * Sine of the input tensor.
4650
 * @param name The unique name of the model.
4651
 * @return A model that can be applied with one input, and generate output that is the sine of the input.
4652
 */
4653
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_sin(const char* const name);
4654
/**
4655
 * Cosine of the input tensor.
4656
 * @param name The unique name of the model.
4657
 * @return A model that can be applied with one input, and generate output that is the cosine of the input.
4658
 */
4659
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_cos(const char* const name);
4660
/**
4661
 * Multiply two input tensors together as if these are complex numbers.
4662
 * @param name The unique name of the model.
4663
 * @return A model that can be applied with two inputs, and generate output that is a product of the inputs.
4664
 */
4665
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_cmul(const char* const name);
4666
/**
4667
 * A matrix transpose model.
4668
 * @param axis_a The axis to be exchanged with axis_b
4669
 * @param axis_b The axis to be exchanged with axis_a
4670
 * @param name The unique name of the model.
4671
 * @return A matrix transpose model.
4672
 */
4673
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_transpose(const int axis_a, const int axis_b, const char* const name);
4674
/**
4675
 * A batched matrix multiplication model.
4676
 * @param transpose_a The axes to be transposed in the first matrix.
4677
 * @param transpose_b The axes to be transposed in the second matrix.
4678
 * @param flags The flags to disable / enable certain features.
4679
 * @param name The unique name of the model.
4680
 * @return A batched matrix multiplication model.
4681
 */
4682
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_matmul(const int transpose_a[2], const int transpose_b[2], const int flags, const char* const name);
4683
/**
4684
 * A dropout model.
4685
 * @param p The probability to drop the current value.
4686
 * @param entirety Drop the whole layer with the given probability.
4687
 * @param name The unique name of the model.
4688
 * @return A dropout model.
4689
 */
4690
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_dropout(const float p, const int entirety, const char* const name);
4691
/**
4692
 * A masked fill model.
4693
 * @param eq If a value in the given mask tensor is equal to this.
4694
 * @param fill Fill in this value to the output tensor.
4695
 * @param name The unique name of the model.
4696
 * @return A masked fill model.
4697
 */
4698
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_masked_fill(const float eq, const float fill, const char* const name);
4699
/**
4700
 * An index select model.
4701
 * @param name The unique name of the model.
4702
 * @return An index select model.
4703
 */
4704
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_index_select(const char* const name);
4705
/**
4706
 * A dictionary embedding model. This can be thought of as an index select model but the vocabulary
4707
 * tensor is within this model itself.
4708
 * @param datatype The data type of the vocabulary.
4709
 * @param vocab_size The size of the vocabulary.
4710
 * @param embed_size The size of the embedding.
4711
 * @param is_trainable Whether the parameters of this model can be trained.
4712
 * @param name The unique name of the model.
4713
 * @return A dictionary embedding model.
4714
 */
4715
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_embedding(const int datatype, const int vocab_size, const int embed_size, const int is_trainable, const char* const name);
4716
/**
4717
 * An upsample model.
4718
 * @param type The type of upsample, whether nearest or bilinear.
4719
 * @param width_scale The scale of the width of the input.
4720
 * @param height_scale The scale of the height of the input.
4721
 * @param align_corners Whether to align corners when doing upsample.
4722
 * @param name The unique name of the model.
4723
 * @return An upsample model.
4724
 */
4725
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_upsample(const int type, const float width_scale, const float height_scale, const int align_corners, const char* const name);
4726
/**
4727
 * A sum value reducer model.
4728
 * @param axis The axis to be reduced.
4729
 * @param axis_count The size of the axis array.
4730
 * @param name The unique name of the model.
4731
 * @return A sum value reducer model.
4732
 */
4733
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_reduce_sum(const int* const axis, const int axis_count, const char* const name);
4734
/**
4735
 * A mean value reducer model.
4736
 * @param axis The axis to be reduced.
4737
 * @param axis_count The size of the axis array.
4738
 * @param name The unique name of the model.
4739
 * @return A mean value reducer model.
4740
 */
4741
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_reduce_mean(const int* const axis, const int axis_count, const char* const name);
4742
/**
4743
 * A max value reducer model.
4744
 * @param axis The axis to be reduced.
4745
 * @param axis_count The size of the axis array.
4746
 * @param name The unique name of the model.
4747
 * @return A max value reducer model.
4748
 */
4749
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_reduce_max(const int* const axis, const int axis_count, const char* const name);
4750
/**
4751
 * A min value reducer model.
4752
 * @param axis The axis to be reduced.
4753
 * @param axis_count The size of the axis array.
4754
 * @param name The unique name of the model.
4755
 * @return A min value reducer model.
4756
 */
4757
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_reduce_min(const int* const axis, const int axis_count, const char* const name);
4758
/**
4759
 * A norm2 value reducer model.
4760
 * @param axis The axis to be reduced.
4761
 * @param axis_count The size of the axis array.
4762
 * @param name The unique name of the model.
4763
 * @return A norm2 value reducer model.
4764
 */
4765
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_reduce_norm2(const int* const axis, const int axis_count, const char* const name);
4766
/**
4767
 * An argmax model.
4768
 * @param axis The axis to be reduced.
4769
 * @param name The unique name of the model.
4770
 * @return A max indices model.
4771
 */
4772
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_argmax(const int axis, const char* const name);
4773
/**
4774
 * An argmin model.
4775
 * @param axis The axis to be reduced.
4776
 * @param name The unique name of the model.
4777
 * @return A min indices model.
4778
 */
4779
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_argmin(const int axis, const char* const name);
4780
/**
4781
 * An element-wise min model.
4782
 * @param name The unique name of the model.
4783
 * @return An element-wise min model.
4784
 */
4785
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_min(const char* const name);
4786
/**
4787
 * An element-wise max model.
4788
 * @param name The unique name of the model.
4789
 * @return An element-wise max model.
4790
 */
4791
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_max(const char* const name);
4792
/**
4793
 * A Long-Short Term Memory model.
4794
 * @param masked Whether a mask tensor is provided.
4795
 * @param hidden_size The number of features in the hidden state h.
4796
 * @param proj_size The size of the projection applied to the hidden state h.
4797
 * @param num_layers The number of layers for RNN.
4798
 * @param bias If 0, the layer won't use bias weights.
4799
 * @param batch_first If 1, will batch before sequence.
4800
 * @param bidirectional Enable bidirectional mode of RNN.
4801
 * @param dropout If non-zero, enable dropout at each layer of RNN.
4802
 * @param is_trainable Whether the parameters of this model can be trained.
4803
 * @param name The unique name of the model.
4804
 * @return An LSTM model.
4805
 */
4806
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_lstm(const int masked, const int hidden_size, const int proj_size, const int num_layers, const int bias, const int batch_first, const int bidirectional, const float dropout, const int is_trainable, const char* const name);
4807
/**
4808
 * Perform datatype conversion for input tensors.
4809
 * @param datatype The desired datatype.
4810
 * @param ref_to_last If there are two inputs to the model, use the last one as a datatype reference.
4811
 * @param name The unique name of the model.
4812
 * @return A model that does data conversion.
4813
 */
4814
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_datatype_conversion(const int datatype, const int ref_to_last, const char* const name);
4815
/**
4816
 * Clamp input tensor to a range.
4817
 * @param min The lower bound of the range to clamp to. NAN will ignore this.
4818
 * @param max The upper bound of the range to clamp to. NAN will ignore this.
4819
 * @param name The unique name of the model.
4820
 * @return A model that does clamp.
4821
 */
4822
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_clamp(const float min, const float max, const char* const name);
4823
/**
4824
 * A parameter that can be initialized / loaded.
4825
 * @param params The tensor shape / information about this parameter.
4826
 * @param init_bound The bound for the initial values, in uniform distribution.
4827
 * @param name The unique name of the model.
4828
 * @param is_trainable Whether the parameters of this model can be trained.
4829
 * @return A model that can be applied and return the weight.
4830
 */
4831
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_parameter(const ccv_nnc_tensor_param_t params, const float init_bound, const int is_trainable, const char* const name);
4832
/**
4833
 * A scalar value that can be used.
4834
 * @param type The type of this scalar.
4835
 * @param format The format of this scalar.
4836
 * @param datatype The datatype of this scalar.
4837
 * @param value The value in float.
4838
 * @param name The unique name of the model.
4839
 * @return A model that can be applied and return the scalar.
4840
 */
4841
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_scalar(const int type, const int format, const int datatype, const float value, const char* const name);
4842
/**
4843
 * An empty variable that can be used. This is usually paired to ccv_cnnp_move to make this "input"
4844
 * as destination. This is also different from ccv_cnnp_parameter because that will be persisted.
4845
 * @param params The parameters for the tensor.
4846
 * @param name The unique name of the model.
4847
 * @return A model that can be applied and return the variable.
4848
 */
4849
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_variable(const ccv_nnc_tensor_param_t params, const char* const name);
4850
/**
4851
 * A special model that takes two inputs but copies value in the first input to the second. The
4852
 * second input then returned as the output. This is special because it enables you to violate
4853
 * single-static assignment rule otherwise without using this method, it won't trigger. However,
4854
 * it does have a special place because it enables hand-written optimizations that otherwise require
4855
 * you to either implement a new optimization pass in nnc (difficult to do it correctly) or it is
4856
 * not possible to do with CNNP models and you have to go to Level-3 API, which may not be exposed
4857
 * on high-level language bindings such as s4nnc.
4858
 * @param name The unique name of the model.
4859
 * @return A model that can be applied and copies first input to the second.
4860
 */
4861
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_move(const char* const name);
4862
/**
4863
 * If the input is not contiguous, this model will make it contiguous. Normally, such graph operation
4864
 * will be optimized away when calling ccv_nnc_symbolic_graph_simplify. In this case, we will disable
4865
 * such optimization on the generated node. If the input is already contiguous, the output of this model
4866
 * is the same as the input, hence, skipped.
4867
 * @param name The unique name of the model.
4868
 * @return A model that can be applied and makes the input contiguous.
4869
 */
4870
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_contiguous(const char* const name);
4871
/**
4872
 * If the input is a reshape, this model will make it a copy. Normally, such graph operation
4873
 * will be optimized away when calling ccv_nnc_symbolic_graph_simplify. In this case, we will disable
4874
 * such optimization on the generated node. This is useful mainly for memory conservation. In case you
4875
 * are working with a reshape of part of the tensor, making an explicit copy would make sure the original
4876
 * tensor is not retained therefore you can now give the compiler more optimization opportunities on
4877
 * memory conservation.
4878
 * @param name The unique name of the model.
4879
 * @return A model that can be applied and makes a copy of the input.
4880
 */
4881
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_copy(const char* const name);
4882
/**
4883
 * Apply the scaled dot product attention to input. Accepting input in the form of (q, k, v)
4884
 * or (q, k, v, attn_mask) if has_attn_mask is 1.
4885
 * @param scale The scale to be applied to the qk dot product.
4886
 * @param is_causal Whether to apply a causal mask to it. If both attn_mask and is_causal are supplied, we will cut attn_mask to upper right triangle.
4887
 * @param has_attn_mask Whether the input would accept a 4th parameter the attention mask.
4888
 * @param flags Which precision is preferred for the attention computation to be run at (FP16 or FP32).
4889
 * @param fused_unify_head_weights Whether we also have unifying head weight fused into it. The output would be in shape of (N, S, H * Ev).
4890
 * @param no_bias Whether we have bias or not for the unifying head output.
4891
 * @param is_trainable Whether or not it is trainable (if weight / bias provided).
4892
 * @param name The unique name of the model.
4893
 * @return A model that can apply scaled dot product attention compute.
4894
 */
4895
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_scaled_dot_product_attention(const float scale, const int is_causal, const int has_attn_mask, const int flags, const int fused_unify_head_weights, const int no_bias, const int is_trainable, const char* const name);
4896
/**
4897
 * The function prototype to call during the model execution at this position.
4898
 */
4899
typedef void (*ccv_cnnp_model_debug_f)(ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_stream_context_t* const stream_context, void* const context);
4900
/**
4901
 * The function prototype to destruct the context.
4902
 */
4903
typedef void (*ccv_cnnp_model_debug_context_deinit_f)(void* const context);
4904
/**
4905
 * The function prototype to copy the context.
4906
 */
4907
typedef void* (*ccv_cnnp_model_debug_context_copy_f)(void* const context);
4908
/**
4909
 * A special model that takes n inputs and outputs the first values. This is a special model because it
4910
 * generates a graph that violates the single-static assignment rule by having the outputs be the same symbol
4911
 * as the input. It also inserts a custom op that allows you to intercept the model execution and possibly
4912
 * output useful information from it (i.e. debug print tensors, generate stats like max / min / nan
4913
 * etc.). This is safe to insert anywhere because it doesn't impact the graph execution process but
4914
 * you are also advised to not use this method to modify the tensors during the execution. There will
4915
 * be another method for you to insert custom op in the model.
4916
 * @param func The func to call during the model execution.
4917
 * @param context The context object to be passed along the callback.
4918
 * @param deinit The deinit method to be used to free up the context.
4919
 * @param copy The copy method to make a duplicate of the context.
4920
 * @param name The unique name of the model.
4921
 * @return A model that can be applied and calls func with its inputs during the model execution.
4922
 */
4923
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_debug(ccv_cnnp_model_debug_f func, void* const context, ccv_cnnp_model_debug_context_deinit_f deinit, ccv_cnnp_model_debug_context_copy_f copy, const char* const name);
4924
/**
4925
 * A sort model. The results are two tensors: values and indices.
4926
 * @param along_axis Sort along which axis.
4927
 * @param descending Whether sort by descending order.
4928
 * @param name The unique name of the model.
4929
 * @return A sort model.
4930
 */
4931
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_sort(const int along_axis, const int descending, const char* const name);
4932
/**
4933
 * A partition model. The results are two tensors: values and indices.
4934
 * @param kth Take the kth elements.
4935
 * @param along_axis Partition along which axis.
4936
 * @param descending Whether partition by descending order.
4937
 * @param name The unique name of the model.
4938
 * @return A partition model.
4939
 */
4940
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_partition(const int kth, const int along_axis, const int descending, const char* const name);
4941
/**
4942
 * A unique consecutive model. Otherwise known as run-length encode.
4943
 * @param bincount How many unique consecutive elements there are, 0 to match the original.
4944
 * @param name The unique name of the model.
4945
 * @return A unique consecutive model.
4946
 */
4947
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_unique_consecutive(const int bincount, const char* const name);
4948
/**
4949
 * A scatter add model.
4950
 * @param name The unique name of the model.
4951
 * @param bincount How many original elements will be, it needs to be non-zero.
4952
 * @return A scatter add model.
4953
 */
4954
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_scatter_add(const int bincount, const char* const name);
4955
/**
4956
 * A segmented dense layer model. Note that the input would be activation, indices and count.
4957
 * @param segments How many segments (experts) in this layer.
4958
 * @param count The output dimension.
4959
 * @param no_bias Whether it has a bias term or not.
4960
 * @param flags The flags to disable / enable certain features.
4961
 * @param is_trainable Whether the parameters of this model can be trained.
4962
 * @param name The unique name of the model.
4963
 * @return A segmented dense layer model.
4964
 */
4965
CCV_WARN_UNUSED(ccv_cnnp_model_t*) ccv_cnnp_segmented_dense(const int segments, const int count, const int no_bias, const int flags, const int is_trainable, const char* const name);
4966
4967
/** @} */
4968
4969
/** @} */
4970
4971
#endif