Coverage Report

Created: 2026-04-16 18:32

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_cnnp_model_addons.c
Line
Count
Source
1
#include "ccv_nnc.h"
2
#include "ccv_nnc_easy.h"
3
#include "ccv_nnc_internal.h"
4
#include "ccv_internal.h"
5
#include "_ccv_cnnp_model.h"
6
7
// MARK - Add-on Functions
8
9
/**
 * Custom exec callback: adds the squared L2 norm of inputs[0] to a per-device
 * running sum.
 *
 * The device id is read from inputs[0]. outputs[1 + device_id * 2] is the
 * running sum and outputs[1 + device_id * 2 + 1] receives this tensor's term
 * before it is added in. Always returns CCV_NNC_EXEC_SUCCESS.
 */
static int _ccv_cnnp_model_clip_grad_norm_reduce_norm2(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  ccv_nnc_tensor_t* const gradient = inputs[0];
  const int slot = 1 + CCV_TENSOR_GET_DEVICE_ID(gradient->info.type) * 2;
  ccv_nnc_tensor_t* const running_sum = outputs[slot];
  ccv_nnc_tensor_t* const term = outputs[slot + 1];
  if (ccv_nnc_tensor_count(gradient->info) != 1)
  {
    // General case: take the norm first, then square it.
    ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(), hint, flags, TENSOR_LIST(gradient), TENSOR_LIST(term), stream_context);
    ccv_nnc_cmd_exec(CMD_MUL_FORWARD(1), hint, flags, TENSOR_LIST(term, term), TENSOR_LIST(term), stream_context);
  } else {
    // A single element: its squared norm is just the element times itself.
    ccv_nnc_cmd_exec(CMD_MUL_FORWARD(1), hint, flags, TENSOR_LIST(gradient, gradient), TENSOR_LIST(term), stream_context);
  }
  ccv_nnc_cmd_exec(CMD_ADD_FORWARD(1, 1), hint, flags, TENSOR_LIST(running_sum, term), TENSOR_LIST(running_sum), stream_context);
  return CCV_NNC_EXEC_SUCCESS;
}
24
25
static ccv_nnc_cmd_vtab_t clip_grad_norm_reduce_norm2_vtab = {
26
  .exec = _ccv_cnnp_model_clip_grad_norm_reduce_norm2
27
};
28
29
/**
 * Custom exec callback: multiplies inputs[0] by the per-device factor stored
 * at inputs[1 + device_id * 2] and writes the product to outputs[0].
 * The device id is read from inputs[0]. Always returns CCV_NNC_EXEC_SUCCESS.
 */
static int _ccv_cnnp_model_clip_grad_norm_scatter_norm2(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  ccv_nnc_tensor_t* const gradient = inputs[0];
  const int factor_idx = 1 + CCV_TENSOR_GET_DEVICE_ID(gradient->info.type) * 2;
  ccv_nnc_tensor_t* const factor = inputs[factor_idx];
  ccv_nnc_cmd_exec(CMD_MUL_FORWARD(1), hint, flags, TENSOR_LIST(gradient, factor), TENSOR_LIST(outputs[0]), stream_context);
  return CCV_NNC_EXEC_SUCCESS;
}
36
37
static ccv_nnc_cmd_vtab_t clip_grad_norm_scatter_norm2_vtab = {
38
  .exec = _ccv_cnnp_model_clip_grad_norm_scatter_norm2
39
};
40
41
/**
 * Rescale the gradients of the selected parameters by a factor derived from
 * their L2 norm.
 *
 * Steps, per device:
 *   1. zero a running sum,
 *   2. accumulate squared norms of every selected gradient into it,
 *   3. turn the sum into max_norm / sqrt(sum), clamped with an upper bound of 1,
 *   4. multiply every selected gradient by that factor.
 *
 * @param model          Compiled model (compiled_data must be present).
 * @param parameters     Which parameters' gradients to process.
 * @param norm_type      Must be 2; anything else trips an assert.
 * @param max_norm       Numerator of the scaling factor.
 * @param stream_context Stream to enqueue on; when null and more than one
 *                       device is used, the per-device streams are waited on.
 */
void ccv_cnnp_model_parameters_clip_grad_norm(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int norm_type, float max_norm, ccv_nnc_stream_context_t* const stream_context)
{
  assert(norm_type == 2);
  ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
  assert(compiled_data);
  const int parallel_count = ccv_max(model->parallel_count, 1);
  // Two tensors per device: [d * 2] is the running sum / final factor, [d * 2 + 1] is scratch.
  ccv_nnc_tensor_t* norm2[parallel_count * 2];
  ccv_nnc_tensor_t* max_normt[parallel_count];
  const int stream_type = compiled_data->stream_type;
  const int on_gpu = (stream_type == CCV_STREAM_CONTEXT_GPU);
  int d;
  for (d = 0; d < parallel_count; d++)
  {
    ccv_nnc_tensor_param_t info = {
      .type = on_gpu ? CCV_TENSOR_GPU_MEMORY : CCV_TENSOR_CPU_MEMORY,
      .format = CCV_TENSOR_FORMAT_NHWC,
      .datatype = CCV_32F,
      .dim = {1},
    };
    if (on_gpu)
    {
      // Device tensors wrap buffers handed out by the model's xpu allocator.
      CCV_TENSOR_SET_DEVICE_ID(info.type, d);
      norm2[d * 2] = ccv_nnc_tensor_new(ccv_nnc_xpu_alloc(&compiled_data->xpu_alloc, d, stream_context, ccv_nnc_tensor_data_size(info)), info, 0);
      norm2[d * 2 + 1] = ccv_nnc_tensor_new(ccv_nnc_xpu_alloc(&compiled_data->xpu_alloc, d, stream_context, ccv_nnc_tensor_data_size(info)), info, 0);
      max_normt[d] = ccv_nnc_tensor_new(ccv_nnc_xpu_alloc(&compiled_data->xpu_alloc, d, stream_context, ccv_nnc_tensor_data_size(info)), info, 0);
    } else {
      norm2[d * 2] = ccv_nnc_tensor_new(0, info, 0);
      norm2[d * 2 + 1] = ccv_nnc_tensor_new(0, info, 0);
      max_normt[d] = ccv_nnc_tensor_new(0, info, 0);
    }
  }
  // Step 1: zero out the running sums.
  if (parallel_count <= 1)
  {
    ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(norm2[0]), stream_context);
  } else {
    ccv_nnc_stream_context_t* device_streams[parallel_count];
    ccv_nnc_stream_signal_t* start_signal;
    if (stream_context)
      start_signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
    for (d = 0; d < parallel_count; d++)
    {
      ccv_nnc_tensor_t* const sum = norm2[d * 2];
      int type = CCV_TENSOR_GET_MEMORY(sum->info.type) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
      const int device_id = CCV_TENSOR_GET_DEVICE_ID(sum->info.type);
      CCV_STREAM_SET_DEVICE_ID(type, device_id);
      ccv_nnc_stream_context_t* const device_stream = ccv_cnnp_compiled_data_get_stream(compiled_data, type);
      // The device stream waits for the caller's stream before starting.
      if (stream_context)
        ccv_nnc_stream_context_wait_signal(device_stream, start_signal);
      ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(sum), device_stream);
      // And the caller's stream waits for the device stream to finish.
      if (stream_context)
      {
        ccv_nnc_stream_signal_t* const done_signal = ccv_nnc_stream_context_emit_signal_new(device_stream);
        ccv_nnc_stream_context_wait_signal(stream_context, done_signal);
      }
      device_streams[d] = device_stream;
    }
    // Without a caller stream this call is blocking: wait on every device stream.
    if (!stream_context)
      for (d = 0; d < parallel_count; d++)
        if (device_streams[d])
          ccv_nnc_stream_context_wait(device_streams[d]);
  }
  // Step 2: gather squared norms over all selected gradients.
  ccv_nnc_cmd_t reduce_cmd = {
    .cmd = CCV_NNC_CUSTOM_FORWARD,
    .isa = &clip_grad_norm_reduce_norm2_vtab,
  };
  ccv_cnnp_model_parameter_gradients_map(model, parameters, reduce_cmd, ccv_nnc_no_hint, 0, 0, 0, norm2, parallel_count * 2, stream_context);
  // Step 3: factor = max_norm / sqrt(sum), clamped with upper bound 1 (the lower bound passed is NAN).
  if (parallel_count <= 1)
  {
    ccv_nnc_cmd_exec(CMD_EWSQRT_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(norm2[0]), TENSOR_LIST(norm2[0]), stream_context);
    ccv_nnc_cmd_exec(CMD_SET_FORWARD(max_norm), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(max_normt[0]), stream_context);
    ccv_nnc_cmd_exec(CMD_EWDIV_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(max_normt[0], norm2[0]), TENSOR_LIST(norm2[0]), stream_context);
    ccv_nnc_cmd_exec(CMD_CLAMP_FORWARD(NAN, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(norm2[0]), TENSOR_LIST(norm2[0]), stream_context);
  } else {
    ccv_nnc_stream_context_t* device_streams[parallel_count];
    ccv_nnc_stream_signal_t* start_signal;
    if (stream_context)
      start_signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
    for (d = 0; d < parallel_count; d++)
    {
      ccv_nnc_tensor_t* const factor = norm2[d * 2];
      int type = CCV_TENSOR_GET_MEMORY(factor->info.type) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
      const int device_id = CCV_TENSOR_GET_DEVICE_ID(factor->info.type);
      CCV_STREAM_SET_DEVICE_ID(type, device_id);
      ccv_nnc_stream_context_t* const device_stream = ccv_cnnp_compiled_data_get_stream(compiled_data, type);
      // The device stream waits for the caller's stream before starting.
      if (stream_context)
        ccv_nnc_stream_context_wait_signal(device_stream, start_signal);
      ccv_nnc_cmd_exec(CMD_EWSQRT_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(factor), TENSOR_LIST(factor), device_stream);
      ccv_nnc_cmd_exec(CMD_SET_FORWARD(max_norm), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(max_normt[d]), device_stream);
      ccv_nnc_cmd_exec(CMD_EWDIV_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(max_normt[d], factor), TENSOR_LIST(factor), device_stream);
      ccv_nnc_cmd_exec(CMD_CLAMP_FORWARD(NAN, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(factor), TENSOR_LIST(factor), device_stream);
      if (stream_context)
      {
        ccv_nnc_stream_signal_t* const done_signal = ccv_nnc_stream_context_emit_signal_new(device_stream);
        ccv_nnc_stream_context_wait_signal(stream_context, done_signal);
      }
      device_streams[d] = device_stream;
    }
    // Without a caller stream this call is blocking: wait on every device stream.
    if (!stream_context)
      for (d = 0; d < parallel_count; d++)
        if (device_streams[d])
          ccv_nnc_stream_context_wait(device_streams[d]);
  }
  // Step 4: multiply every selected gradient by its device's factor.
  ccv_nnc_cmd_t scatter_cmd = {
    .cmd = CCV_NNC_CUSTOM_FORWARD,
    .isa = &clip_grad_norm_scatter_norm2_vtab,
  };
  ccv_cnnp_model_parameter_gradients_map(model, parameters, scatter_cmd, ccv_nnc_no_hint, 0, norm2, parallel_count * 2, 0, 0, stream_context);
  // Hand the device buffers back to the allocator before dropping the tensor headers.
  if (on_gpu)
    for (d = 0; d < parallel_count; d++)
    {
      ccv_nnc_xpu_free(&compiled_data->xpu_alloc, norm2[d * 2]->data.u8);
      ccv_nnc_xpu_free(&compiled_data->xpu_alloc, norm2[d * 2 + 1]->data.u8);
      ccv_nnc_xpu_free(&compiled_data->xpu_alloc, max_normt[d]->data.u8);
    }
  for (d = 0; d < parallel_count; d++)
  {
    ccv_nnc_tensor_free(norm2[d * 2]);
    ccv_nnc_tensor_free(norm2[d * 2 + 1]);
    ccv_nnc_tensor_free(max_normt[d]);
  }
}
177
178
// MARK - Add-on Functions
179
180
/**
 * Custom exec callback: reduces inputs[0] with REDUCE_ISNAN over all of its
 * axes and adds the result to a per-device running count.
 *
 * The device id is read from inputs[0]. outputs[1 + device_id * 2] is the
 * running count and outputs[1 + device_id * 2 + 1] receives this tensor's
 * reduction before it is summed in. Always returns CCV_NNC_EXEC_SUCCESS.
 */
static int _ccv_cnnp_model_isnan(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  const int device_id = CCV_TENSOR_GET_DEVICE_ID(inputs[0]->info.type);
  ccv_nnc_tensor_t* const old_isnanr = outputs[1 + device_id * 2];
  ccv_nnc_tensor_t* const isnanr = outputs[1 + device_id * 2 + 1];
  ccv_nnc_cmd_t reduce_cmd = CMD_REDUCE_ISNAN_FORWARD();
  reduce_cmd.info.reduce.count = ccv_nnc_tensor_nd(inputs[0]->info.dim);
  int i;
  // Enumerate every axis of the input. The bound must be the count we just set
  // on reduce_cmd; `cmd` is the enclosing custom command and carries no reduce info.
  for (i = 0; i < reduce_cmd.info.reduce.count; i++)
    reduce_cmd.info.reduce.axis[i] = i;
  ccv_nnc_cmd_exec(reduce_cmd, hint, flags, TENSOR_LIST(inputs[0]), TENSOR_LIST(isnanr), stream_context);
  ccv_nnc_cmd_exec(CMD_EWSUM_FORWARD(), hint, flags, TENSOR_LIST(old_isnanr, isnanr), TENSOR_LIST(old_isnanr), stream_context);
  return CCV_NNC_EXEC_SUCCESS;
}
194
195
static ccv_nnc_cmd_vtab_t reduce_isnan_vtab = {
196
  .exec = _ccv_cnnp_model_isnan
197
};
198
199
/**
 * Check whether any gradient of the selected parameters contains a NaN.
 *
 * A per-device 32-bit integer counter is zeroed, the NaN reduction of every
 * selected gradient is summed into it, and the counters are then inspected on
 * the host (copied back first when they live on the GPU).
 *
 * @param model          Compiled model (compiled_data must be present).
 * @param parameters     Which parameters' gradients to inspect.
 * @param stream_context Stream to enqueue on; when null and more than one
 *                       device is used, the per-device streams are waited on.
 * @return 1 if any device counter is greater than zero, 0 otherwise.
 */
int ccv_cnnp_model_parameter_gradients_isnan(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, ccv_nnc_stream_context_t* const stream_context)
{
  ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
  assert(compiled_data);
  const int parallel_count = ccv_max(model->parallel_count, 1);
  // Two tensors per device: [i * 2] is the running counter, [i * 2 + 1] is scratch.
  ccv_nnc_tensor_t* isnanr[parallel_count * 2];
  const int stream_type = compiled_data->stream_type;
  const int on_gpu = (stream_type == CCV_STREAM_CONTEXT_GPU);
  int i;
  if (on_gpu)
  {
    for (i = 0; i < parallel_count; i++)
    {
      ccv_nnc_tensor_param_t info = {
        .type = CCV_TENSOR_GPU_MEMORY,
        .format = CCV_TENSOR_FORMAT_NHWC,
        .datatype = CCV_32S,
        .dim = {1},
      };
      CCV_TENSOR_SET_DEVICE_ID(info.type, i);
      isnanr[i * 2] = ccv_nnc_tensor_new(ccv_nnc_xpu_alloc(&compiled_data->xpu_alloc, i, stream_context, ccv_nnc_tensor_data_size(info)), info, 0);
      isnanr[i * 2 + 1] = ccv_nnc_tensor_new(ccv_nnc_xpu_alloc(&compiled_data->xpu_alloc, i, stream_context, ccv_nnc_tensor_data_size(info)), info, 0);
    }
  } else {
    for (i = 0; i < parallel_count; i++)
    {
      ccv_nnc_tensor_param_t info = {
        .type = CCV_TENSOR_CPU_MEMORY,
        .format = CCV_TENSOR_FORMAT_NHWC,
        .datatype = CCV_32S,
        .dim = {1},
      };
      isnanr[i * 2] = ccv_nnc_tensor_new(0, info, 0);
      isnanr[i * 2 + 1] = ccv_nnc_tensor_new(0, info, 0);
    }
  }
  // zero out old isnanr.
  if (parallel_count > 1)
  {
    ccv_nnc_stream_context_t* streams[parallel_count];
    ccv_nnc_stream_signal_t* signal;
    if (stream_context)
      signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
    for (i = 0; i < parallel_count; i++)
    {
      const int device_stream_type = CCV_TENSOR_GET_MEMORY(isnanr[i * 2]->info.type) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
      const int device_id = CCV_TENSOR_GET_DEVICE_ID(isnanr[i * 2]->info.type);
      int type = device_stream_type;
      CCV_STREAM_SET_DEVICE_ID(type, device_id);
      ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(compiled_data, type);
      // Wait signal to finish.
      if (stream_context)
        ccv_nnc_stream_context_wait_signal(stream_0, signal);
      ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(isnanr[i * 2]), stream_0);
      if (stream_context)
      {
        ccv_nnc_stream_signal_t* const done_signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
        ccv_nnc_stream_context_wait_signal(stream_context, done_signal);
      }
      streams[i] = stream_0;
    }
    // If this should be blocking, blocking it.
    if (!stream_context)
      for (i = 0; i < parallel_count; i++)
        if (streams[i])
          ccv_nnc_stream_context_wait(streams[i]);
  } else
    ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(isnanr[0]), stream_context);
  // Gather isnanr.
  ccv_nnc_cmd_t reduce_cmd = {
    .cmd = CCV_NNC_CUSTOM_FORWARD,
    .isa = &reduce_isnan_vtab,
  };
  ccv_cnnp_model_parameter_gradients_map(model, parameters, reduce_cmd, ccv_nnc_no_hint, 0, 0, 0, isnanr, parallel_count * 2, stream_context);
  // The scratch tensors are no longer needed. The tensor header does not own a
  // buffer that came from the xpu allocator, so return that buffer explicitly
  // (same pattern as ccv_cnnp_model_parameters_clip_grad_norm).
  for (i = 0; i < parallel_count; i++)
  {
    if (on_gpu)
      ccv_nnc_xpu_free(&compiled_data->xpu_alloc, isnanr[i * 2 + 1]->data.u8);
    ccv_nnc_tensor_free(isnanr[i * 2 + 1]);
  }
  int retval = 0;
  // Compare against the stream-type constant, matching how stream_type is tested above.
  if (on_gpu)
  {
    ccv_nnc_tensor_param_t info = {
      .type = CCV_TENSOR_CPU_MEMORY,
      .format = CCV_TENSOR_FORMAT_NHWC,
      .datatype = CCV_32S,
      .dim = {1},
    };
    // Host-side landing spot for each device counter.
    ccv_nnc_tensor_t* checknan = ccv_nnc_tensor_new(0, info, 0);
    for (i = 0; i < parallel_count; i++)
    {
      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(isnanr[i * 2]), TENSOR_LIST(checknan), 0);
      if (checknan->data.i32[0] > 0)
      {
        retval = 1;
        break;
      }
    }
    ccv_nnc_tensor_free(checknan);
  } else {
    for (i = 0; i < parallel_count; i++)
      if (isnanr[i * 2]->data.i32[0] > 0)
      {
        retval = 1;
        break;
      }
  }
  // Release the counters only after they have been read.
  for (i = 0; i < parallel_count; i++)
  {
    if (on_gpu)
      ccv_nnc_xpu_free(&compiled_data->xpu_alloc, isnanr[i * 2]->data.u8);
    ccv_nnc_tensor_free(isnanr[i * 2]);
  }
  return retval;
}
306
307
// MARK - Core Layers
308
309
/**
 * Build step for the sum model: creates one output symbol with the tensor
 * parameters of inputs[0] and adds an EWSUM node taking all inputs.
 */
static void _ccv_cnnp_sum_build(ccv_cnnp_model_t* const self, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_sum_build] -\n");
  assert(output_size == 1);
  const ccv_nnc_tensor_param_t sum_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, sum_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, CMD_EWSUM_FORWARD(), inputs, input_size, outputs, output_size, 0);
}
316
317
static ccv_cnnp_model_t* _ccv_cnnp_sum_copy(const ccv_cnnp_model_t* const self, void* const context);
318
319
static const ccv_cnnp_model_vtab_t ccv_cnnp_sum_isa = {
320
  .build = _ccv_cnnp_sum_build,
321
  .copy = _ccv_cnnp_sum_copy,
322
};
323
324
typedef struct {
325
  ccv_cnnp_model_t super;
326
  ccv_nnc_tensor_symbol_t output;
327
} ccv_cnnp_model_sum_t;
328
329
ccv_cnnp_model_t* ccv_cnnp_sum(const char* const name)
330
63
{
331
63
  ccv_cnnp_model_sum_t* const model_sum = (ccv_cnnp_model_sum_t*)cccalloc(1, sizeof(ccv_cnnp_model_sum_t));
332
63
  model_sum->super.isa = &ccv_cnnp_sum_isa;
333
63
  model_sum->super.input_size = 0;
334
63
  model_sum->super.outputs = &model_sum->output;
335
63
  model_sum->super.output_size = 1;
336
63
  ccv_cnnp_model_copy_name(&model_sum->super, name);
337
63
  return (ccv_cnnp_model_t*)model_sum;
338
63
}
339
340
static ccv_cnnp_model_t* _ccv_cnnp_sum_copy(const ccv_cnnp_model_t* const self, void* const context)
341
3
{
342
3
  return ccv_cnnp_sum(self->name);
343
3
}
344
345
typedef struct {
346
  ccv_cnnp_model_t super;
347
  int axis;
348
  ccv_nnc_tensor_symbol_t output;
349
} ccv_cnnp_model_concat_t;
350
351
/**
 * Build step for the concat model.
 *
 * The output shape starts from the first input whose dim[0] is non-zero; its
 * size along `axis` is the sum of the inputs' sizes along that axis. Every
 * input gets an alias into the output at a running offset along `axis`, and a
 * single FORMAT_TRANSFORM node copies the inputs into those aliases.
 */
static void _ccv_cnnp_concat_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  const ccv_cnnp_model_concat_t* const self = (const ccv_cnnp_model_concat_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_concat_build] 1. -\n");
  assert(output_size == 1);
  int i, j;
  // Use inputs[0] as the shape template; if it is empty, fall forward to the first non-empty input.
  ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  if (output_params.dim[0] == 0)
  {
    for (i = 1; i < input_size; i++)
    {
      output_params = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
      if (output_params.dim[0] != 0)
        break;
    }
  }
  const int nd = ccv_nnc_tensor_nd(output_params.dim);
  const int axis = self->axis;
  assert(axis < nd);
  // Accumulate the concat axis from scratch.
  output_params.dim[axis] = 0;
  int input_is_contiguous = 1;
  for (i = 0; i < input_size; i++)
  {
    const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
    const int input_nd = ccv_nnc_tensor_nd(input_params.dim);
    if (input_nd == 0)
    {
      // Empty input: contributes nothing to the output shape.
      PRINT(CCV_CLI_VERBOSE, "[cnnp_concat_build] %d. input[%d]: -\n", i + 2, i);
      input_is_contiguous = 0;
      continue;
    }
    if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE))
    {
      PRINT(CCV_CLI_VERBOSE, "[cnnp_concat_build] %d. input[%d]: (%d", i + 2, i, input_params.dim[0]);
      int d;
      for (d = 1; d < CCV_NNC_MAX_DIM_ALLOC && input_params.dim[d] > 0; d++)
        PRINT(CCV_CLI_VERBOSE, ", %d", input_params.dim[d]);
      PRINT(CCV_CLI_VERBOSE, ")\n");
    }
    assert(input_nd == nd);
    // All dimensions other than the concat axis must agree with the output.
    for (j = 0; j < nd; j++)
      if (j != axis)
        { assert(input_params.dim[j] == output_params.dim[j]); }
    output_params.dim[axis] += input_params.dim[axis];
  }
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  int ofs[CCV_NNC_MAX_DIM_ALLOC] = {};
  int stride[CCV_NNC_MAX_DIM_ALLOC] = {};
  ccv_nnc_tensor_get_stride(output_params.dim, stride);
  ccv_nnc_tensor_symbol_t aliases[input_size];
  for (i = 0; i < input_size; i++)
  {
    const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
    // An alias is created even for an empty input, so the alias count will match during absorb.
    aliases[i] = ccv_nnc_tensor_symbol_alias_new(graph, outputs[0], ofs, stride, input_params, 0);
    // When some input was empty, inputs with dim[0] == 0 do not advance the offset.
    if (input_is_contiguous || input_params.dim[0] != 0)
      ofs[axis] += input_params.dim[axis];
  }
  // Format transform is more flexible.
  ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), inputs, input_size, aliases, input_size, "concat");
}
427
428
static ccv_cnnp_model_t* _ccv_cnnp_concat_copy(const ccv_cnnp_model_t* const self, void* const context);
429
430
static const ccv_cnnp_model_vtab_t ccv_cnnp_concat_isa = {
431
  .build = _ccv_cnnp_concat_build,
432
  .copy = _ccv_cnnp_concat_copy,
433
};
434
435
ccv_cnnp_model_t* ccv_cnnp_concat(const int axis, const char* const name)
436
4
{
437
4
  ccv_cnnp_model_concat_t* const model_concat = (ccv_cnnp_model_concat_t*)cccalloc(1, sizeof(ccv_cnnp_model_concat_t));
438
4
  model_concat->super.isa = &ccv_cnnp_concat_isa;
439
4
  model_concat->super.input_size = 0;
440
4
  model_concat->super.outputs = &model_concat->output;
441
4
  model_concat->super.output_size = 1;
442
4
  model_concat->axis = axis;
443
4
  ccv_cnnp_model_copy_name(&model_concat->super, name);
444
4
  return (ccv_cnnp_model_t*)model_concat;
445
4
}
446
447
static ccv_cnnp_model_t* _ccv_cnnp_concat_copy(const ccv_cnnp_model_t* const super, void* const context)
448
0
{
449
0
  const ccv_cnnp_model_concat_t* const self = (const ccv_cnnp_model_concat_t*)super;
450
0
  return ccv_cnnp_concat(self->axis, self->super.name);
451
0
}
452
453
typedef struct {
454
  ccv_cnnp_model_t super;
455
  int axis;
456
  ccv_nnc_tensor_symbol_t outputs[1];
457
} ccv_cnnp_model_chunk_t;
458
459
/**
 * Build step for the chunk model: splits inputs[0] into output_size equal
 * pieces along `axis`, each piece being an alias at a running offset.
 *
 * If inputs[0] is itself an alias whose strides are not in non-increasing
 * order (treated as a permute), it is first materialised through a
 * FORMAT_TRANSFORM node and the aliases are taken from that copy instead.
 * The size along `axis` must be divisible by the number of outputs.
 */
static void _ccv_cnnp_chunk_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  // Cast to the chunk model type (the same type _ccv_cnnp_chunk_copy uses), not the concat one.
  const ccv_cnnp_model_chunk_t* const self = (const ccv_cnnp_model_chunk_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_chunk_build] 1. axis: %d\n", self->axis);
  assert(input_size == 1);
  const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE))
  {
    PRINT(CCV_CLI_VERBOSE, "[cnnp_chunk_build] 2. input: (%d", input_params.dim[0]);
    int d;
    for (d = 1; d < CCV_NNC_MAX_DIM_ALLOC && input_params.dim[d] > 0; d++)
      PRINT(CCV_CLI_VERBOSE, ", %d", input_params.dim[d]);
    PRINT(CCV_CLI_VERBOSE, ")\n");
  }
  ccv_nnc_tensor_param_t output_params = input_params;
  int i;
  const int nd = ccv_nnc_tensor_nd(output_params.dim);
  const int axis = self->axis;
  assert(axis < nd);
  const int n = self->super.output_size;
  assert(n == output_size);
  assert(output_params.dim[axis] % n == 0);
  // Each output gets an equal share of the split axis.
  output_params.dim[axis] = output_params.dim[axis] / n;
  int ofs[CCV_NNC_MAX_DIM_ALLOC] = {};
  int stride[CCV_NNC_MAX_DIM_ALLOC] = {};
  ccv_nnc_tensor_get_stride(input_params.dim, stride);
  ccv_nnc_tensor_symbol_t to = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
  if (to.d == CCV_NNC_NO_TENSOR_SYMBOL) // If we are not reshape an alias, it is straightforward.
  {
    for (i = 0; i < output_size; i++)
    {
      outputs[i] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], ofs, stride, output_params, 0);
      ofs[axis] += output_params.dim[axis];
    }
  } else {
    // Otherwise, we need to check if it is permute. For permute, we cannot do alias directly.
    // We need to first materialize the permute and then run reshape on top of it, otherwise it will be wrong.
    int old_stride[CCV_NNC_MAX_DIM_ALLOC];
    ccv_nnc_tensor_symbol_alias_params(graph, inputs[0], 0, old_stride);
    // We identify permute by checking if the stride is not in descending order.
    // This also covered "permute" through reshape, rather than using ccv_cnnp_permute directly.
    int no_permute = 1;
    for (i = 1; no_permute && i < nd; i++)
      if (old_stride[i - 1] < old_stride[i])
        no_permute = 0;
    if (no_permute)
    { // Just straightforward aliasing with the existing strides if there is no permute.
      for (i = 0; i < output_size; i++)
      {
        outputs[i] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], ofs, old_stride, output_params, 0);
        ofs[axis] += output_params.dim[axis];
      }
    } else {
      // Otherwise, we first do format transform to plain tensor and then alias into that.
      ccv_nnc_tensor_symbol_t permuted = ccv_nnc_tensor_symbol_new(graph, input_params, 0);
      ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(permuted), "reshape");
      for (i = 0; i < output_size; i++)
      {
        outputs[i] = ccv_nnc_tensor_symbol_alias_new(graph, permuted, ofs, stride, output_params, 0);
        ofs[axis] += output_params.dim[axis];
      }
    }
  }
}
523
524
static ccv_cnnp_model_t* _ccv_cnnp_chunk_copy(const ccv_cnnp_model_t* const self, void* const context);
525
526
static const ccv_cnnp_model_vtab_t ccv_cnnp_chunk_isa = {
527
  .build = _ccv_cnnp_chunk_build,
528
  .copy = _ccv_cnnp_chunk_copy,
529
};
530
531
ccv_cnnp_model_t* ccv_cnnp_chunk(const int n, const int axis, const char* const name)
532
2
{
533
2
  assert(n >= 1);
534
2
  ccv_cnnp_model_chunk_t* const model_chunk = (ccv_cnnp_model_chunk_t*)cccalloc(1, sizeof(ccv_cnnp_model_chunk_t) + sizeof(ccv_nnc_tensor_symbol_t) * (n - 1));
535
2
  model_chunk->super.isa = &ccv_cnnp_chunk_isa;
536
2
  model_chunk->super.input_size = 1;
537
2
  model_chunk->super.outputs = model_chunk->outputs;
538
2
  model_chunk->super.output_size = n;
539
2
  model_chunk->axis = axis;
540
2
  ccv_cnnp_model_copy_name(&model_chunk->super, name);
541
2
  return (ccv_cnnp_model_t*)model_chunk;
542
2
}
543
544
static ccv_cnnp_model_t* _ccv_cnnp_chunk_copy(const ccv_cnnp_model_t* const super, void* const context)
545
0
{
546
0
  const ccv_cnnp_model_chunk_t* const self = (const ccv_cnnp_model_chunk_t*)super;
547
0
  return ccv_cnnp_chunk(self->super.output_size, self->axis, self->super.name);
548
0
}
549
550
typedef struct {
551
  ccv_cnnp_model_t super;
552
  ccv_nnc_tensor_symbol_t output;
553
  int format;
554
  int dim[CCV_NNC_MAX_DIM_ALLOC];
555
  int ofs[CCV_NNC_MAX_DIM_ALLOC];
556
  int stride[CCV_NNC_MAX_DIM_ALLOC];
557
} ccv_cnnp_model_reshape_t;
558
559
/**
 * Build step for the reshape model: outputs[0] becomes an alias with the
 * requested dim / ofs / stride (and format, when set).
 *
 * A -1 entry in the requested dim is replaced by tensor_count / product of
 * the other entries. When inputs[0] is already an alias and its strides look
 * like a permute (not in non-increasing order), the input is first copied
 * through a FORMAT_TRANSFORM node and the alias is taken from that copy.
 */
static void _ccv_cnnp_reshape_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_cnnp_model_reshape_t* const self = (ccv_cnnp_model_reshape_t*)super;
  if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE))
  {
    int d;
    PRINT(CCV_CLI_VERBOSE, "[cnnp_reshape_build] 1. dim: (%d", self->dim[0]);
    for (d = 1; d < CCV_NNC_MAX_DIM_ALLOC && self->dim[d] > 0; d++)
      PRINT(CCV_CLI_VERBOSE, ", %d", self->dim[d]);
    // ofs and stride are printed with as many entries as dim had.
    const int printed = d;
    PRINT(CCV_CLI_VERBOSE, "), ofs: (%d", self->ofs[0]);
    for (d = 1; d < printed; d++)
      PRINT(CCV_CLI_VERBOSE, ", %d", self->ofs[d]);
    PRINT(CCV_CLI_VERBOSE, "), stride: (%d", self->stride[0]);
    for (d = 1; d < printed; d++)
      PRINT(CCV_CLI_VERBOSE, ", %d", self->stride[d]);
    PRINT(CCV_CLI_VERBOSE, ")\n");
  }
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  int dim[CCV_NNC_MAX_DIM_ALLOC];
  memcpy(dim, self->dim, sizeof(dim));
  // Resolve a -1 placeholder from the element count of the input.
  int i, auto_idx = -1;
  size_t known = 1;
  const size_t tensor_count = ccv_nnc_tensor_count(params);
  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && dim[i]; i++)
  {
    if (dim[i] == -1)
      auto_idx = i;
    else
      known *= dim[i];
  }
  if (auto_idx >= 0)
  {
    assert(known > 0 && tensor_count % known == 0);
    dim[auto_idx] = tensor_count / known;
  }
  if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE))
  {
    int d;
    PRINT(CCV_CLI_VERBOSE, "[cnnp_reshape_build] 2. input: (%d", params.dim[0]);
    for (d = 1; d < CCV_NNC_MAX_DIM_ALLOC && params.dim[d] > 0; d++)
      PRINT(CCV_CLI_VERBOSE, ", %d", params.dim[d]);
    PRINT(CCV_CLI_VERBOSE, ")\n");
  }
  assert(ccv_nnc_dimension_count(dim) <= ccv_nnc_tensor_count(params));
  const ccv_nnc_tensor_symbol_t to = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
  int stride_from_dim[CCV_NNC_MAX_DIM_ALLOC];
  if (to.d == CCV_NNC_NO_TENSOR_SYMBOL)
  {
    // The input is not an alias: alias it directly.
    memcpy(params.dim, dim, sizeof(params.dim));
    int* stride = self->stride;
    if (self->stride[0] == 0)
    {
      ccv_nnc_tensor_get_stride(dim, stride_from_dim);
      stride = stride_from_dim;
    }
    if (self->format > 0)
      params.format = self->format;
    outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], self->ofs, stride, params, 0);
    return;
  }
  // The input is an alias. A permute cannot be aliased directly: it has to be
  // materialized first, then reshaped, otherwise the result will be wrong.
  int old_stride[CCV_NNC_MAX_DIM_ALLOC];
  ccv_nnc_tensor_symbol_alias_params(graph, inputs[0], 0, old_stride);
  const int nd = ccv_nnc_tensor_nd(params.dim);
  const int new_nd = ccv_nnc_tensor_nd(dim);
  // Permute is identified by strides that are not in descending order. The check
  // only runs when the rank changes or an explicit, different stride was given.
  int no_permute = 1;
  if (new_nd != nd || (self->stride[0] != 0 && memcmp(self->stride, old_stride, sizeof(self->stride))))
  {
    for (i = 1; no_permute && i < nd; i++)
      if (old_stride[i - 1] < old_stride[i])
        no_permute = 0;
  }
  ccv_nnc_tensor_symbol_t source = inputs[0];
  if (!no_permute)
  {
    // Materialize into a plain tensor with the input's own shape, then alias that.
    source = ccv_nnc_tensor_symbol_new(graph, params, 0);
    ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(source), "reshape");
  }
  memcpy(params.dim, dim, sizeof(params.dim));
  int* stride;
  if (self->stride[0] != 0)
    stride = self->stride;
  else if (no_permute && new_nd == nd)
    stride = old_stride; // Same rank, no permute: the existing strides still apply.
  else {
    ccv_nnc_tensor_get_stride(dim, stride_from_dim);
    stride = stride_from_dim;
  }
  if (self->format > 0)
    params.format = self->format;
  outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, source, self->ofs, stride, params, 0);
}
670
671
static ccv_cnnp_model_t* _ccv_cnnp_reshape_copy(const ccv_cnnp_model_t* const super, void* const context);
672
673
static const ccv_cnnp_model_vtab_t ccv_cnnp_reshape_isa = {
674
  .build = _ccv_cnnp_reshape_build,
675
  .copy = _ccv_cnnp_reshape_copy,
676
};
677
678
ccv_cnnp_model_t* ccv_cnnp_reshape(const int format, const int dim[CCV_NNC_MAX_DIM_ALLOC], const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC], const char* const name)
679
1.06k
{
680
1.06k
  ccv_cnnp_model_reshape_t* const model_reshape = (ccv_cnnp_model_reshape_t*)cccalloc(1, sizeof(ccv_cnnp_model_reshape_t));
681
1.06k
  model_reshape->super.isa = &ccv_cnnp_reshape_isa;
682
1.06k
  model_reshape->super.input_size = 1;
683
1.06k
  model_reshape->super.outputs = &model_reshape->output;
684
1.06k
  model_reshape->super.output_size = 1;
685
1.06k
  ccv_cnnp_model_copy_name(&model_reshape->super, name);
686
1.06k
  model_reshape->format = format;
687
1.06k
  memcpy(model_reshape->dim, dim, sizeof(model_reshape->dim));
688
1.06k
  memcpy(model_reshape->ofs, ofs, sizeof(model_reshape->ofs));
689
1.06k
  if (stride[0] != 0)
690
5
    memcpy(model_reshape->stride, stride, sizeof(model_reshape->stride));
691
1.06k
  return (ccv_cnnp_model_t*)model_reshape;
692
1.06k
}
693
694
static ccv_cnnp_model_t* _ccv_cnnp_reshape_copy(const ccv_cnnp_model_t* const super, void* const context)
695
1.00k
{
696
1.00k
  const ccv_cnnp_model_reshape_t* const self = (const ccv_cnnp_model_reshape_t*)super;
697
1.00k
  return ccv_cnnp_reshape(self->format, self->dim, self->ofs, self->stride, self->super.name);
698
1.00k
}
699
700
typedef struct {
701
  ccv_cnnp_model_t super;
702
  ccv_nnc_tensor_symbol_t output;
703
  int type;
704
  int begin[CCV_NNC_MAX_DIM_ALLOC];
705
  int end[CCV_NNC_MAX_DIM_ALLOC];
706
} ccv_cnnp_model_pad_t;
707
708
static void _ccv_cnnp_pad_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
709
1
{
710
1
  assert(input_size == 1);
711
1
  assert(output_size == 1);
712
1
  ccv_cnnp_model_pad_t* const self = (ccv_cnnp_model_pad_t*)super;
713
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_pad_build] -\n");
714
1
  const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
715
1
  const int nd = ccv_nnc_tensor_nd(input_params.dim);
716
1
  ccv_nnc_tensor_param_t params = input_params;
717
1
  int i;
718
5
  for (i = 0 ; i < nd; 
i++4
)
719
4
    params.dim[i] += self->begin[i] + self->end[i];
720
1
  const ccv_nnc_tensor_symbol_t padded = ccv_nnc_tensor_symbol_new(graph, params, 0);
721
1
  ccv_nnc_cmd_t pad = CMD_PAD_FORWARD(self->type, (), ());
722
1
  memcpy(pad.info.size.dim, self->begin, sizeof(pad.info.size.dim));
723
1
  memcpy(pad.info.pad.end, self->end, sizeof(pad.info.pad.end));
724
1
  ccv_nnc_graph_exec_symbol_new(graph, pad, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(padded), "pad");
725
1
  outputs[0] = padded;
726
1
}
727
728
static ccv_cnnp_model_t* _ccv_cnnp_pad_copy(const ccv_cnnp_model_t* const super, void* const context);
729
730
static const ccv_cnnp_model_vtab_t ccv_cnnp_pad_isa = {
731
  .build = _ccv_cnnp_pad_build,
732
  .copy = _ccv_cnnp_pad_copy,
733
};
734
735
ccv_cnnp_model_t* ccv_cnnp_pad(const int type, const int begin[CCV_NNC_MAX_DIM_ALLOC], const int end[CCV_NNC_MAX_DIM_ALLOC], const char* const name)
736
1
{
737
1
  ccv_cnnp_model_pad_t* const model_pad = (ccv_cnnp_model_pad_t*)cccalloc(1, sizeof(ccv_cnnp_model_pad_t));
738
1
  model_pad->super.isa = &ccv_cnnp_pad_isa;
739
1
  model_pad->super.input_size = 1;
740
1
  model_pad->super.outputs = &model_pad->output;
741
1
  model_pad->super.output_size = 1;
742
1
  ccv_cnnp_model_copy_name(&model_pad->super, name);
743
1
  model_pad->type = type;
744
1
  memcpy(model_pad->begin, begin, sizeof(model_pad->begin));
745
1
  memcpy(model_pad->end, end, sizeof(model_pad->end));
746
1
  return (ccv_cnnp_model_t*)model_pad;
747
1
}
748
749
static ccv_cnnp_model_t* _ccv_cnnp_pad_copy(const ccv_cnnp_model_t* const super, void* const context)
750
0
{
751
0
  const ccv_cnnp_model_pad_t* const self = (const ccv_cnnp_model_pad_t*)super;
752
0
  return ccv_cnnp_pad(self->type, self->begin, self->end, self->super.name);
753
0
}
754
755
typedef struct {
756
  ccv_cnnp_model_t super;
757
  ccv_nnc_tensor_symbol_t output;
758
} ccv_cnnp_model_identity_t;
759
760
static void _ccv_cnnp_identity_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
761
0
{
762
0
  assert(input_size == 1);
763
0
  assert(output_size == 1);
764
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_identity_build] -\n");
765
0
  outputs[0] = inputs[0];
766
0
}
767
768
static ccv_cnnp_model_t* _ccv_cnnp_identity_copy(const ccv_cnnp_model_t* const super, void* const context);
769
770
static const ccv_cnnp_model_vtab_t ccv_cnnp_identity_isa = {
771
  .build = _ccv_cnnp_identity_build,
772
  .copy = _ccv_cnnp_identity_copy,
773
};
774
775
ccv_cnnp_model_t* ccv_cnnp_identity(const char* const name)
776
0
{
777
0
  ccv_cnnp_model_identity_t* const model_identity = (ccv_cnnp_model_identity_t*)cccalloc(1, sizeof(ccv_cnnp_model_identity_t));
778
0
  model_identity->super.isa = &ccv_cnnp_identity_isa;
779
0
  model_identity->super.input_size = 1;
780
0
  model_identity->super.outputs = &model_identity->output;
781
0
  model_identity->super.output_size = 1;
782
0
  ccv_cnnp_model_copy_name(&model_identity->super, name);
783
0
  return (ccv_cnnp_model_t*)model_identity;
784
0
}
785
786
static ccv_cnnp_model_t* _ccv_cnnp_identity_copy(const ccv_cnnp_model_t* const super, void* const context)
787
0
{
788
0
  const ccv_cnnp_model_identity_t* const self = (const ccv_cnnp_model_identity_t*)super;
789
0
  return ccv_cnnp_identity(self->super.name);
790
0
}
791
792
typedef struct {
793
  ccv_cnnp_model_t super;
794
  ccv_nnc_tensor_symbol_t output;
795
  int index[CCV_NNC_MAX_DIM_ALLOC];
796
} ccv_cnnp_model_permute_t;
797
798
static void _ccv_cnnp_permute_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
799
1
{
800
1
  assert(input_size == 1);
801
1
  assert(output_size == 1);
802
1
  ccv_cnnp_model_permute_t* const self = (ccv_cnnp_model_permute_t*)super;
803
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_permute_build] -\n");
804
1
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
805
1
  ccv_nnc_tensor_symbol_t to = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
806
1
  const int nd = ccv_nnc_tensor_nd(params.dim);
807
1
  int input_dim[CCV_NNC_MAX_DIM_ALLOC];
808
1
  memcpy(input_dim, params.dim, sizeof(params.dim));
809
1
  int input_stride[CCV_NNC_MAX_DIM_ALLOC] = {};
810
1
  int output_stride[CCV_NNC_MAX_DIM_ALLOC] = {};
811
1
  if (to.d == CCV_NNC_NO_TENSOR_SYMBOL) // If it is not an alias. Find stride and permute.
812
0
  {
813
0
    ccv_nnc_tensor_get_stride(input_dim, input_stride);
814
0
    int i;
815
0
    for (i = 0; i < nd; i++)
816
0
    {
817
0
      const int idx = self->index[i];
818
0
      assert(idx >= 0 && idx < nd);
819
0
      params.dim[i] = input_dim[idx];
820
0
      output_stride[i] = input_stride[idx];
821
0
    }
822
0
    outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], ccv_nnc_no_ofs, output_stride, params, 0);
823
1
  } else {
824
    // if it is an alias, we can get the stride from it and use that.
825
1
    int input_ofs[CCV_NNC_MAX_DIM_ALLOC];
826
1
    ccv_nnc_tensor_symbol_alias_params(graph, inputs[0], input_ofs, input_stride);
827
1
    assert(input_stride[0] != 0);
828
1
    int output_ofs[CCV_NNC_MAX_DIM_ALLOC] = {};
829
1
    int i;
830
4
    for (i = 0; i < nd; 
i++3
)
831
3
    {
832
3
      const int idx = self->index[i];
833
3
      assert(idx >= 0 && idx < nd);
834
3
      params.dim[i] = input_dim[idx];
835
3
      output_stride[i] = input_stride[idx];
836
3
      output_ofs[i] = input_ofs[idx];
837
3
    }
838
1
    outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], output_ofs, output_stride, params, 0);
839
1
  }
840
1
}
841
842
static ccv_cnnp_model_t* _ccv_cnnp_permute_copy(const ccv_cnnp_model_t* const super, void* const context);
843
844
static const ccv_cnnp_model_vtab_t ccv_cnnp_permute_isa = {
845
  .build = _ccv_cnnp_permute_build,
846
  .copy = _ccv_cnnp_permute_copy,
847
};
848
849
ccv_cnnp_model_t* ccv_cnnp_permute(const int index[CCV_NNC_MAX_DIM_ALLOC], const char* const name)
850
1
{
851
1
  ccv_cnnp_model_permute_t* const model_permute = (ccv_cnnp_model_permute_t*)cccalloc(1, sizeof(ccv_cnnp_model_permute_t));
852
1
  model_permute->super.isa = &ccv_cnnp_permute_isa;
853
1
  model_permute->super.input_size = 1;
854
1
  model_permute->super.outputs = &model_permute->output;
855
1
  model_permute->super.output_size = 1;
856
1
  ccv_cnnp_model_copy_name(&model_permute->super, name);
857
1
  memcpy(model_permute->index, index, sizeof(model_permute->index));
858
1
  return (ccv_cnnp_model_t*)model_permute;
859
1
}
860
861
static ccv_cnnp_model_t* _ccv_cnnp_permute_copy(const ccv_cnnp_model_t* const super, void* const context)
862
0
{
863
0
  const ccv_cnnp_model_permute_t* const self = (const ccv_cnnp_model_permute_t*)super;
864
0
  return ccv_cnnp_permute(self->index, self->super.name);
865
0
}
866
867
typedef struct {
868
  ccv_cnnp_model_t super;
869
  int index;
870
  ccv_nnc_tensor_symbol_t output;
871
} ccv_cnnp_model_extract_t;
872
873
static void _ccv_cnnp_extract_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
874
6
{
875
6
  assert(output_size == 1);
876
6
  ccv_cnnp_model_extract_t* const self = (ccv_cnnp_model_extract_t*)super;
877
6
  PRINT(CCV_CLI_VERBOSE, "[cnnp_extract_build] index: %d\n", self->index);
878
6
  outputs[0] = inputs[self->index];
879
6
}
880
881
static ccv_cnnp_model_t* _ccv_cnnp_extract_copy(const ccv_cnnp_model_t* const self, void* const context);
882
883
static const ccv_cnnp_model_vtab_t ccv_cnnp_extract_isa = {
884
  .build = _ccv_cnnp_extract_build,
885
  .copy = _ccv_cnnp_extract_copy,
886
};
887
888
ccv_cnnp_model_t* ccv_cnnp_extract(const int index, const char* const name)
889
6
{
890
6
  ccv_cnnp_model_extract_t* const model_extract = (ccv_cnnp_model_extract_t*)cccalloc(1, sizeof(ccv_cnnp_model_extract_t));
891
6
  model_extract->index = index;
892
6
  model_extract->super.isa = &ccv_cnnp_extract_isa;
893
6
  model_extract->super.input_size = 0;
894
6
  model_extract->super.outputs = &model_extract->output;
895
6
  model_extract->super.output_size = 1;
896
6
  ccv_cnnp_model_copy_name(&model_extract->super, name);
897
6
  return (ccv_cnnp_model_t*)model_extract;
898
6
}
899
900
static ccv_cnnp_model_t* _ccv_cnnp_extract_copy(const ccv_cnnp_model_t* const super, void* const context)
901
0
{
902
0
  ccv_cnnp_model_extract_t* const self = (ccv_cnnp_model_extract_t*)super;
903
0
  return ccv_cnnp_extract(self->index, self->super.name);
904
0
}
905
906
typedef struct {
907
  ccv_cnnp_model_t super;
908
  ccv_nnc_tensor_symbol_t output;
909
} ccv_cnnp_model_flatten_t;
910
911
static void _ccv_cnnp_flatten_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
912
10
{
913
10
  PRINT(CCV_CLI_VERBOSE, "[cnnp_flatten_build] -\n");
914
10
  assert(input_size == 1);
915
10
  assert(output_size == 1);
916
10
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
917
10
  ccv_nnc_tensor_param_t output_params = params;
918
10
  memset(output_params.dim, 0, sizeof(output_params.dim));
919
10
  output_params.dim[0] = ccv_nnc_tensor_get_n(params);
920
10
  assert(output_params.dim[0] > 0);
921
10
  output_params.dim[1] = ccv_nnc_tensor_count(params) / output_params.dim[0];
922
10
  int stride[CCV_NNC_MAX_DIM_ALLOC] = {};
923
10
  ccv_nnc_tensor_get_stride(output_params.dim, stride);
924
10
  outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], DIM_ALLOC(), stride, output_params, 0);
925
10
}
926
927
static ccv_cnnp_model_t* _ccv_cnnp_flatten_copy(const ccv_cnnp_model_t* const self, void* const context);
928
929
static const ccv_cnnp_model_vtab_t ccv_cnnp_flatten_isa = {
930
  .build = _ccv_cnnp_flatten_build,
931
  .copy = _ccv_cnnp_flatten_copy,
932
};
933
934
ccv_cnnp_model_t* ccv_cnnp_flatten(const char* const name)
935
12
{
936
12
  ccv_cnnp_model_flatten_t* const model_flatten = (ccv_cnnp_model_flatten_t*)cccalloc(1, sizeof(ccv_cnnp_model_flatten_t));
937
12
  model_flatten->super.isa = &ccv_cnnp_flatten_isa;
938
12
  model_flatten->super.input_size = 1;
939
12
  model_flatten->super.outputs = &model_flatten->output;
940
12
  model_flatten->super.output_size = 1;
941
12
  ccv_cnnp_model_copy_name(&model_flatten->super, name);
942
12
  return (ccv_cnnp_model_t*)model_flatten;
943
12
}
944
945
static ccv_cnnp_model_t* _ccv_cnnp_flatten_copy(const ccv_cnnp_model_t* const self, void* const context)
946
2
{
947
2
  return ccv_cnnp_flatten(self->name);
948
2
}
949
950
// MARK - Batch Norm Layer
951
952
typedef struct {
953
  ccv_cnnp_model_t super;
954
  ccv_nnc_tensor_symbol_t output;
955
  ccv_nnc_tensor_symbol_t bias;
956
  ccv_nnc_tensor_symbol_t scale;
957
  ccv_nnc_graph_exec_symbol_t batch_norm;
958
  ccv_nnc_cmd_param_t params;
959
  ccv_array_t* zero_inits;
960
  ccv_array_t* retainables;
961
} ccv_cnnp_model_batch_norm_t;
962
963
static void _ccv_cnnp_batch_norm_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
964
75
{
965
75
  assert(input_size == 1);
966
75
  assert(output_size == 1);
967
75
  ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
968
75
  PRINT(CCV_CLI_VERBOSE, "[cnnp_batch_norm_build] -\n");
969
75
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
970
75
  const int nd = ccv_nnc_tensor_nd(params.dim);
971
75
  ccv_nnc_tensor_param_t bias_params = params;
972
75
  memset(bias_params.dim, 0, sizeof(bias_params.dim));
973
  // If the accuracy is not enough, bump it to 32-bit floating point.
974
75
  if (bias_params.datatype != CCV_32F && 
bias_params.datatype != CCV_64F16
)
975
16
    bias_params.datatype = CCV_32F;
976
75
  bias_params.dim[0] = nd > 1 ? ccv_nnc_tensor_get_c(params) : 
params.dim[0]0
;
977
75
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, params, 0);
978
  // Both scale and bias are shared between if this model is reused.
979
75
  if (!self->scale.graph)
980
75
    self->scale = ccv_nnc_tensor_symbol_new(graph, bias_params, "scale");
981
75
  if (!self->bias.graph)
982
75
    self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
983
75
  const ccv_nnc_tensor_symbol_t mean = ccv_nnc_tensor_symbol_new(graph, bias_params, "mean");
984
75
  const ccv_nnc_tensor_symbol_t var = ccv_nnc_tensor_symbol_new(graph, bias_params, "var");
985
  // Otherwise, notice mean, var, saved_mean, saved_inv_std are not reused.
986
75
  if (!self->zero_inits)
987
75
    self->zero_inits = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
988
75
  ccv_array_push(self->zero_inits, &mean);
989
75
  ccv_array_push(self->zero_inits, &var);
990
75
  const ccv_nnc_tensor_symbol_t out_mean = ccv_nnc_tensor_symbol_new(graph, bias_params, "out_mean");
991
75
  const ccv_nnc_tensor_symbol_t out_var = ccv_nnc_tensor_symbol_new(graph, bias_params, "out_var");
992
75
  if (!self->retainables)
993
75
    self->retainables = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
994
75
  ccv_array_push(self->retainables, &out_mean);
995
75
  ccv_array_push(self->retainables, &out_var);
996
75
  const ccv_nnc_tensor_symbol_t saved_mean = ccv_nnc_tensor_symbol_new(graph, bias_params, "saved_mean");
997
75
  const ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(graph, bias_params, "saved_inv_std");
998
75
  const int hw = ccv_nnc_tensor_hw(params, ccv_nnc_tensor_nd(params.dim), CCV_NNC_MAX_DIM);
999
75
  ccv_nnc_cmd_param_t batch_norm = self->params;
1000
75
  batch_norm.bnorm.count = hw >= 0 ? CCV_NNC_MAX_DIM + 1 : 
10
;
1001
75
  int i;
1002
75
  batch_norm.bnorm.axis[0] = (params.format == CCV_TENSOR_FORMAT_CHWN) ? 
30
: 0;
1003
75
  if (hw >= 0)
1004
225
    
for (i = 0; 75
i < CCV_NNC_MAX_DIM;
i++150
)
1005
150
      batch_norm.bnorm.axis[i + 1] = i + hw;
1006
75
  self->params = batch_norm;
1007
75
  self->batch_norm = ccv_nnc_graph_exec_symbol_new(graph, ccv_nnc_cmd(CCV_NNC_BATCH_NORM_FORWARD, 0, batch_norm, 0), TENSOR_SYMBOL_LIST(inputs[0], self->scale, self->bias, mean, var), TENSOR_SYMBOL_LIST(output, out_mean, out_var, saved_mean, saved_inv_std), "batch_norm");
1008
75
  outputs[0] = output;
1009
75
}
1010
1011
static void _ccv_cnnp_batch_norm_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
1012
24
{
1013
24
  ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
1014
24
  if (self->scale.graph)
1015
24
    initializer(context, CMD_RANDOM_UNIFORM_FORWARD(0, 1), ccv_nnc_no_hint, 0, 0, self->scale);
1016
24
  if (self->bias.graph)
1017
24
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
1018
24
  int i;
1019
24
  if (self->zero_inits)
1020
72
    
for (i = 0; 24
i < self->zero_inits->rnum;
i++48
)
1021
48
      initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, *(ccv_nnc_tensor_symbol_t*)ccv_array_get(self->zero_inits, i));
1022
24
}
1023
1024
static void _ccv_cnnp_batch_norm_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
1025
75
{
1026
75
  ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
1027
75
  if (self->scale.graph)
1028
75
    add_to_array(parameters, self->scale, is_trainable);
1029
75
  if (self->bias.graph)
1030
75
    add_to_array(parameters, self->bias, is_trainable);
1031
75
}
1032
1033
static void _ccv_cnnp_batch_norm_add_to_output(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const outputs)
1034
75
{
1035
75
  ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
1036
75
  int i;
1037
75
  if (self->retainables)
1038
225
    
for (i = 0; 75
i < self->retainables->rnum;
i++150
)
1039
150
    {
1040
150
      const ccv_nnc_tensor_symbol_t symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(self->retainables, i);
1041
150
      add_to_array(outputs, symbol, 0);
1042
150
    }
1043
75
}
1044
1045
static void _ccv_cnnp_batch_norm_set_is_test(ccv_cnnp_model_t* const super, const int is_test, const ccv_cnnp_cmd_updater_f updater, void* const context)
1046
32
{
1047
32
  ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
1048
32
  if (self->batch_norm.graph)
1049
32
  {
1050
32
    self->params.bnorm.is_test = is_test;
1051
32
    updater(context, self->batch_norm, ccv_nnc_cmd(CCV_NNC_BATCH_NORM_FORWARD, 0, self->params, 0), ccv_nnc_no_hint);
1052
32
  }
1053
32
}
1054
1055
static void _ccv_cnnp_batch_norm_deinit(ccv_cnnp_model_t* const super)
1056
83
{
1057
83
  ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
1058
83
  if (self->zero_inits)
1059
75
    ccv_array_free(self->zero_inits);
1060
83
  if (self->retainables)
1061
75
    ccv_array_free(self->retainables);
1062
83
}
1063
1064
static ccv_cnnp_model_t* _ccv_cnnp_batch_norm_copy(const ccv_cnnp_model_t* const super, void* const context);
1065
1066
static const ccv_cnnp_model_vtab_t ccv_cnnp_batch_norm_isa = {
1067
  .build = _ccv_cnnp_batch_norm_build,
1068
  .init_states = _ccv_cnnp_batch_norm_init_states,
1069
  .add_to_parameter = _ccv_cnnp_batch_norm_add_to_parameter,
1070
  .add_to_output = _ccv_cnnp_batch_norm_add_to_output,
1071
  .copy = _ccv_cnnp_batch_norm_copy,
1072
  .set_is_test = _ccv_cnnp_batch_norm_set_is_test,
1073
  .deinit = _ccv_cnnp_batch_norm_deinit,
1074
};
1075
1076
ccv_cnnp_model_t* ccv_cnnp_batch_norm(const float momentum, const float epsilon, const int is_trainable, const char* const name)
1077
83
{
1078
83
  ccv_cnnp_model_batch_norm_t* const model_batch_norm = (ccv_cnnp_model_batch_norm_t*)cccalloc(1, sizeof(ccv_cnnp_model_batch_norm_t));
1079
83
  model_batch_norm->super.isa = &ccv_cnnp_batch_norm_isa;
1080
83
  model_batch_norm->super.input_size = 1;
1081
83
  model_batch_norm->super.outputs = &model_batch_norm->output;
1082
83
  model_batch_norm->super.output_size = 1;
1083
83
  model_batch_norm->super.is_trainable = is_trainable;
1084
83
  ccv_cnnp_model_copy_name(&model_batch_norm->super, name);
1085
83
  model_batch_norm->scale.d = CCV_NNC_NO_TENSOR_SYMBOL;
1086
83
  model_batch_norm->scale.graph = 0;
1087
83
  model_batch_norm->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
1088
83
  model_batch_norm->bias.graph = 0;
1089
83
  model_batch_norm->params.bnorm.momentum = momentum;
1090
83
  model_batch_norm->params.bnorm.epsilon = epsilon;
1091
83
  return (ccv_cnnp_model_t*)model_batch_norm;
1092
83
}
1093
1094
static ccv_cnnp_model_t* _ccv_cnnp_batch_norm_copy(const ccv_cnnp_model_t* const super, void* const context)
1095
8
{
1096
8
  const ccv_cnnp_model_batch_norm_t* const self = (const ccv_cnnp_model_batch_norm_t*)super;
1097
8
  return ccv_cnnp_batch_norm(self->params.bnorm.momentum, self->params.bnorm.epsilon, self->super.is_trainable, self->super.name);
1098
8
}
1099
1100
// MARK - Convolution Layer
1101
1102
typedef struct {
1103
  ccv_cnnp_model_t super;
1104
  ccv_nnc_tensor_symbol_t output;
1105
  ccv_nnc_tensor_symbol_t weights;
1106
  ccv_nnc_tensor_symbol_t bias;
1107
  int groups;
1108
  int filters;
1109
  int kdim[CCV_NNC_MAX_DIM_ALLOC];
1110
  int dilation[CCV_NNC_MAX_DIM_ALLOC];
1111
  int no_bias;
1112
  int format;
1113
  ccv_nnc_hint_t hint;
1114
} ccv_cnnp_model_convolution_t;
1115
1116
static void _ccv_cnnp_convolution_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1117
114
{
1118
114
  ccv_cnnp_model_convolution_t* const self = (ccv_cnnp_model_convolution_t*)super;
1119
114
  PRINT(CCV_CLI_VERBOSE, "[cnnp_convolution_build] -\n");
1120
114
  assert(input_size == 1);
1121
114
  assert(output_size == 1);
1122
114
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1123
114
  int i;
1124
114
  const int k_nd = ccv_nnc_tensor_nd(self->kdim);
1125
114
  const int nd = k_nd + 2;
1126
114
  ccv_nnc_tensor_param_t weights_params = params;
1127
114
  if (self->format)
1128
0
    weights_params.format = self->format;
1129
114
  ccv_nnc_tensor_set_n(&weights_params, self->filters);
1130
114
  const int a_nd = ccv_nnc_tensor_nd(params.dim);
1131
114
  int c;
1132
114
  switch (params.format)
1133
114
  {
1134
15
    case CCV_TENSOR_FORMAT_NHWC:
1135
15
      c = params.dim[a_nd - 1];
1136
15
      break;
1137
99
    case CCV_TENSOR_FORMAT_NCHW:
1138
99
      if (a_nd == k_nd + 1)
1139
0
        c = params.dim[0];
1140
99
      else
1141
99
        c = params.dim[a_nd <= 1 ? 
00
: 1];
1142
99
      break;
1143
0
    case CCV_TENSOR_FORMAT_CHWN:
1144
0
      c = params.dim[0];
1145
0
      break;
1146
114
  }
1147
114
  assert(c % self->groups == 0);
1148
114
  ccv_nnc_tensor_set_c(&weights_params, nd, c / self->groups);
1149
114
  int hw = -1;
1150
114
  if (weights_params.format == CCV_TENSOR_FORMAT_NHWC || 
weights_params.format == CCV_TENSOR_FORMAT_CHWN99
)
1151
15
    hw = 1;
1152
99
  else if (weights_params.format == CCV_TENSOR_FORMAT_NCHW)
1153
99
    hw = 2;
1154
114
  assert(hw >= 0);
1155
342
  
for (i = 0; 114
i < k_nd;
i++228
)
1156
228
    weights_params.dim[i + hw] = self->kdim[i];
1157
114
  if (!self->weights.graph)
1158
110
    self->weights = ccv_nnc_tensor_symbol_new(graph, weights_params, "weights");
1159
114
  assert(self->weights.graph == graph);
1160
114
  ccv_nnc_tensor_param_t bias_params = params;
1161
114
  if (self->format)
1162
0
    bias_params.format = self->format;
1163
114
  memset(bias_params.dim, 0, sizeof(bias_params.dim));
1164
114
  bias_params.dim[0] = self->filters;
1165
114
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(self->groups, self->filters);
1166
342
  for (i = 0; i < k_nd; 
i++228
)
1167
228
    cmd.info.size.dim[i] = self->kdim[i];
1168
114
  cmd.info.size.dim[k_nd] = c;
1169
114
  memcpy(cmd.info.convolution.dilation, self->dilation, sizeof(self->dilation));
1170
114
  ccv_nnc_tensor_param_t output_params;
1171
  // Dilate weight size based on the dilation factor.
1172
342
  for (i = 0; i < k_nd; 
i++228
)
1173
228
    weights_params.dim[i + hw] = (self->kdim[i] - 1) * ccv_max(self->dilation[i], 1) + 1;
1174
114
  ccv_nnc_hint_tensor_auto(cmd, (ccv_nnc_tensor_param_t []){
1175
114
      params,
1176
114
      weights_params,
1177
114
      bias_params,
1178
114
    }, 3, self->hint, &output_params, 1);
1179
114
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1180
114
  ccv_nnc_graph_exec_symbol_t convolution;
1181
114
  if (self->no_bias)
1182
10
    convolution = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], self->weights), TENSOR_SYMBOL_LIST(output), "convolution");
1183
104
  else {
1184
104
    if (!self->bias.graph)
1185
100
      self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
1186
104
    convolution = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], self->weights, self->bias), TENSOR_SYMBOL_LIST(output), "convolution");
1187
104
  }
1188
114
  ccv_nnc_graph_exec_symbol_set_hint(graph, convolution, self->hint);
1189
114
  outputs[0] = output;
1190
114
}
1191
1192
static void _ccv_cnnp_convolution_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
1193
36
{
1194
36
  ccv_cnnp_model_convolution_t* const self = (ccv_cnnp_model_convolution_t*)super;
1195
36
  const ccv_nnc_tensor_param_t weight_params = ccv_nnc_tensor_symbol_params(graph, self->weights);
1196
36
  const int n = ccv_max(ccv_nnc_tensor_get_n(weight_params), 1);
1197
36
  const int count = ccv_nnc_tensor_count(weight_params);
1198
36
  const float std = sqrtf(2) / sqrtf(count / n);
1199
36
  const float bound = sqrtf(3) * std;
1200
36
  initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-bound, bound), ccv_nnc_no_hint, 0, 0, self->weights);
1201
36
  if (self->bias.graph)
1202
36
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
1203
36
}
1204
1205
static void _ccv_cnnp_convolution_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
1206
114
{
1207
114
  ccv_cnnp_model_convolution_t* const self = (ccv_cnnp_model_convolution_t*)super;
1208
114
  add_to_array(parameters, self->weights, is_trainable);
1209
114
  if (self->bias.graph)
1210
104
    add_to_array(parameters, self->bias, is_trainable);
1211
114
}
1212
1213
static ccv_cnnp_model_t* _ccv_cnnp_convolution_copy(const ccv_cnnp_model_t* const super, void* const context);
1214
1215
static const ccv_cnnp_model_vtab_t ccv_cnnp_convolution_isa = {
1216
  .build = _ccv_cnnp_convolution_build,
1217
  .init_states = _ccv_cnnp_convolution_init_states,
1218
  .add_to_parameter = _ccv_cnnp_convolution_add_to_parameter,
1219
  .copy = _ccv_cnnp_convolution_copy,
1220
};
1221
1222
ccv_cnnp_model_t* ccv_cnnp_convolution(const int groups, const int filters, const int kdim[CCV_NNC_MAX_DIM_ALLOC], const int dilation[CCV_NNC_MAX_DIM_ALLOC], const int no_bias, ccv_nnc_hint_t hint, const int format, const int is_trainable, const char* const name)
1223
126
{
1224
126
  ccv_cnnp_model_convolution_t* const model_convolution = (ccv_cnnp_model_convolution_t*)cccalloc(1, sizeof(ccv_cnnp_model_convolution_t));
1225
126
  model_convolution->super.isa = &ccv_cnnp_convolution_isa;
1226
126
  model_convolution->super.input_size = 1;
1227
126
  model_convolution->super.outputs = &model_convolution->output;
1228
126
  model_convolution->super.output_size = 1;
1229
126
  model_convolution->super.is_trainable = is_trainable;
1230
126
  ccv_cnnp_model_copy_name(&model_convolution->super, name);
1231
126
  model_convolution->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
1232
126
  model_convolution->weights.graph = 0;
1233
126
  model_convolution->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
1234
126
  model_convolution->bias.graph = 0;
1235
126
  model_convolution->groups = groups;
1236
126
  model_convolution->filters = filters;
1237
126
  memcpy(model_convolution->kdim, kdim, sizeof(model_convolution->kdim));
1238
126
  memcpy(model_convolution->dilation, dilation, sizeof(model_convolution->dilation));
1239
126
  model_convolution->no_bias = no_bias;
1240
126
  model_convolution->hint = hint;
1241
126
  model_convolution->format = format;
1242
126
  return (ccv_cnnp_model_t*)model_convolution;
1243
126
}
1244
1245
static ccv_cnnp_model_t* _ccv_cnnp_convolution_copy(const ccv_cnnp_model_t* const super, void* const context)
1246
16
{
1247
16
  ccv_cnnp_model_convolution_t* const self = (ccv_cnnp_model_convolution_t*)super;
1248
16
  return ccv_cnnp_convolution(self->groups, self->filters, self->kdim, self->dilation, self->no_bias, self->hint, self->format, self->super.is_trainable, self->super.name);
1249
16
}
1250
1251
// MARK - Convolution Transpose Layer
1252
1253
typedef struct {
1254
  ccv_cnnp_model_t super;
1255
  ccv_nnc_tensor_symbol_t output;
1256
  ccv_nnc_tensor_symbol_t weights;
1257
  ccv_nnc_tensor_symbol_t bias;
1258
  int groups;
1259
  int filters;
1260
  int kdim[CCV_NNC_MAX_DIM_ALLOC];
1261
  int dilation[CCV_NNC_MAX_DIM_ALLOC];
1262
  int output_padding;
1263
  int no_bias;
1264
  int format;
1265
  ccv_nnc_hint_t hint;
1266
} ccv_cnnp_model_convolution_transpose_t;
1267
1268
// Build hook for the transposed convolution layer.
//
// Expects exactly one input symbol and writes exactly one output symbol to
// outputs[0]. The weights symbol is created on the first build (when
// weights.graph is 0) and must belong to `graph`; the bias symbol is created
// the same way unless no_bias is set. The exec symbol is named
// "convolution_transpose" and gets the layer's hint attached.
static void _ccv_cnnp_convolution_transpose_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_convolution_transpose_t* const self = (ccv_cnnp_model_convolution_transpose_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_convolution_transpose_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  // groups is a divisor below, and filters / groups would silently truncate
  // the per-group channel count if filters were not a multiple of groups.
  assert(self->groups > 0);
  assert(self->filters % self->groups == 0);
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  int i;
  const int nd = CCV_NNC_MAX_DIM + 2;
  // Weights: n = input channels, c = filters per group, spatial dims = kdim.
  ccv_nnc_tensor_param_t weights_params = params;
  if (self->format)
    weights_params.format = self->format;
  const int c = ccv_nnc_tensor_get_c(params);
  ccv_nnc_tensor_set_n(&weights_params, c);
  assert(c % self->groups == 0);
  ccv_nnc_tensor_set_c(&weights_params, nd, self->filters / self->groups);
  const int hw = ccv_nnc_tensor_hw(weights_params, nd, CCV_NNC_MAX_DIM);
  assert(hw >= 0);
  for (i = 0; i < CCV_NNC_MAX_DIM; i++)
    weights_params.dim[i + hw] = self->kdim[i];
  if (!self->weights.graph)
    self->weights = ccv_nnc_tensor_symbol_new(graph, weights_params, "weights");
  assert(self->weights.graph == graph);
  // Bias: a single dimension of `filters` entries.
  ccv_nnc_tensor_param_t bias_params = params;
  if (self->format)
    bias_params.format = self->format;
  memset(bias_params.dim, 0, sizeof(bias_params.dim));
  bias_params.dim[0] = self->filters;
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(self->groups, self->filters, self->output_padding);
  for (i = 0; i < CCV_NNC_MAX_DIM; i++)
    cmd.info.size.dim[i] = self->kdim[i];
  cmd.info.size.dim[CCV_NNC_MAX_DIM] = c;
  memcpy(cmd.info.convolution_transpose.dilation, self->dilation, sizeof(self->dilation));
  ccv_nnc_tensor_param_t output_params;
  // Dilate weight size based on the dilation factor. This only affects the
  // local params copy used for output shape inference below; the weights
  // symbol was already created with the undilated kernel size.
  for (i = 0; i < CCV_NNC_MAX_DIM; i++)
    weights_params.dim[i + hw] = (self->kdim[i] - 1) * ccv_max(self->dilation[i], 1) + 1;
  ccv_nnc_hint_tensor_auto(cmd, (ccv_nnc_tensor_param_t []){
      params,
      weights_params,
      bias_params,
    }, 3, self->hint, &output_params, 1);
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  ccv_nnc_graph_exec_symbol_t convolution_transpose;
  if (self->no_bias)
    convolution_transpose = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], self->weights), TENSOR_SYMBOL_LIST(output), "convolution_transpose");
  else {
    if (!self->bias.graph)
      self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
    convolution_transpose = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], self->weights, self->bias), TENSOR_SYMBOL_LIST(output), "convolution_transpose");
  }
  ccv_nnc_graph_exec_symbol_set_hint(graph, convolution_transpose, self->hint);
  outputs[0] = output;
}
1322
1323
// Init-states hook for the transposed convolution layer: asks `initializer`
// to fill the weights with uniform random values in [-bound, bound] and, when
// a bias symbol exists, to set the bias to 0.
static void _ccv_cnnp_convolution_transpose_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
{
  ccv_cnnp_model_convolution_transpose_t* const layer = (ccv_cnnp_model_convolution_transpose_t*)super;
  const ccv_nnc_tensor_param_t w_params = ccv_nnc_tensor_symbol_params(graph, layer->weights);
  const int count = ccv_nnc_tensor_count(w_params);
  // Clamp n to at least 1 so the integer division below is well defined.
  const int n = ccv_max(ccv_nnc_tensor_get_n(w_params), 1);
  // std = sqrt(2) / sqrt(elements per n slice); bound = sqrt(3) * std.
  const float std = sqrtf(2) / sqrtf(count / n);
  const float bound = sqrtf(3) * std;
  initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-bound, bound), ccv_nnc_no_hint, 0, 0, layer->weights);
  if (!layer->bias.graph)
    return;
  initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, layer->bias);
}
1335
1336
// Add-to-parameter hook: reports the weights symbol, then the bias symbol if
// one has been created, through `add_to_array`.
static void _ccv_cnnp_convolution_transpose_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
{
  ccv_cnnp_model_convolution_transpose_t* const layer = (ccv_cnnp_model_convolution_transpose_t*)super;
  add_to_array(parameters, layer->weights, is_trainable);
  if (!layer->bias.graph)
    return;
  add_to_array(parameters, layer->bias, is_trainable);
}
1343
1344
static ccv_cnnp_model_t* _ccv_cnnp_convolution_transpose_copy(const ccv_cnnp_model_t* const super, void* const context);
1345
1346
static const ccv_cnnp_model_vtab_t ccv_cnnp_convolution_transpose_isa = {
1347
  .build = _ccv_cnnp_convolution_transpose_build,
1348
  .init_states = _ccv_cnnp_convolution_transpose_init_states,
1349
  .add_to_parameter = _ccv_cnnp_convolution_transpose_add_to_parameter,
1350
  .copy = _ccv_cnnp_convolution_transpose_copy,
1351
};
1352
1353
// Allocates a transposed convolution layer with one input and one output.
// The configuration arguments are stored on the layer; kdim and dilation are
// copied in full (CCV_NNC_MAX_DIM_ALLOC entries each). The weights and bias
// symbols start out unset (no graph, CCV_NNC_NO_TENSOR_SYMBOL).
ccv_cnnp_model_t* ccv_cnnp_convolution_transpose(const int groups, const int filters, const int kdim[CCV_NNC_MAX_DIM_ALLOC], const int dilation[CCV_NNC_MAX_DIM_ALLOC], const int output_padding, const int no_bias, ccv_nnc_hint_t hint, const int format, const int is_trainable, const char* const name)
{
  ccv_cnnp_model_convolution_transpose_t* const layer = (ccv_cnnp_model_convolution_transpose_t*)cccalloc(1, sizeof(ccv_cnnp_model_convolution_transpose_t));
  // Base model wiring.
  ccv_cnnp_model_t* const base = &layer->super;
  base->isa = &ccv_cnnp_convolution_transpose_isa;
  base->input_size = 1;
  base->outputs = &layer->output;
  base->output_size = 1;
  base->is_trainable = is_trainable;
  ccv_cnnp_model_copy_name(base, name);
  // Parameter symbols are not created here.
  layer->weights.graph = 0;
  layer->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
  layer->bias.graph = 0;
  layer->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
  // Layer configuration.
  layer->groups = groups;
  layer->filters = filters;
  memcpy(layer->kdim, kdim, sizeof(layer->kdim));
  memcpy(layer->dilation, dilation, sizeof(layer->dilation));
  layer->output_padding = output_padding;
  layer->no_bias = no_bias;
  layer->hint = hint;
  layer->format = format;
  return (ccv_cnnp_model_t*)layer;
}
1376
1377
static ccv_cnnp_model_t* _ccv_cnnp_convolution_transpose_copy(const ccv_cnnp_model_t* const super, void* const context)
1378
0
{
1379
0
  ccv_cnnp_model_convolution_transpose_t* const self = (ccv_cnnp_model_convolution_transpose_t*)super;
1380
0
  return ccv_cnnp_convolution_transpose(self->groups, self->filters, self->kdim, self->dilation, self->output_padding, self->no_bias, self->hint, self->format, self->super.is_trainable, self->super.name);
1381
0
}
1382
1383
// MARK - Dense Layer
1384
1385
typedef struct {
1386
  ccv_cnnp_model_t super;
1387
  ccv_nnc_tensor_symbol_t output;
1388
  ccv_nnc_tensor_symbol_t weights;
1389
  ccv_nnc_tensor_symbol_t bias;
1390
  int count;
1391
  int no_bias;
1392
  int flags;
1393
} ccv_cnnp_model_dense_t;
1394
1395
// Build hook for the dense layer.
//
// Expects exactly one input symbol and writes exactly one output symbol to
// outputs[0]. Adds a CCV_NNC_GEMM_FORWARD exec symbol named "dense" that takes
// the input, the weights and (unless no_bias is set) the bias.
static void _ccv_cnnp_dense_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_dense_t* const dense = (ccv_cnnp_model_dense_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_dense_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  const ccv_nnc_tensor_param_t in_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  // Weights are count x (size of the input's last dimension).
  const int last_axis = ccv_nnc_tensor_nd(in_params.dim) - 1;
  ccv_nnc_tensor_param_t w_params = in_params;
  memset(w_params.dim, 0, sizeof(w_params.dim));
  w_params.dim[0] = dense->count;
  w_params.dim[1] = in_params.dim[last_axis];
  if (!dense->weights.graph)
    dense->weights = ccv_nnc_tensor_symbol_new(graph, w_params, "weights");
  assert(dense->weights.graph == graph);
  // Bias has a single dimension of `count` entries.
  ccv_nnc_tensor_param_t b_params = in_params;
  memset(b_params.dim, 0, sizeof(b_params.dim));
  b_params.dim[0] = dense->count;
  // GEMM command: both a[] coefficients are 1, transpose_b is {0, 1}.
  ccv_nnc_cmd_t gemm = {0};
  gemm.cmd = CCV_NNC_GEMM_FORWARD;
  gemm.info.blas.a[0] = 1;
  gemm.info.blas.a[1] = 1;
  gemm.info.blas.transpose_b[0] = 0;
  gemm.info.blas.transpose_b[1] = 1;
  gemm.info.blas.flags = dense->flags;
  // Derive the output params from input, weights and bias params.
  const ccv_nnc_tensor_param_t auto_inputs[3] = { in_params, w_params, b_params };
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(gemm, auto_inputs, 3, ccv_nnc_no_hint, &out_params, 1);
  const ccv_nnc_tensor_symbol_t result = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  if (!dense->no_bias) {
    if (!dense->bias.graph)
      dense->bias = ccv_nnc_tensor_symbol_new(graph, b_params, "bias");
    ccv_nnc_graph_exec_symbol_new(graph, gemm, TENSOR_SYMBOL_LIST(inputs[0], dense->weights, dense->bias), TENSOR_SYMBOL_LIST(result), "dense");
  } else
    ccv_nnc_graph_exec_symbol_new(graph, gemm, TENSOR_SYMBOL_LIST(inputs[0], dense->weights), TENSOR_SYMBOL_LIST(result), "dense");
  outputs[0] = result;
}
1435
1436
// Init-states hook for the dense layer: asks `initializer` to fill the weights
// with uniform random values in [-bound, bound] and, when a bias symbol
// exists, to set the bias to 0.
static void _ccv_cnnp_dense_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
{
  ccv_cnnp_model_dense_t* const dense = (ccv_cnnp_model_dense_t*)super;
  const ccv_nnc_tensor_param_t w_params = ccv_nnc_tensor_symbol_params(graph, dense->weights);
  // dim[1] of the weights is the size of the input's last dimension (set in build).
  const int c = w_params.dim[1];
  // std = sqrt(2) / sqrt(c); bound = sqrt(3) * std.
  const float std = sqrtf(2) / sqrtf(c);
  const float bound = sqrtf(3) * std;
  initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-bound, bound), ccv_nnc_no_hint, 0, 0, dense->weights);
  if (!dense->bias.graph)
    return;
  initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, dense->bias);
}
1447
1448
// Add-to-parameter hook: reports the weights symbol, then the bias symbol if
// one has been created, through `add_to_array`.
static void _ccv_cnnp_dense_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
{
  ccv_cnnp_model_dense_t* const dense = (ccv_cnnp_model_dense_t*)super;
  add_to_array(parameters, dense->weights, is_trainable);
  if (!dense->bias.graph)
    return;
  add_to_array(parameters, dense->bias, is_trainable);
}
1455
1456
static ccv_cnnp_model_t* _ccv_cnnp_dense_copy(const ccv_cnnp_model_t* const super, void* const context);
1457
1458
static const ccv_cnnp_model_vtab_t ccv_cnnp_dense_isa = {
1459
  .build = _ccv_cnnp_dense_build,
1460
  .init_states = _ccv_cnnp_dense_init_states,
1461
  .add_to_parameter = _ccv_cnnp_dense_add_to_parameter,
1462
  .copy = _ccv_cnnp_dense_copy,
1463
};
1464
1465
ccv_cnnp_model_t* ccv_cnnp_dense(const int count, const int no_bias, const int flags, const int is_trainable, const char* const name)
1466
2.31k
{
1467
2.31k
  ccv_cnnp_model_dense_t* const model_dense = (ccv_cnnp_model_dense_t*)cccalloc(1, sizeof(ccv_cnnp_model_dense_t));
1468
2.31k
  model_dense->super.isa = &ccv_cnnp_dense_isa;
1469
2.31k
  model_dense->super.input_size = 1;
1470
2.31k
  model_dense->super.outputs = &model_dense->output;
1471
2.31k
  model_dense->super.output_size = 1;
1472
2.31k
  model_dense->super.is_trainable = is_trainable;
1473
2.31k
  ccv_cnnp_model_copy_name(&model_dense->super, name);
1474
2.31k
  model_dense->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
1475
2.31k
  model_dense->weights.graph = 0;
1476
2.31k
  model_dense->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
1477
2.31k
  model_dense->bias.graph = 0;
1478
2.31k
  model_dense->count = count;
1479
2.31k
  model_dense->no_bias = no_bias;
1480
2.31k
  model_dense->flags = flags;
1481
2.31k
  return (ccv_cnnp_model_t*)model_dense;
1482
2.31k
}
1483
1484
static ccv_cnnp_model_t* _ccv_cnnp_dense_copy(const ccv_cnnp_model_t* const super, void* const context)
1485
2.20k
{
1486
2.20k
  const ccv_cnnp_model_dense_t* const self = (const ccv_cnnp_model_dense_t*)super;
1487
2.20k
  return ccv_cnnp_dense(self->count, self->no_bias, self->flags, self->super.is_trainable, self->super.name);
1488
2.20k
}
1489
1490
// MARK - Pool Layers
1491
1492
typedef struct {
1493
  ccv_cnnp_model_t super;
1494
  ccv_nnc_tensor_symbol_t output;
1495
  int kdim[CCV_NNC_MAX_DIM_ALLOC];
1496
  ccv_nnc_hint_t hint;
1497
} ccv_cnnp_model_pool_t;
1498
1499
// Build hook for the max pool layer.
//
// Expects exactly one input symbol and writes exactly one output symbol to
// outputs[0]. When kdim[0] and kdim[1] are both 0 and the input's spatial
// offset can be located (hw >= 0), the window is the input's two spatial
// dimensions; otherwise the stored kdim is used.
static void _ccv_cnnp_max_pool_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_pool_t* const pool = (ccv_cnnp_model_pool_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_max_pool_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  const int hw = ccv_nnc_tensor_hw(params, ccv_nnc_tensor_nd(params.dim), CCV_NNC_MAX_DIM);
  const int whole_input = (hw >= 0 && pool->kdim[0] == 0 && pool->kdim[1] == 0);
  // params.dim[hw] is only read when hw >= 0.
  const int rows = whole_input ? params.dim[hw] : pool->kdim[0];
  const int cols = whole_input ? params.dim[hw + 1] : pool->kdim[1];
  const ccv_nnc_cmd_t cmd = CMD_MAX_POOL_FORWARD(rows, cols);
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(cmd, &params, 1, pool->hint, &out_params, 1);
  const ccv_nnc_tensor_symbol_t result = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  const ccv_nnc_graph_exec_symbol_t node = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(result), "max_pool");
  ccv_nnc_graph_exec_symbol_set_hint(graph, node, pool->hint);
  outputs[0] = result;
}
1519
1520
static ccv_cnnp_model_t* _ccv_cnnp_max_pool_copy(const ccv_cnnp_model_t* const super, void* const context);
1521
1522
static const ccv_cnnp_model_vtab_t ccv_cnnp_max_pool_isa = {
1523
  .build = _ccv_cnnp_max_pool_build,
1524
  .copy = _ccv_cnnp_max_pool_copy,
1525
};
1526
1527
ccv_cnnp_model_t* ccv_cnnp_max_pool(const int kdim[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_hint_t hint, const char* const name)
1528
24
{
1529
24
  ccv_cnnp_model_pool_t* const model_pool = (ccv_cnnp_model_pool_t*)cccalloc(1, sizeof(ccv_cnnp_model_pool_t));
1530
24
  model_pool->super.isa = &ccv_cnnp_max_pool_isa;
1531
24
  model_pool->super.input_size = 1;
1532
24
  model_pool->super.outputs = &model_pool->output;
1533
24
  model_pool->super.output_size = 1;
1534
24
  ccv_cnnp_model_copy_name(&model_pool->super, name);
1535
24
  memcpy(model_pool->kdim, kdim, sizeof(model_pool->kdim));
1536
24
  model_pool->hint = hint;
1537
24
  return (ccv_cnnp_model_t*)model_pool;
1538
24
}
1539
1540
static ccv_cnnp_model_t* _ccv_cnnp_max_pool_copy(const ccv_cnnp_model_t* const super, void* const context)
1541
6
{
1542
6
  const ccv_cnnp_model_pool_t* const self = (const ccv_cnnp_model_pool_t*)super;
1543
6
  return ccv_cnnp_max_pool(self->kdim, self->hint, self->super.name);
1544
6
}
1545
1546
// Build hook for the average pool layer.
//
// Expects exactly one input symbol and writes exactly one output symbol to
// outputs[0]. When kdim[0] and kdim[1] are both 0 and the input's spatial
// offset can be located (hw >= 0), the window is the input's two spatial
// dimensions; otherwise the stored kdim is used.
static void _ccv_cnnp_average_pool_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_pool_t* const pool = (ccv_cnnp_model_pool_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_average_pool_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  const int hw = ccv_nnc_tensor_hw(params, ccv_nnc_tensor_nd(params.dim), CCV_NNC_MAX_DIM);
  const int whole_input = (hw >= 0 && pool->kdim[0] == 0 && pool->kdim[1] == 0);
  // params.dim[hw] is only read when hw >= 0.
  const int rows = whole_input ? params.dim[hw] : pool->kdim[0];
  const int cols = whole_input ? params.dim[hw + 1] : pool->kdim[1];
  const ccv_nnc_cmd_t cmd = CMD_AVERAGE_POOL_FORWARD(rows, cols);
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(cmd, &params, 1, pool->hint, &out_params, 1);
  const ccv_nnc_tensor_symbol_t result = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  const ccv_nnc_graph_exec_symbol_t node = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(result), "average_pool");
  ccv_nnc_graph_exec_symbol_set_hint(graph, node, pool->hint);
  outputs[0] = result;
}
1566
1567
static ccv_cnnp_model_t* _ccv_cnnp_average_pool_copy(const ccv_cnnp_model_t* const super, void* const context);
1568
1569
static const ccv_cnnp_model_vtab_t ccv_cnnp_average_pool_isa = {
1570
  .build = _ccv_cnnp_average_pool_build,
1571
  .copy = _ccv_cnnp_average_pool_copy,
1572
};
1573
1574
ccv_cnnp_model_t* ccv_cnnp_average_pool(const int kdim[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_hint_t hint, const char* const name)
1575
17
{
1576
17
  ccv_cnnp_model_pool_t* const model_pool = (ccv_cnnp_model_pool_t*)cccalloc(1, sizeof(ccv_cnnp_model_pool_t));
1577
17
  model_pool->super.isa = &ccv_cnnp_average_pool_isa;
1578
17
  model_pool->super.input_size = 1;
1579
17
  model_pool->super.outputs = &model_pool->output;
1580
17
  model_pool->super.output_size = 1;
1581
17
  ccv_cnnp_model_copy_name(&model_pool->super, name);
1582
17
  memcpy(model_pool->kdim, kdim, sizeof(model_pool->kdim));
1583
17
  model_pool->hint = hint;
1584
17
  return (ccv_cnnp_model_t*)model_pool;
1585
17
}
1586
1587
static ccv_cnnp_model_t* _ccv_cnnp_average_pool_copy(const ccv_cnnp_model_t* const super, void* const context)
1588
2
{
1589
2
  const ccv_cnnp_model_pool_t* const self = (const ccv_cnnp_model_pool_t*)super;
1590
2
  return ccv_cnnp_average_pool(self->kdim, self->hint, self->super.name);
1591
2
}
1592
1593
// MARK - RELU Layer
1594
1595
typedef struct {
1596
  ccv_cnnp_model_t super;
1597
  ccv_nnc_tensor_symbol_t output;
1598
} ccv_cnnp_model_relu_t;
1599
1600
// Build hook for the ReLU layer: adds one CMD_RELU_FORWARD exec symbol named
// "relu" from the single input symbol to a newly created output symbol.
static void _ccv_cnnp_relu_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_relu_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  const ccv_nnc_cmd_t cmd = CMD_RELU_FORWARD();
  const ccv_nnc_tensor_param_t in_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  // Derive the output params from the input params.
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(cmd, &in_params, 1, ccv_nnc_no_hint, &out_params, 1);
  const ccv_nnc_tensor_symbol_t result = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(result), "relu");
  outputs[0] = result;
}
1615
1616
static ccv_cnnp_model_t* _ccv_cnnp_relu_copy(const ccv_cnnp_model_t* const self, void* const context);
1617
1618
static const ccv_cnnp_model_vtab_t ccv_cnnp_relu_isa = {
1619
  .build = _ccv_cnnp_relu_build,
1620
  .copy = _ccv_cnnp_relu_copy,
1621
};
1622
1623
ccv_cnnp_model_t* ccv_cnnp_relu(const char* const name)
1624
120
{
1625
120
  ccv_cnnp_model_relu_t* const model_relu = (ccv_cnnp_model_relu_t*)cccalloc(1, sizeof(ccv_cnnp_model_relu_t));
1626
120
  model_relu->super.isa = &ccv_cnnp_relu_isa;
1627
120
  model_relu->super.input_size = 1;
1628
120
  model_relu->super.outputs = &model_relu->output;
1629
120
  model_relu->super.output_size = 1;
1630
120
  ccv_cnnp_model_copy_name(&model_relu->super, name);
1631
120
  return (ccv_cnnp_model_t*)model_relu;
1632
120
}
1633
1634
static ccv_cnnp_model_t* _ccv_cnnp_relu_copy(const ccv_cnnp_model_t* const self, void* const context)
1635
17
{
1636
17
  return ccv_cnnp_relu(self->name);
1637
17
}
1638
1639
// MARK - Sigmoid Layer
1640
1641
typedef struct {
1642
  ccv_cnnp_model_t super;
1643
  ccv_nnc_tensor_symbol_t output;
1644
} ccv_cnnp_model_sigmoid_t;
1645
1646
// Build hook for the sigmoid layer: adds one CMD_SIGMOID_FORWARD exec symbol
// named "sigmoid" from the single input symbol to a newly created output
// symbol.
static void _ccv_cnnp_sigmoid_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_sigmoid_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  const ccv_nnc_cmd_t cmd = CMD_SIGMOID_FORWARD();
  const ccv_nnc_tensor_param_t in_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  // Derive the output params from the input params.
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(cmd, &in_params, 1, ccv_nnc_no_hint, &out_params, 1);
  const ccv_nnc_tensor_symbol_t result = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(result), "sigmoid");
  outputs[0] = result;
}
1661
1662
static ccv_cnnp_model_t* _ccv_cnnp_sigmoid_copy(const ccv_cnnp_model_t* const self, void* const context);
1663
1664
static const ccv_cnnp_model_vtab_t ccv_cnnp_sigmoid_isa = {
1665
  .build = _ccv_cnnp_sigmoid_build,
1666
  .copy = _ccv_cnnp_sigmoid_copy,
1667
};
1668
1669
ccv_cnnp_model_t* ccv_cnnp_sigmoid(const char* const name)
1670
5
{
1671
5
  ccv_cnnp_model_sigmoid_t* const model_sigmoid = (ccv_cnnp_model_sigmoid_t*)cccalloc(1, sizeof(ccv_cnnp_model_sigmoid_t));
1672
5
  model_sigmoid->super.isa = &ccv_cnnp_sigmoid_isa;
1673
5
  model_sigmoid->super.input_size = 1;
1674
5
  model_sigmoid->super.outputs = &model_sigmoid->output;
1675
5
  model_sigmoid->super.output_size = 1;
1676
5
  ccv_cnnp_model_copy_name(&model_sigmoid->super, name);
1677
5
  return (ccv_cnnp_model_t*)model_sigmoid;
1678
5
}
1679
1680
static ccv_cnnp_model_t* _ccv_cnnp_sigmoid_copy(const ccv_cnnp_model_t* const self, void* const context)
1681
0
{
1682
0
  return ccv_cnnp_sigmoid(self->name);
1683
0
}
1684
1685
// MARK - Tanh Layer
1686
1687
typedef struct {
1688
  ccv_cnnp_model_t super;
1689
  ccv_nnc_tensor_symbol_t output;
1690
} ccv_cnnp_model_tanh_t;
1691
1692
// Build hook for the tanh layer: adds one CMD_TANH_FORWARD exec symbol named
// "tanh" from the single input symbol to a newly created output symbol.
static void _ccv_cnnp_tanh_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_tanh_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  const ccv_nnc_cmd_t cmd = CMD_TANH_FORWARD();
  const ccv_nnc_tensor_param_t in_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  // Derive the output params from the input params.
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(cmd, &in_params, 1, ccv_nnc_no_hint, &out_params, 1);
  const ccv_nnc_tensor_symbol_t result = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(result), "tanh");
  outputs[0] = result;
}
1707
1708
static ccv_cnnp_model_t* _ccv_cnnp_tanh_copy(const ccv_cnnp_model_t* const self, void* const context);
1709
1710
static const ccv_cnnp_model_vtab_t ccv_cnnp_tanh_isa = {
1711
  .build = _ccv_cnnp_tanh_build,
1712
  .copy = _ccv_cnnp_tanh_copy,
1713
};
1714
1715
ccv_cnnp_model_t* ccv_cnnp_tanh(const char* const name)
1716
0
{
1717
0
  ccv_cnnp_model_tanh_t* const model_tanh = (ccv_cnnp_model_tanh_t*)cccalloc(1, sizeof(ccv_cnnp_model_tanh_t));
1718
0
  model_tanh->super.isa = &ccv_cnnp_tanh_isa;
1719
0
  model_tanh->super.input_size = 1;
1720
0
  model_tanh->super.outputs = &model_tanh->output;
1721
0
  model_tanh->super.output_size = 1;
1722
0
  ccv_cnnp_model_copy_name(&model_tanh->super, name);
1723
0
  return (ccv_cnnp_model_t*)model_tanh;
1724
0
}
1725
1726
static ccv_cnnp_model_t* _ccv_cnnp_tanh_copy(const ccv_cnnp_model_t* const self, void* const context)
1727
0
{
1728
0
  return ccv_cnnp_tanh(self->name);
1729
0
}
1730
1731
// MARK - Swish Layer
1732
1733
typedef struct {
1734
  ccv_cnnp_model_t super;
1735
  ccv_nnc_tensor_symbol_t output;
1736
  float beta;
1737
} ccv_cnnp_model_swish_t;
1738
1739
// Build hook for the swish layer: adds one CMD_SWISH_FORWARD(beta) exec symbol
// named "swish" from the single input symbol to a newly created output symbol.
static void _ccv_cnnp_swish_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_swish_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_cnnp_model_swish_t* const layer = (ccv_cnnp_model_swish_t*)super;
  const ccv_nnc_cmd_t cmd = CMD_SWISH_FORWARD(layer->beta);
  const ccv_nnc_tensor_param_t in_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  // Derive the output params from the input params.
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(cmd, &in_params, 1, ccv_nnc_no_hint, &out_params, 1);
  const ccv_nnc_tensor_symbol_t result = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(result), "swish");
  outputs[0] = result;
}
1755
1756
static ccv_cnnp_model_t* _ccv_cnnp_swish_copy(const ccv_cnnp_model_t* const self, void* const context);
1757
1758
static const ccv_cnnp_model_vtab_t ccv_cnnp_swish_isa = {
1759
  .build = _ccv_cnnp_swish_build,
1760
  .copy = _ccv_cnnp_swish_copy,
1761
};
1762
1763
ccv_cnnp_model_t* ccv_cnnp_swish(const float beta, const char* const name)
1764
0
{
1765
0
  ccv_cnnp_model_swish_t* const model_swish = (ccv_cnnp_model_swish_t*)cccalloc(1, sizeof(ccv_cnnp_model_swish_t));
1766
0
  model_swish->super.isa = &ccv_cnnp_swish_isa;
1767
0
  model_swish->super.input_size = 1;
1768
0
  model_swish->super.outputs = &model_swish->output;
1769
0
  model_swish->super.output_size = 1;
1770
0
  model_swish->beta = beta;
1771
0
  ccv_cnnp_model_copy_name(&model_swish->super, name);
1772
0
  return (ccv_cnnp_model_t*)model_swish;
1773
0
}
1774
1775
static ccv_cnnp_model_t* _ccv_cnnp_swish_copy(const ccv_cnnp_model_t* const self, void* const context)
1776
0
{
1777
0
  const ccv_cnnp_model_swish_t* const swish = (const ccv_cnnp_model_swish_t*)self;
1778
0
  return ccv_cnnp_swish(swish->beta, self->name);
1779
0
}
1780
1781
// MARK - GELU Layer
1782
1783
typedef struct {
1784
  ccv_cnnp_model_t super;
1785
  ccv_nnc_tensor_symbol_t output;
1786
  int tanh;
1787
} ccv_cnnp_model_gelu_t;
1788
1789
// Build hook for the GELU layer: adds one CMD_GELU_FORWARD(tanh) exec symbol
// named "gelu" from the single input symbol to a newly created output symbol.
static void _ccv_cnnp_gelu_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_gelu_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_cnnp_model_gelu_t* const layer = (ccv_cnnp_model_gelu_t*)super;
  const ccv_nnc_cmd_t cmd = CMD_GELU_FORWARD(layer->tanh);
  const ccv_nnc_tensor_param_t in_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  // Derive the output params from the input params.
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(cmd, &in_params, 1, ccv_nnc_no_hint, &out_params, 1);
  const ccv_nnc_tensor_symbol_t result = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(result), "gelu");
  outputs[0] = result;
}
1805
1806
static ccv_cnnp_model_t* _ccv_cnnp_gelu_copy(const ccv_cnnp_model_t* const self, void* const context);
1807
1808
static const ccv_cnnp_model_vtab_t ccv_cnnp_gelu_isa = {
1809
  .build = _ccv_cnnp_gelu_build,
1810
  .copy = _ccv_cnnp_gelu_copy,
1811
};
1812
1813
ccv_cnnp_model_t* ccv_cnnp_gelu(const int tanh, const char* const name)
1814
1
{
1815
1
  ccv_cnnp_model_gelu_t* const model_gelu = (ccv_cnnp_model_gelu_t*)cccalloc(1, sizeof(ccv_cnnp_model_gelu_t));
1816
1
  model_gelu->super.isa = &ccv_cnnp_gelu_isa;
1817
1
  model_gelu->super.input_size = 1;
1818
1
  model_gelu->super.outputs = &model_gelu->output;
1819
1
  model_gelu->super.output_size = 1;
1820
1
  model_gelu->tanh = tanh;
1821
1
  ccv_cnnp_model_copy_name(&model_gelu->super, name);
1822
1
  return (ccv_cnnp_model_t*)model_gelu;
1823
1
}
1824
1825
static ccv_cnnp_model_t* _ccv_cnnp_gelu_copy(const ccv_cnnp_model_t* const super, void* const context)
1826
0
{
1827
0
  ccv_cnnp_model_gelu_t* const self = (ccv_cnnp_model_gelu_t*)super;
1828
0
  return ccv_cnnp_gelu(self->tanh, self->super.name);
1829
0
}
1830
1831
// MARK - Leaky ReLU Layer
1832
1833
typedef struct {
1834
  ccv_cnnp_model_t super;
1835
  ccv_nnc_tensor_symbol_t output;
1836
  float negative_slope;
1837
} ccv_cnnp_model_leaky_relu_t;
1838
1839
// Build hook for the leaky ReLU layer: adds one
// CMD_LEAKY_RELU_FORWARD(negative_slope) exec symbol named "leaky_relu" from
// the single input symbol to a newly created output symbol.
static void _ccv_cnnp_leaky_relu_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_leaky_relu_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_cnnp_model_leaky_relu_t* const layer = (ccv_cnnp_model_leaky_relu_t*)super;
  const ccv_nnc_cmd_t cmd = CMD_LEAKY_RELU_FORWARD(layer->negative_slope);
  const ccv_nnc_tensor_param_t in_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  // Derive the output params from the input params.
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(cmd, &in_params, 1, ccv_nnc_no_hint, &out_params, 1);
  const ccv_nnc_tensor_symbol_t result = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(result), "leaky_relu");
  outputs[0] = result;
}
1855
1856
static ccv_cnnp_model_t* _ccv_cnnp_leaky_relu_copy(const ccv_cnnp_model_t* const self, void* const context);
1857
1858
static const ccv_cnnp_model_vtab_t ccv_cnnp_leaky_relu_isa = {
1859
  .build = _ccv_cnnp_leaky_relu_build,
1860
  .copy = _ccv_cnnp_leaky_relu_copy,
1861
};
1862
1863
ccv_cnnp_model_t* ccv_cnnp_leaky_relu(const float negative_slope, const char* const name)
1864
0
{
1865
0
  ccv_cnnp_model_leaky_relu_t* const model_leaky_relu = (ccv_cnnp_model_leaky_relu_t*)cccalloc(1, sizeof(ccv_cnnp_model_leaky_relu_t));
1866
0
  model_leaky_relu->super.isa = &ccv_cnnp_leaky_relu_isa;
1867
0
  model_leaky_relu->super.input_size = 1;
1868
0
  model_leaky_relu->super.outputs = &model_leaky_relu->output;
1869
0
  model_leaky_relu->super.output_size = 1;
1870
0
  model_leaky_relu->negative_slope = negative_slope;
1871
0
  ccv_cnnp_model_copy_name(&model_leaky_relu->super, name);
1872
0
  return (ccv_cnnp_model_t*)model_leaky_relu;
1873
0
}
1874
1875
static ccv_cnnp_model_t* _ccv_cnnp_leaky_relu_copy(const ccv_cnnp_model_t* const super, void* const context)
1876
0
{
1877
0
  ccv_cnnp_model_leaky_relu_t* const self = (ccv_cnnp_model_leaky_relu_t*)super;
1878
0
  return ccv_cnnp_leaky_relu(self->negative_slope, self->super.name);
1879
0
}
1880
1881
// MARK - Softmax Layer
1882
1883
typedef struct {
1884
  ccv_cnnp_model_t super;
1885
  ccv_nnc_tensor_symbol_t output;
1886
} ccv_cnnp_model_softmax_t;
1887
1888
static void _ccv_cnnp_softmax_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1889
8
{
1890
8
  PRINT(CCV_CLI_VERBOSE, "[cnnp_softmax_build] -\n");
1891
8
  assert(input_size == 1);
1892
8
  assert(output_size == 1);
1893
8
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1894
8
  ccv_nnc_tensor_param_t output_params;
1895
8
  const ccv_nnc_cmd_t softmax = CMD_SOFTMAX_FORWARD();
1896
8
  ccv_nnc_hint_tensor_auto(softmax, (ccv_nnc_tensor_param_t []){
1897
8
      params,
1898
8
    }, 1, ccv_nnc_no_hint, &output_params, 1);
1899
8
  const ccv_nnc_tensor_symbol_t softmax_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1900
8
  ccv_nnc_graph_exec_symbol_new(graph, softmax, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(softmax_output), "softmax");
1901
8
  outputs[0] = softmax_output;
1902
8
}
1903
1904
static ccv_cnnp_model_t* _ccv_cnnp_softmax_copy(const ccv_cnnp_model_t* const self, void* const context);
1905
1906
static const ccv_cnnp_model_vtab_t ccv_cnnp_softmax_isa = {
1907
  .build = _ccv_cnnp_softmax_build,
1908
  .copy = _ccv_cnnp_softmax_copy,
1909
};
1910
1911
ccv_cnnp_model_t* ccv_cnnp_softmax(const char* const name)
1912
9
{
1913
9
  ccv_cnnp_model_softmax_t* const model_softmax = (ccv_cnnp_model_softmax_t*)cccalloc(1, sizeof(ccv_cnnp_model_softmax_t));
1914
9
  model_softmax->super.isa = &ccv_cnnp_softmax_isa;
1915
9
  model_softmax->super.input_size = 1;
1916
9
  model_softmax->super.outputs = &model_softmax->output;
1917
9
  model_softmax->super.output_size = 1;
1918
9
  ccv_cnnp_model_copy_name(&model_softmax->super, name);
1919
9
  return (ccv_cnnp_model_t*)model_softmax;
1920
9
}
1921
1922
static ccv_cnnp_model_t* _ccv_cnnp_softmax_copy(const ccv_cnnp_model_t* const self, void* const context)
1923
1
{
1924
1
  return ccv_cnnp_softmax(self->name);
1925
1
}
1926
1927
// MARK - Add Layer
1928
1929
typedef struct {
1930
  ccv_cnnp_model_t super;
1931
  float p;
1932
  float q;
1933
  ccv_nnc_tensor_symbol_t output;
1934
} ccv_cnnp_model_add_t;
1935
1936
static void _ccv_cnnp_add_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1937
0
{
1938
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_add_build] -\n");
1939
0
  const ccv_cnnp_model_add_t* const self = (const ccv_cnnp_model_add_t*)super;
1940
0
  assert(input_size == 2);
1941
0
  assert(output_size == 1);
1942
0
  ccv_nnc_tensor_param_t input_params[2];
1943
0
  int i;
1944
0
  for (i = 0; i < 2; i++)
1945
0
    input_params[i] = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
1946
0
  ccv_nnc_tensor_param_t output_params;
1947
0
  const ccv_nnc_cmd_t add = CMD_ADD_FORWARD(self->p, self->q);
1948
0
  ccv_nnc_hint_tensor_auto(add, input_params, 2, ccv_nnc_no_hint, &output_params, 1);
1949
0
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1950
0
  ccv_nnc_graph_exec_symbol_new(graph, add, inputs, input_size, outputs, output_size, "add");
1951
0
}
1952
1953
static ccv_cnnp_model_t* _ccv_cnnp_add_copy(const ccv_cnnp_model_t* const self, void* const context);
1954
1955
static const ccv_cnnp_model_vtab_t ccv_cnnp_add_isa = {
1956
  .build = _ccv_cnnp_add_build,
1957
  .copy = _ccv_cnnp_add_copy,
1958
};
1959
1960
ccv_cnnp_model_t* ccv_cnnp_add(const float p, const float q, const char* const name)
1961
0
{
1962
0
  ccv_cnnp_model_add_t* const model_add = (ccv_cnnp_model_add_t*)cccalloc(1, sizeof(ccv_cnnp_model_add_t));
1963
0
  model_add->super.isa = &ccv_cnnp_add_isa;
1964
0
  model_add->super.input_size = 2;
1965
0
  model_add->super.outputs = &model_add->output;
1966
0
  model_add->super.output_size = 1;
1967
0
  model_add->p = p;
1968
0
  model_add->q = q;
1969
0
  ccv_cnnp_model_copy_name(&model_add->super, name);
1970
0
  return (ccv_cnnp_model_t*)model_add;
1971
0
}
1972
1973
static ccv_cnnp_model_t* _ccv_cnnp_add_copy(const ccv_cnnp_model_t* const super, void* const context)
1974
0
{
1975
0
  const ccv_cnnp_model_add_t* const self = (const ccv_cnnp_model_add_t*)super;
1976
0
  return ccv_cnnp_add(self->p, self->q, self->super.name);
1977
0
}
1978
1979
// MARK - Mul Layer
1980
1981
typedef struct {
1982
  ccv_cnnp_model_t super;
1983
  ccv_nnc_tensor_symbol_t output;
1984
  float p;
1985
} ccv_cnnp_model_mul_t;
1986
1987
static void _ccv_cnnp_mul_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1988
6
{
1989
6
  PRINT(CCV_CLI_VERBOSE, "[cnnp_mul_build] -\n");
1990
6
  const ccv_cnnp_model_mul_t* const self = (const ccv_cnnp_model_mul_t*)super;
1991
6
  assert(input_size == 2);
1992
6
  assert(output_size == 1);
1993
6
  ccv_nnc_tensor_param_t input_params[2];
1994
6
  int i;
1995
18
  for (i = 0; i < 2; 
i++12
)
1996
12
    input_params[i] = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
1997
6
  ccv_nnc_tensor_param_t output_params;
1998
6
  const ccv_nnc_cmd_t mul = CMD_MUL_FORWARD(self->p);
1999
6
  ccv_nnc_hint_tensor_auto(mul, input_params, 2, ccv_nnc_no_hint, &output_params, 1);
2000
6
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2001
6
  ccv_nnc_graph_exec_symbol_new(graph, mul, inputs, input_size, outputs, output_size, "mul");
2002
6
}
2003
2004
static ccv_cnnp_model_t* _ccv_cnnp_mul_copy(const ccv_cnnp_model_t* const self, void* const context);
2005
2006
static const ccv_cnnp_model_vtab_t ccv_cnnp_mul_isa = {
2007
  .build = _ccv_cnnp_mul_build,
2008
  .copy = _ccv_cnnp_mul_copy,
2009
};
2010
2011
ccv_cnnp_model_t* ccv_cnnp_mul(const float p, const char* const name)
2012
5
{
2013
5
  ccv_cnnp_model_mul_t* const model_mul = (ccv_cnnp_model_mul_t*)cccalloc(1, sizeof(ccv_cnnp_model_mul_t));
2014
5
  model_mul->super.isa = &ccv_cnnp_mul_isa;
2015
5
  model_mul->super.input_size = 2;
2016
5
  model_mul->super.outputs = &model_mul->output;
2017
5
  model_mul->super.output_size = 1;
2018
5
  model_mul->p = p;
2019
5
  ccv_cnnp_model_copy_name(&model_mul->super, name);
2020
5
  return (ccv_cnnp_model_t*)model_mul;
2021
5
}
2022
2023
static ccv_cnnp_model_t* _ccv_cnnp_mul_copy(const ccv_cnnp_model_t* const super, void* const context)
2024
0
{
2025
0
  const ccv_cnnp_model_mul_t* const self = (const ccv_cnnp_model_mul_t*)super;
2026
0
  return ccv_cnnp_mul(self->p, self->super.name);
2027
0
}
2028
2029
// MARK - Scalar Mul Layer
2030
2031
typedef struct {
2032
  ccv_cnnp_model_t super;
2033
  ccv_nnc_tensor_symbol_t output;
2034
  float a;
2035
} ccv_cnnp_model_scalar_mul_t;
2036
2037
static void _ccv_cnnp_scalar_mul_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2038
4
{
2039
4
  PRINT(CCV_CLI_VERBOSE, "[cnnp_scalar_mul_build] -\n");
2040
4
  assert(input_size == 1);
2041
4
  assert(output_size == 1);
2042
4
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2043
4
  ccv_nnc_tensor_param_t output_params;
2044
4
  ccv_cnnp_model_scalar_mul_t* const self = (ccv_cnnp_model_scalar_mul_t*)super;
2045
4
  const ccv_nnc_cmd_t scalar_mul = CMD_SCALAR_MUL_FORWARD(self->a);
2046
4
  ccv_nnc_hint_tensor_auto(scalar_mul, (ccv_nnc_tensor_param_t []){
2047
4
      params,
2048
4
    }, 1, ccv_nnc_no_hint, &output_params, 1);
2049
4
  const ccv_nnc_tensor_symbol_t scalar_mul_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2050
4
  ccv_nnc_graph_exec_symbol_new(graph, scalar_mul, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(scalar_mul_output), "scalar_mul");
2051
4
  outputs[0] = scalar_mul_output;
2052
4
}
2053
2054
static ccv_cnnp_model_t* _ccv_cnnp_scalar_mul_copy(const ccv_cnnp_model_t* const super, void* const context);
2055
2056
static const ccv_cnnp_model_vtab_t ccv_cnnp_scalar_mul_isa = {
2057
  .build = _ccv_cnnp_scalar_mul_build,
2058
  .copy = _ccv_cnnp_scalar_mul_copy,
2059
};
2060
2061
ccv_cnnp_model_t* ccv_cnnp_scalar_mul(const float a, const char* const name)
2062
4
{
2063
4
  ccv_cnnp_model_scalar_mul_t* const model_scalar_mul = (ccv_cnnp_model_scalar_mul_t*)cccalloc(1, sizeof(ccv_cnnp_model_scalar_mul_t));
2064
4
  model_scalar_mul->super.isa = &ccv_cnnp_scalar_mul_isa;
2065
4
  model_scalar_mul->super.input_size = 1;
2066
4
  model_scalar_mul->super.outputs = &model_scalar_mul->output;
2067
4
  model_scalar_mul->super.output_size = 1;
2068
4
  model_scalar_mul->a = a;
2069
4
  ccv_cnnp_model_copy_name(&model_scalar_mul->super, name);
2070
4
  return (ccv_cnnp_model_t*)model_scalar_mul;
2071
4
}
2072
2073
static ccv_cnnp_model_t* _ccv_cnnp_scalar_mul_copy(const ccv_cnnp_model_t* const super, void* const context)
2074
0
{
2075
0
  const ccv_cnnp_model_scalar_mul_t* const self = (const ccv_cnnp_model_scalar_mul_t*)super;
2076
0
  return ccv_cnnp_scalar_mul(self->a, self->super.name);
2077
0
}
2078
2079
// MARK - Div Layer
2080
2081
typedef struct {
2082
  ccv_cnnp_model_t super;
2083
  ccv_nnc_tensor_symbol_t output;
2084
  int reciprocal;
2085
} ccv_cnnp_model_div_t;
2086
2087
static void _ccv_cnnp_div_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2088
2
{
2089
2
  const ccv_cnnp_model_div_t* const self = (const ccv_cnnp_model_div_t*)super;
2090
2
  PRINT(CCV_CLI_VERBOSE, "[cnnp_div_build] -\n");
2091
2
  assert(output_size == 1);
2092
2
  ccv_nnc_tensor_param_t input_params[2];
2093
2
  int i;
2094
2
  ccv_nnc_tensor_param_t output_params;
2095
2
  const ccv_nnc_cmd_t div = CMD_EWDIV_FORWARD();
2096
2
  if (self->reciprocal)
2097
1
  {
2098
1
    assert(input_size == 1);
2099
1
    input_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2100
1
    input_params[1] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2101
1
    ccv_nnc_hint_tensor_auto(div, input_params, 2, ccv_nnc_no_hint, &output_params, 1);
2102
1
    outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2103
1
    ccv_nnc_graph_exec_symbol_new(graph, div, TENSOR_SYMBOL_LIST(NO_TENSOR_SYMBOL, inputs[0]), outputs, output_size, "div");
2104
1
  } else {
2105
1
    assert(input_size == 2);
2106
3
    
for (i = 0; 1
i < 2;
i++2
)
2107
2
      input_params[i] = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
2108
1
    ccv_nnc_hint_tensor_auto(div, input_params, input_size, ccv_nnc_no_hint, &output_params, 1);
2109
1
    outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2110
1
    ccv_nnc_graph_exec_symbol_new(graph, div, inputs, input_size, outputs, output_size, "div");
2111
1
  }
2112
2
}
2113
2114
static ccv_cnnp_model_t* _ccv_cnnp_div_copy(const ccv_cnnp_model_t* const self, void* const context);
2115
2116
static const ccv_cnnp_model_vtab_t ccv_cnnp_div_isa = {
2117
  .build = _ccv_cnnp_div_build,
2118
  .copy = _ccv_cnnp_div_copy,
2119
};
2120
2121
ccv_cnnp_model_t* ccv_cnnp_div(const int reciprocal, const char* const name)
2122
2
{
2123
2
  ccv_cnnp_model_div_t* const model_div = (ccv_cnnp_model_div_t*)cccalloc(1, sizeof(ccv_cnnp_model_div_t));
2124
2
  model_div->super.isa = &ccv_cnnp_div_isa;
2125
2
  model_div->super.input_size = reciprocal ? 
11
:
21
;
2126
2
  model_div->super.outputs = &model_div->output;
2127
2
  model_div->super.output_size = 1;
2128
2
  model_div->reciprocal = reciprocal;
2129
2
  ccv_cnnp_model_copy_name(&model_div->super, name);
2130
2
  return (ccv_cnnp_model_t*)model_div;
2131
2
}
2132
2133
static ccv_cnnp_model_t* _ccv_cnnp_div_copy(const ccv_cnnp_model_t* const super, void* const context)
2134
0
{
2135
0
  const ccv_cnnp_model_div_t* const self = (const ccv_cnnp_model_div_t*)super;
2136
0
  return ccv_cnnp_div(self->reciprocal, self->super.name);
2137
0
}
2138
2139
// MARK - Sqrt Layer
2140
2141
typedef struct {
2142
  ccv_cnnp_model_t super;
2143
  ccv_nnc_tensor_symbol_t output;
2144
} ccv_cnnp_model_sqrt_t;
2145
2146
static void _ccv_cnnp_sqrt_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2147
0
{
2148
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_sqrt_build] -\n");
2149
0
  assert(output_size == 1);
2150
0
  ccv_nnc_tensor_param_t input_params[1];
2151
0
  ccv_nnc_tensor_param_t output_params;
2152
0
  const ccv_nnc_cmd_t sqrt = CMD_EWSQRT_FORWARD();
2153
0
  assert(input_size == 1);
2154
0
  input_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2155
0
  ccv_nnc_hint_tensor_auto(sqrt, input_params, 1, ccv_nnc_no_hint, &output_params, 1);
2156
0
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2157
0
  ccv_nnc_graph_exec_symbol_new(graph, sqrt, inputs, 1, outputs, output_size, "sqrt");
2158
0
}
2159
2160
static ccv_cnnp_model_t* _ccv_cnnp_sqrt_copy(const ccv_cnnp_model_t* const self, void* const context);
2161
2162
static const ccv_cnnp_model_vtab_t ccv_cnnp_sqrt_isa = {
2163
  .build = _ccv_cnnp_sqrt_build,
2164
  .copy = _ccv_cnnp_sqrt_copy,
2165
};
2166
2167
ccv_cnnp_model_t* ccv_cnnp_sqrt(const char* const name)
2168
0
{
2169
0
  ccv_cnnp_model_sqrt_t* const model_sqrt = (ccv_cnnp_model_sqrt_t*)cccalloc(1, sizeof(ccv_cnnp_model_sqrt_t));
2170
0
  model_sqrt->super.isa = &ccv_cnnp_sqrt_isa;
2171
0
  model_sqrt->super.input_size = 1;
2172
0
  model_sqrt->super.outputs = &model_sqrt->output;
2173
0
  model_sqrt->super.output_size = 1;
2174
0
  ccv_cnnp_model_copy_name(&model_sqrt->super, name);
2175
0
  return (ccv_cnnp_model_t*)model_sqrt;
2176
0
}
2177
2178
static ccv_cnnp_model_t* _ccv_cnnp_sqrt_copy(const ccv_cnnp_model_t* const super, void* const context)
2179
0
{
2180
0
  const ccv_cnnp_model_sqrt_t* const self = (const ccv_cnnp_model_sqrt_t*)super;
2181
0
  return ccv_cnnp_sqrt(self->super.name);
2182
0
}
2183
2184
// MARK - Log Layer
2185
2186
typedef struct {
2187
  ccv_cnnp_model_t super;
2188
  ccv_nnc_tensor_symbol_t output;
2189
} ccv_cnnp_model_log_t;
2190
2191
static void _ccv_cnnp_log_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2192
1
{
2193
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_log_build] -\n");
2194
1
  assert(output_size == 1);
2195
1
  ccv_nnc_tensor_param_t input_params[1];
2196
1
  ccv_nnc_tensor_param_t output_params;
2197
1
  const ccv_nnc_cmd_t log = CMD_EWLOG_FORWARD();
2198
1
  assert(input_size == 1);
2199
1
  input_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2200
1
  ccv_nnc_hint_tensor_auto(log, input_params, 1, ccv_nnc_no_hint, &output_params, 1);
2201
1
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2202
1
  ccv_nnc_graph_exec_symbol_new(graph, log, inputs, 1, outputs, output_size, "log");
2203
1
}
2204
2205
static ccv_cnnp_model_t* _ccv_cnnp_log_copy(const ccv_cnnp_model_t* const self, void* const context);
2206
2207
static const ccv_cnnp_model_vtab_t ccv_cnnp_log_isa = {
2208
  .build = _ccv_cnnp_log_build,
2209
  .copy = _ccv_cnnp_log_copy,
2210
};
2211
2212
ccv_cnnp_model_t* ccv_cnnp_log(const char* const name)
2213
1
{
2214
1
  ccv_cnnp_model_log_t* const model_log = (ccv_cnnp_model_log_t*)cccalloc(1, sizeof(ccv_cnnp_model_log_t));
2215
1
  model_log->super.isa = &ccv_cnnp_log_isa;
2216
1
  model_log->super.input_size = 1;
2217
1
  model_log->super.outputs = &model_log->output;
2218
1
  model_log->super.output_size = 1;
2219
1
  ccv_cnnp_model_copy_name(&model_log->super, name);
2220
1
  return (ccv_cnnp_model_t*)model_log;
2221
1
}
2222
2223
static ccv_cnnp_model_t* _ccv_cnnp_log_copy(const ccv_cnnp_model_t* const super, void* const context)
2224
0
{
2225
0
  return ccv_cnnp_log(super->name);
2226
0
}
2227
2228
// MARK - Pow Layer
2229
2230
typedef struct {
2231
  ccv_cnnp_model_t super;
2232
  ccv_nnc_tensor_symbol_t output;
2233
  ccv_nnc_cmd_param_t params;
2234
} ccv_cnnp_model_pow_t;
2235
2236
static void _ccv_cnnp_pow_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2237
1
{
2238
1
  ccv_cnnp_model_pow_t* const self = (ccv_cnnp_model_pow_t*)super;
2239
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_pow_build] -\n");
2240
1
  assert(input_size == 1);
2241
1
  assert(output_size == 1);
2242
1
  ccv_nnc_tensor_param_t input_params[1];
2243
1
  input_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2244
1
  ccv_nnc_tensor_param_t output_params;
2245
1
  const ccv_nnc_cmd_t pow = ccv_nnc_cmd(CCV_NNC_EWPOW_FORWARD, 0, self->params, 0);
2246
1
  ccv_nnc_hint_tensor_auto(pow, input_params, 1, ccv_nnc_no_hint, &output_params, 1);
2247
1
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2248
1
  ccv_nnc_graph_exec_symbol_new(graph, pow, inputs, input_size, outputs, output_size, "pow");
2249
1
}
2250
2251
static ccv_cnnp_model_t* _ccv_cnnp_pow_copy(const ccv_cnnp_model_t* const self, void* const context);
2252
2253
static const ccv_cnnp_model_vtab_t ccv_cnnp_pow_isa = {
2254
  .build = _ccv_cnnp_pow_build,
2255
  .copy = _ccv_cnnp_pow_copy,
2256
};
2257
2258
ccv_cnnp_model_t* ccv_cnnp_pow(const float exponent, const char* const name)
2259
1
{
2260
1
  ccv_cnnp_model_pow_t* const model_pow = (ccv_cnnp_model_pow_t*)cccalloc(1, sizeof(ccv_cnnp_model_pow_t));
2261
1
  model_pow->super.isa = &ccv_cnnp_pow_isa;
2262
1
  model_pow->super.input_size = 1;
2263
1
  model_pow->super.outputs = &model_pow->output;
2264
1
  model_pow->super.output_size = 1;
2265
1
  model_pow->params = (ccv_nnc_cmd_param_t){
2266
1
    .size = {
2267
1
      .dim = { 1, 1, 1 }
2268
1
    },
2269
1
    .pow = {
2270
1
      .exponent = exponent,
2271
1
    },
2272
1
  };
2273
1
  ccv_cnnp_model_copy_name(&model_pow->super, name);
2274
1
  return (ccv_cnnp_model_t*)model_pow;
2275
1
}
2276
2277
static ccv_cnnp_model_t* _ccv_cnnp_pow_copy(const ccv_cnnp_model_t* const super, void* const context)
2278
0
{
2279
0
  const ccv_cnnp_model_pow_t* const self = (const ccv_cnnp_model_pow_t*)super;
2280
0
  return ccv_cnnp_pow(self->params.pow.exponent, super->name);
2281
0
}
2282
2283
// MARK - Sin Layer
2284
2285
typedef struct {
2286
  ccv_cnnp_model_t super;
2287
  ccv_nnc_tensor_symbol_t output;
2288
} ccv_cnnp_model_sin_t;
2289
2290
static void _ccv_cnnp_sin_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2291
1
{
2292
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_sin_build] -\n");
2293
1
  assert(output_size == 1);
2294
1
  assert(input_size == 1);
2295
1
  ccv_nnc_tensor_param_t input_params[1];
2296
1
  ccv_nnc_tensor_param_t output_params;
2297
1
  const ccv_nnc_cmd_t sin = CMD_EWSIN_FORWARD();
2298
1
  input_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2299
1
  ccv_nnc_hint_tensor_auto(sin, input_params, 1, ccv_nnc_no_hint, &output_params, 1);
2300
1
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2301
1
  ccv_nnc_graph_exec_symbol_new(graph, sin, inputs, 1, outputs, output_size, "sin");
2302
1
}
2303
2304
static ccv_cnnp_model_t* _ccv_cnnp_sin_copy(const ccv_cnnp_model_t* const self, void* const context);
2305
2306
static const ccv_cnnp_model_vtab_t ccv_cnnp_sin_isa = {
2307
  .build = _ccv_cnnp_sin_build,
2308
  .copy = _ccv_cnnp_sin_copy,
2309
};
2310
2311
ccv_cnnp_model_t* ccv_cnnp_sin(const char* const name)
2312
1
{
2313
1
  ccv_cnnp_model_sin_t* const model_sin = (ccv_cnnp_model_sin_t*)cccalloc(1, sizeof(ccv_cnnp_model_sin_t));
2314
1
  model_sin->super.isa = &ccv_cnnp_sin_isa;
2315
1
  model_sin->super.input_size = 1;
2316
1
  model_sin->super.outputs = &model_sin->output;
2317
1
  model_sin->super.output_size = 1;
2318
1
  ccv_cnnp_model_copy_name(&model_sin->super, name);
2319
1
  return (ccv_cnnp_model_t*)model_sin;
2320
1
}
2321
2322
static ccv_cnnp_model_t* _ccv_cnnp_sin_copy(const ccv_cnnp_model_t* const super, void* const context)
2323
0
{
2324
0
  return ccv_cnnp_sin(super->name);
2325
0
}
2326
2327
// MARK - Cos Layer
2328
2329
typedef struct {
2330
  ccv_cnnp_model_t super;
2331
  ccv_nnc_tensor_symbol_t output;
2332
} ccv_cnnp_model_cos_t;
2333
2334
static void _ccv_cnnp_cos_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2335
1
{
2336
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_cos_build] -\n");
2337
1
  assert(output_size == 1);
2338
1
  assert(input_size == 1);
2339
1
  ccv_nnc_tensor_param_t input_params[1];
2340
1
  ccv_nnc_tensor_param_t output_params;
2341
1
  const ccv_nnc_cmd_t cos = CMD_EWCOS_FORWARD();
2342
1
  input_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2343
1
  ccv_nnc_hint_tensor_auto(cos, input_params, 1, ccv_nnc_no_hint, &output_params, 1);
2344
1
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2345
1
  ccv_nnc_graph_exec_symbol_new(graph, cos, inputs, 1, outputs, output_size, "cos");
2346
1
}
2347
2348
static ccv_cnnp_model_t* _ccv_cnnp_cos_copy(const ccv_cnnp_model_t* const self, void* const context);
2349
2350
static const ccv_cnnp_model_vtab_t ccv_cnnp_cos_isa = {
2351
  .build = _ccv_cnnp_cos_build,
2352
  .copy = _ccv_cnnp_cos_copy,
2353
};
2354
2355
ccv_cnnp_model_t* ccv_cnnp_cos(const char* const name)
2356
1
{
2357
1
  ccv_cnnp_model_cos_t* const model_cos = (ccv_cnnp_model_cos_t*)cccalloc(1, sizeof(ccv_cnnp_model_cos_t));
2358
1
  model_cos->super.isa = &ccv_cnnp_cos_isa;
2359
1
  model_cos->super.input_size = 1;
2360
1
  model_cos->super.outputs = &model_cos->output;
2361
1
  model_cos->super.output_size = 1;
2362
1
  ccv_cnnp_model_copy_name(&model_cos->super, name);
2363
1
  return (ccv_cnnp_model_t*)model_cos;
2364
1
}
2365
2366
static ccv_cnnp_model_t* _ccv_cnnp_cos_copy(const ccv_cnnp_model_t* const super, void* const context)
2367
0
{
2368
0
  return ccv_cnnp_cos(super->name);
2369
0
}
2370
2371
// MARK - Rotate Half Layer
2372
2373
typedef struct {
2374
  ccv_cnnp_model_t super;
2375
  ccv_nnc_tensor_symbol_t output;
2376
} ccv_cnnp_model_rotate_half_t;
2377
2378
static void _ccv_cnnp_rotate_half_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2379
0
{
2380
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_rotate_half_build] -\n");
2381
0
  assert(input_size == 1);
2382
0
  assert(output_size == 1);
2383
0
  ccv_nnc_tensor_param_t input_params[1];
2384
0
  ccv_nnc_tensor_param_t output_params;
2385
0
  const ccv_nnc_cmd_t rotate_half = CMD_ROTATE_HALF_FORWARD();
2386
0
  input_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2387
0
  ccv_nnc_hint_tensor_auto(rotate_half, input_params, 1, ccv_nnc_no_hint, &output_params, 1);
2388
0
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2389
0
  ccv_nnc_graph_exec_symbol_new(graph, rotate_half, inputs, 1, outputs, output_size, "rotate_half");
2390
0
}
2391
2392
static ccv_cnnp_model_t* _ccv_cnnp_rotate_half_copy(const ccv_cnnp_model_t* const self, void* const context);
2393
2394
static const ccv_cnnp_model_vtab_t ccv_cnnp_rotate_half_isa = {
2395
  .build = _ccv_cnnp_rotate_half_build,
2396
  .copy = _ccv_cnnp_rotate_half_copy,
2397
};
2398
2399
ccv_cnnp_model_t* ccv_cnnp_rotate_half(const char* const name)
2400
0
{
2401
0
  ccv_cnnp_model_rotate_half_t* const model_rotate_half = (ccv_cnnp_model_rotate_half_t*)cccalloc(1, sizeof(ccv_cnnp_model_rotate_half_t));
2402
0
  model_rotate_half->super.isa = &ccv_cnnp_rotate_half_isa;
2403
0
  model_rotate_half->super.input_size = 1;
2404
0
  model_rotate_half->super.outputs = &model_rotate_half->output;
2405
0
  model_rotate_half->super.output_size = 1;
2406
0
  ccv_cnnp_model_copy_name(&model_rotate_half->super, name);
2407
0
  return (ccv_cnnp_model_t*)model_rotate_half;
2408
0
}
2409
2410
static ccv_cnnp_model_t* _ccv_cnnp_rotate_half_copy(const ccv_cnnp_model_t* const super, void* const context)
2411
0
{
2412
0
  return ccv_cnnp_rotate_half(super->name);
2413
0
}
2414
2415
// MARK - Cmul Layer
2416
2417
typedef struct {
2418
  ccv_cnnp_model_t super;
2419
  ccv_nnc_tensor_symbol_t output;
2420
} ccv_cnnp_model_cmul_t;
2421
2422
static void _ccv_cnnp_cmul_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2423
0
{
2424
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_cmul_build] -\n");
2425
0
  assert(input_size == 2);
2426
0
  assert(output_size == 1);
2427
0
  ccv_nnc_tensor_param_t input_params[2];
2428
0
  int i;
2429
0
  for (i = 0; i < 2; i++)
2430
0
    input_params[i] = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
2431
0
  ccv_nnc_tensor_param_t output_params;
2432
0
  const ccv_nnc_cmd_t mul = CMD_CMUL_FORWARD();
2433
0
  ccv_nnc_hint_tensor_auto(mul, input_params, 2, ccv_nnc_no_hint, &output_params, 1);
2434
0
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2435
0
  ccv_nnc_graph_exec_symbol_new(graph, mul, inputs, input_size, outputs, output_size, "cmul");
2436
0
}
2437
2438
static ccv_cnnp_model_t* _ccv_cnnp_cmul_copy(const ccv_cnnp_model_t* const self, void* const context);
2439
2440
static const ccv_cnnp_model_vtab_t ccv_cnnp_cmul_isa = {
2441
  .build = _ccv_cnnp_cmul_build,
2442
  .copy = _ccv_cnnp_cmul_copy,
2443
};
2444
2445
ccv_cnnp_model_t* ccv_cnnp_cmul(const char* const name)
2446
0
{
2447
0
  ccv_cnnp_model_cmul_t* const model_cmul = (ccv_cnnp_model_cmul_t*)cccalloc(1, sizeof(ccv_cnnp_model_cmul_t));
2448
0
  model_cmul->super.isa = &ccv_cnnp_cmul_isa;
2449
0
  model_cmul->super.input_size = 2;
2450
0
  model_cmul->super.outputs = &model_cmul->output;
2451
0
  model_cmul->super.output_size = 1;
2452
0
  ccv_cnnp_model_copy_name(&model_cmul->super, name);
2453
0
  return (ccv_cnnp_model_t*)model_cmul;
2454
0
}
2455
2456
static ccv_cnnp_model_t* _ccv_cnnp_cmul_copy(const ccv_cnnp_model_t* const super, void* const context)
2457
0
{
2458
0
  return ccv_cnnp_cmul(super->name);
2459
0
}
2460
2461
// MARK - Transpose Layer
2462
2463
typedef struct {
2464
  ccv_cnnp_model_t super;
2465
  ccv_nnc_tensor_symbol_t output;
2466
  int transpose[2];
2467
} ccv_cnnp_model_transpose_t;
2468
2469
static void _ccv_cnnp_transpose_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2470
22
{
2471
22
  ccv_cnnp_model_transpose_t* const self = (ccv_cnnp_model_transpose_t*)super;
2472
22
  PRINT(CCV_CLI_VERBOSE, "[cnnp_transpose_build] (%d, %d)\n", self->transpose[0], self->transpose[1]);
2473
22
  assert(input_size == 1);
2474
22
  assert(output_size == 1);
2475
22
  if (self->transpose[0] == self->transpose[1])
2476
0
  {
2477
0
    outputs[0] = inputs[0];
2478
0
    return;
2479
0
  }
2480
22
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2481
22
  ccv_nnc_tensor_param_t output_params;
2482
22
  const ccv_nnc_cmd_t transpose = CMD_TRANSPOSE_FORWARD(self->transpose[0], self->transpose[1]);
2483
22
  ccv_nnc_hint_tensor_auto(transpose, (ccv_nnc_tensor_param_t []){
2484
22
      params,
2485
22
    }, 1, ccv_nnc_no_hint, &output_params, 1);
2486
22
  const ccv_nnc_tensor_symbol_t transpose_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2487
22
  ccv_nnc_graph_exec_symbol_new(graph, transpose, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(transpose_output), "transpose");
2488
22
  outputs[0] = transpose_output;
2489
22
}
2490
2491
static ccv_cnnp_model_t* _ccv_cnnp_transpose_copy(const ccv_cnnp_model_t* const super, void* const context);
2492
2493
static const ccv_cnnp_model_vtab_t ccv_cnnp_transpose_isa = {
2494
  .build = _ccv_cnnp_transpose_build,
2495
  .copy = _ccv_cnnp_transpose_copy,
2496
};
2497
2498
ccv_cnnp_model_t* ccv_cnnp_transpose(const int axis_a, const int axis_b, const char* const name)
2499
22
{
2500
22
  ccv_cnnp_model_transpose_t* const model_transpose = (ccv_cnnp_model_transpose_t*)cccalloc(1, sizeof(ccv_cnnp_model_transpose_t));
2501
22
  model_transpose->super.isa = &ccv_cnnp_transpose_isa;
2502
22
  model_transpose->super.input_size = 1;
2503
22
  model_transpose->super.outputs = &model_transpose->output;
2504
22
  model_transpose->super.output_size = 1;
2505
22
  model_transpose->transpose[0] = axis_a;
2506
22
  model_transpose->transpose[1] = axis_b;
2507
22
  ccv_cnnp_model_copy_name(&model_transpose->super, name);
2508
22
  return (ccv_cnnp_model_t*)model_transpose;
2509
22
}
2510
2511
static ccv_cnnp_model_t* _ccv_cnnp_transpose_copy(const ccv_cnnp_model_t* const super, void* const context)
2512
0
{
2513
0
  const ccv_cnnp_model_transpose_t* const self = (const ccv_cnnp_model_transpose_t*)super;
2514
0
  return ccv_cnnp_transpose(self->transpose[0], self->transpose[1], self->super.name);
2515
0
}
2516
2517
// MARK - Layer Norm Layer
2518
2519
typedef struct {
2520
  ccv_cnnp_model_t super;
2521
  ccv_nnc_tensor_symbol_t output;
2522
  ccv_nnc_tensor_symbol_t bias;
2523
  ccv_nnc_tensor_symbol_t scale;
2524
  ccv_nnc_cmd_param_t params;
2525
} ccv_cnnp_model_layer_norm_t;
2526
2527
static void _ccv_cnnp_layer_norm_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2528
8
{
2529
8
  PRINT(CCV_CLI_VERBOSE, "[cnnp_layer_norm_build] -\n");
2530
8
  assert(input_size == 1);
2531
8
  assert(output_size == 1);
2532
8
  ccv_cnnp_model_layer_norm_t* const self = (ccv_cnnp_model_layer_norm_t*)super;
2533
8
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2534
8
  ccv_nnc_tensor_param_t bias_params = params;
2535
8
  const int nd = ccv_nnc_tensor_nd(params.dim);
2536
8
  int i;
2537
32
  for (i = 0; i < nd; 
i++24
)
2538
24
    bias_params.dim[i] = 1;
2539
16
  for (i = 0; i < self->params.lnorm.count; 
i++8
)
2540
8
    bias_params.dim[self->params.lnorm.axis[i]] = params.dim[self->params.lnorm.axis[i]];
2541
8
  if (self->params.lnorm.elementwise_affine)
2542
8
  {
2543
    // Both scale and bias are shared between if this model is reused.
2544
8
    if (!self->scale.graph)
2545
8
      self->scale = ccv_nnc_tensor_symbol_new(graph, bias_params, "scale");
2546
8
    if (!self->bias.graph)
2547
8
      self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
2548
8
  }
2549
8
  const ccv_nnc_cmd_t layer_norm = ccv_nnc_cmd(CCV_NNC_LAYER_NORM_FORWARD, 0, self->params, 0);
2550
8
  ccv_nnc_tensor_param_t output_params[3];
2551
8
  if (self->params.lnorm.elementwise_affine)
2552
8
    ccv_nnc_hint_tensor_auto(layer_norm, (ccv_nnc_tensor_param_t []){
2553
8
        params,
2554
8
        bias_params,
2555
8
        bias_params,
2556
8
      }, 3, ccv_nnc_no_hint, output_params, 3);
2557
0
  else
2558
0
    ccv_nnc_hint_tensor_auto(layer_norm, (ccv_nnc_tensor_param_t []){
2559
0
        params,
2560
0
      }, 1, ccv_nnc_no_hint, output_params, 3);
2561
8
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
2562
8
  const ccv_nnc_tensor_symbol_t saved_mean = ccv_nnc_tensor_symbol_new(graph, output_params[1], "saved_mean");
2563
8
  const ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(graph, output_params[2], "saved_inv_std");
2564
8
  if (self->params.lnorm.elementwise_affine)
2565
8
    ccv_nnc_graph_exec_symbol_new(graph, layer_norm, TENSOR_SYMBOL_LIST(inputs[0], self->scale, self->bias), TENSOR_SYMBOL_LIST(output, saved_mean, saved_inv_std), "layer_norm");
2566
0
  else
2567
0
    ccv_nnc_graph_exec_symbol_new(graph, layer_norm, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(output, saved_mean, saved_inv_std), "layer_norm");
2568
8
  outputs[0] = output;
2569
8
}
2570
2571
// Initialize the affine parameters of a layer norm model: scale is set to 1, bias to 0.
// A parameter whose symbol has no graph attached is left alone.
static void _ccv_cnnp_layer_norm_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
{
  ccv_cnnp_model_layer_norm_t* const layer_norm = (ccv_cnnp_model_layer_norm_t*)super;
  const ccv_nnc_tensor_symbol_t scale = layer_norm->scale;
  const ccv_nnc_tensor_symbol_t bias = layer_norm->bias;
  if (scale.graph)
    initializer(context, CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, scale);
  if (bias.graph)
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, bias);
}
2579
2580
// Report the layer norm parameters (scale first, then bias) to the caller-supplied collector.
// Only symbols that have a graph attached are reported.
static void _ccv_cnnp_layer_norm_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
{
  ccv_cnnp_model_layer_norm_t* const layer_norm = (ccv_cnnp_model_layer_norm_t*)super;
  const ccv_nnc_tensor_symbol_t scale = layer_norm->scale;
  const ccv_nnc_tensor_symbol_t bias = layer_norm->bias;
  if (scale.graph)
    add_to_array(parameters, scale, is_trainable);
  if (bias.graph)
    add_to_array(parameters, bias, is_trainable);
}
2588
2589
static ccv_cnnp_model_t* _ccv_cnnp_layer_norm_copy(const ccv_cnnp_model_t* const super, void* const context);
2590
2591
static const ccv_cnnp_model_vtab_t ccv_cnnp_layer_norm_isa = {
2592
  .build = _ccv_cnnp_layer_norm_build,
2593
  .init_states = _ccv_cnnp_layer_norm_init_states,
2594
  .add_to_parameter = _ccv_cnnp_layer_norm_add_to_parameter,
2595
  .copy = _ccv_cnnp_layer_norm_copy,
2596
};
2597
2598
ccv_cnnp_model_t* ccv_cnnp_layer_norm(const float epsilon, const int axis[CCV_NNC_MAX_DIM_ALLOC], const int axis_count, const int elementwise_affine, const int is_trainable, const char* const name)
2599
8
{
2600
8
  ccv_cnnp_model_layer_norm_t* const model_layer_norm = (ccv_cnnp_model_layer_norm_t*)cccalloc(1, sizeof(ccv_cnnp_model_layer_norm_t));
2601
8
  model_layer_norm->super.isa = &ccv_cnnp_layer_norm_isa;
2602
8
  model_layer_norm->super.input_size = 1;
2603
8
  model_layer_norm->super.outputs = &model_layer_norm->output;
2604
8
  model_layer_norm->super.output_size = 1;
2605
8
  model_layer_norm->super.is_trainable = is_trainable;
2606
8
  ccv_cnnp_model_copy_name(&model_layer_norm->super, name);
2607
8
  model_layer_norm->scale.d = CCV_NNC_NO_TENSOR_SYMBOL;
2608
8
  model_layer_norm->scale.graph = 0;
2609
8
  model_layer_norm->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
2610
8
  model_layer_norm->bias.graph = 0;
2611
8
  model_layer_norm->params.lnorm.epsilon = epsilon;
2612
8
  model_layer_norm->params.lnorm.count = axis_count;
2613
8
  model_layer_norm->params.lnorm.elementwise_affine = elementwise_affine;
2614
8
  memcpy(model_layer_norm->params.lnorm.axis, axis, sizeof(int) * axis_count);
2615
8
  return (ccv_cnnp_model_t*)model_layer_norm;
2616
8
}
2617
2618
static ccv_cnnp_model_t* _ccv_cnnp_layer_norm_copy(const ccv_cnnp_model_t* const super, void* const context)
2619
0
{
2620
0
  const ccv_cnnp_model_layer_norm_t* const self = (const ccv_cnnp_model_layer_norm_t*)super;
2621
0
  return ccv_cnnp_layer_norm(self->params.lnorm.epsilon, self->params.lnorm.axis, self->params.lnorm.count, self->params.lnorm.elementwise_affine, self->super.is_trainable, self->super.name);
2622
0
}
2623
2624
// MARK - Group Norm Layer
2625
2626
typedef struct {
2627
  ccv_cnnp_model_t super;
2628
  ccv_nnc_tensor_symbol_t output;
2629
  ccv_nnc_tensor_symbol_t bias;
2630
  ccv_nnc_tensor_symbol_t scale;
2631
  ccv_nnc_cmd_param_t params;
2632
} ccv_cnnp_model_group_norm_t;
2633
2634
// Add a group norm node to the symbolic graph.
// Takes exactly one input and yields one output; the saved mean and inverse std tensors are
// created as extra outputs of the node but are not returned through `outputs`.
static void _ccv_cnnp_group_norm_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_group_norm_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_cnnp_model_group_norm_t* const model = (ccv_cnnp_model_group_norm_t*)super;
  const int affine = model->params.gnorm.elementwise_affine;
  const int group_axis = model->params.gnorm.group_axis;
  const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  // Scale / bias shape: 1 along every axis except the group axis, which keeps the input extent.
  ccv_nnc_tensor_param_t affine_params = input_params;
  const int dim_count = ccv_nnc_tensor_nd(input_params.dim);
  int k;
  for (k = 0; k < dim_count; k++)
    affine_params.dim[k] = 1;
  affine_params.dim[group_axis] = input_params.dim[group_axis];
  if (affine)
  {
    // Symbols that already exist are kept, so a reused model shares its scale and bias.
    if (!model->scale.graph)
      model->scale = ccv_nnc_tensor_symbol_new(graph, affine_params, "scale");
    if (!model->bias.graph)
      model->bias = ccv_nnc_tensor_symbol_new(graph, affine_params, "bias");
  }
  const ccv_nnc_cmd_t cmd = ccv_nnc_cmd(CCV_NNC_GROUP_NORM_FORWARD, 0, model->params, 0);
  // Let the command derive the shapes of its three outputs from the input shapes.
  ccv_nnc_tensor_param_t output_params[3];
  if (affine)
  {
    const ccv_nnc_tensor_param_t hint_inputs[3] = { input_params, affine_params, affine_params };
    ccv_nnc_hint_tensor_auto(cmd, hint_inputs, 3, ccv_nnc_no_hint, output_params, 3);
  } else
    ccv_nnc_hint_tensor_auto(cmd, &input_params, 1, ccv_nnc_no_hint, output_params, 3);
  const ccv_nnc_tensor_symbol_t normalized = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
  const ccv_nnc_tensor_symbol_t mean = ccv_nnc_tensor_symbol_new(graph, output_params[1], "saved_mean");
  const ccv_nnc_tensor_symbol_t inv_std = ccv_nnc_tensor_symbol_new(graph, output_params[2], "saved_inv_std");
  if (affine)
    ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], model->scale, model->bias), TENSOR_SYMBOL_LIST(normalized, mean, inv_std), "group_norm");
  else
    ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(normalized, mean, inv_std), "group_norm");
  outputs[0] = normalized;
}
2676
2677
// Initialize the affine parameters of a group norm model: scale is set to 1, bias to 0.
// A parameter whose symbol has no graph attached is left alone.
static void _ccv_cnnp_group_norm_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
{
  ccv_cnnp_model_group_norm_t* const group_norm = (ccv_cnnp_model_group_norm_t*)super;
  const ccv_nnc_tensor_symbol_t scale = group_norm->scale;
  const ccv_nnc_tensor_symbol_t bias = group_norm->bias;
  if (scale.graph)
    initializer(context, CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, scale);
  if (bias.graph)
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, bias);
}
2685
2686
// Report the group norm parameters (scale first, then bias) to the caller-supplied collector.
// Only symbols that have a graph attached are reported.
static void _ccv_cnnp_group_norm_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
{
  ccv_cnnp_model_group_norm_t* const group_norm = (ccv_cnnp_model_group_norm_t*)super;
  const ccv_nnc_tensor_symbol_t scale = group_norm->scale;
  const ccv_nnc_tensor_symbol_t bias = group_norm->bias;
  if (scale.graph)
    add_to_array(parameters, scale, is_trainable);
  if (bias.graph)
    add_to_array(parameters, bias, is_trainable);
}
2694
2695
static ccv_cnnp_model_t* _ccv_cnnp_group_norm_copy(const ccv_cnnp_model_t* const super, void* const context);
2696
2697
static const ccv_cnnp_model_vtab_t ccv_cnnp_group_norm_isa = {
2698
  .build = _ccv_cnnp_group_norm_build,
2699
  .init_states = _ccv_cnnp_group_norm_init_states,
2700
  .add_to_parameter = _ccv_cnnp_group_norm_add_to_parameter,
2701
  .copy = _ccv_cnnp_group_norm_copy,
2702
};
2703
2704
ccv_cnnp_model_t* ccv_cnnp_group_norm(const int group_axis, const int groups, const float epsilon, const int reduce_axis[CCV_NNC_MAX_DIM_ALLOC], const int axis_count, const int elementwise_affine, const int is_trainable, const char* const name)
2705
0
{
2706
0
  ccv_cnnp_model_group_norm_t* const model_group_norm = (ccv_cnnp_model_group_norm_t*)cccalloc(1, sizeof(ccv_cnnp_model_group_norm_t));
2707
0
  model_group_norm->super.isa = &ccv_cnnp_group_norm_isa;
2708
0
  model_group_norm->super.input_size = 1;
2709
0
  model_group_norm->super.outputs = &model_group_norm->output;
2710
0
  model_group_norm->super.output_size = 1;
2711
0
  model_group_norm->super.is_trainable = is_trainable;
2712
0
  ccv_cnnp_model_copy_name(&model_group_norm->super, name);
2713
0
  model_group_norm->scale.d = CCV_NNC_NO_TENSOR_SYMBOL;
2714
0
  model_group_norm->scale.graph = 0;
2715
0
  model_group_norm->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
2716
0
  model_group_norm->bias.graph = 0;
2717
0
  model_group_norm->params.gnorm.group_axis = group_axis;
2718
0
  model_group_norm->params.gnorm.groups = groups;
2719
0
  model_group_norm->params.gnorm.epsilon = epsilon;
2720
0
  model_group_norm->params.gnorm.reduce_count = axis_count;
2721
0
  model_group_norm->params.gnorm.elementwise_affine = elementwise_affine;
2722
0
  memcpy(model_group_norm->params.gnorm.reduce_axis, reduce_axis, sizeof(int) * axis_count);
2723
0
  return (ccv_cnnp_model_t*)model_group_norm;
2724
0
}
2725
2726
static ccv_cnnp_model_t* _ccv_cnnp_group_norm_copy(const ccv_cnnp_model_t* const super, void* const context)
2727
0
{
2728
0
  const ccv_cnnp_model_group_norm_t* const self = (const ccv_cnnp_model_group_norm_t*)super;
2729
0
  return ccv_cnnp_group_norm(self->params.gnorm.group_axis, self->params.gnorm.groups, self->params.gnorm.epsilon, self->params.gnorm.reduce_axis, self->params.gnorm.reduce_count, self->params.gnorm.elementwise_affine, self->super.is_trainable, self->super.name);
2730
0
}
2731
2732
// MARK - RMSNorm Layer
2733
2734
typedef struct {
2735
  ccv_cnnp_model_t super;
2736
  ccv_nnc_tensor_symbol_t output;
2737
  ccv_nnc_tensor_symbol_t scale;
2738
  ccv_nnc_cmd_param_t params;
2739
} ccv_cnnp_model_rmsnorm_t;
2740
2741
// Add an RMSNorm node to the symbolic graph.
// Takes exactly one input and yields one output; the saved inverse std tensor is created as
// an extra output of the node but is not returned through `outputs`.
static void _ccv_cnnp_rmsnorm_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_rmsnorm_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_cnnp_model_rmsnorm_t* const model = (ccv_cnnp_model_rmsnorm_t*)super;
  const int affine = model->params.rmsnorm.elementwise_affine;
  const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  // Scale shape: 1 along every axis except the normalized ones, which keep the input extent.
  ccv_nnc_tensor_param_t scale_params = input_params;
  const int dim_count = ccv_nnc_tensor_nd(input_params.dim);
  int k;
  for (k = 0; k < dim_count; k++)
    scale_params.dim[k] = 1;
  for (k = 0; k < model->params.rmsnorm.count; k++)
  {
    const int axis = model->params.rmsnorm.axis[k];
    scale_params.dim[axis] = input_params.dim[axis];
  }
  // An existing scale symbol is kept, so a reused model shares its scale.
  if (affine && !model->scale.graph)
    model->scale = ccv_nnc_tensor_symbol_new(graph, scale_params, "scale");
  const ccv_nnc_cmd_t cmd = ccv_nnc_cmd(CCV_NNC_RMSNORM_FORWARD, 0, model->params, 0);
  // Let the command derive the shapes of its two outputs from the input shapes.
  ccv_nnc_tensor_param_t output_params[2];
  if (affine)
  {
    const ccv_nnc_tensor_param_t hint_inputs[2] = { input_params, scale_params };
    ccv_nnc_hint_tensor_auto(cmd, hint_inputs, 2, ccv_nnc_no_hint, output_params, 2);
  } else
    ccv_nnc_hint_tensor_auto(cmd, &input_params, 1, ccv_nnc_no_hint, output_params, 2);
  const ccv_nnc_tensor_symbol_t normalized = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
  const ccv_nnc_tensor_symbol_t inv_std = ccv_nnc_tensor_symbol_new(graph, output_params[1], "saved_inv_std");
  if (affine)
    ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], model->scale), TENSOR_SYMBOL_LIST(normalized, inv_std), "rmsnorm");
  else
    ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(normalized, inv_std), "rmsnorm");
  outputs[0] = normalized;
}
2780
2781
// Initialize the RMSNorm scale to 1. Nothing happens when the scale symbol has no graph attached.
static void _ccv_cnnp_rmsnorm_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
{
  ccv_cnnp_model_rmsnorm_t* const rmsnorm = (ccv_cnnp_model_rmsnorm_t*)super;
  if (!rmsnorm->scale.graph)
    return;
  initializer(context, CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, rmsnorm->scale);
}
2787
2788
// Report the RMSNorm scale to the caller-supplied collector, if the scale symbol has a graph attached.
static void _ccv_cnnp_rmsnorm_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
{
  ccv_cnnp_model_rmsnorm_t* const rmsnorm = (ccv_cnnp_model_rmsnorm_t*)super;
  if (!rmsnorm->scale.graph)
    return;
  add_to_array(parameters, rmsnorm->scale, is_trainable);
}
2794
2795
static ccv_cnnp_model_t* _ccv_cnnp_rmsnorm_copy(const ccv_cnnp_model_t* const super, void* const context);
2796
2797
static const ccv_cnnp_model_vtab_t ccv_cnnp_rmsnorm_isa = {
2798
  .build = _ccv_cnnp_rmsnorm_build,
2799
  .init_states = _ccv_cnnp_rmsnorm_init_states,
2800
  .add_to_parameter = _ccv_cnnp_rmsnorm_add_to_parameter,
2801
  .copy = _ccv_cnnp_rmsnorm_copy,
2802
};
2803
2804
ccv_cnnp_model_t* ccv_cnnp_rmsnorm(const float epsilon, const int axis[CCV_NNC_MAX_DIM_ALLOC], const int axis_count, const int elementwise_affine, const int is_trainable, const char* const name)
2805
0
{
2806
0
  ccv_cnnp_model_rmsnorm_t* const model_rmsnorm = (ccv_cnnp_model_rmsnorm_t*)cccalloc(1, sizeof(ccv_cnnp_model_rmsnorm_t));
2807
0
  model_rmsnorm->super.isa = &ccv_cnnp_rmsnorm_isa;
2808
0
  model_rmsnorm->super.input_size = 1;
2809
0
  model_rmsnorm->super.outputs = &model_rmsnorm->output;
2810
0
  model_rmsnorm->super.output_size = 1;
2811
0
  model_rmsnorm->super.is_trainable = is_trainable;
2812
0
  ccv_cnnp_model_copy_name(&model_rmsnorm->super, name);
2813
0
  model_rmsnorm->scale.d = CCV_NNC_NO_TENSOR_SYMBOL;
2814
0
  model_rmsnorm->scale.graph = 0;
2815
0
  model_rmsnorm->params.rmsnorm.epsilon = epsilon;
2816
0
  model_rmsnorm->params.rmsnorm.count = axis_count;
2817
0
  model_rmsnorm->params.rmsnorm.elementwise_affine = elementwise_affine;
2818
0
  memcpy(model_rmsnorm->params.lnorm.axis, axis, sizeof(int) * axis_count);
2819
0
  return (ccv_cnnp_model_t*)model_rmsnorm;
2820
0
}
2821
2822
static ccv_cnnp_model_t* _ccv_cnnp_rmsnorm_copy(const ccv_cnnp_model_t* const super, void* const context)
2823
0
{
2824
0
  const ccv_cnnp_model_rmsnorm_t* const self = (const ccv_cnnp_model_rmsnorm_t*)super;
2825
0
  return ccv_cnnp_rmsnorm(self->params.rmsnorm.epsilon, self->params.rmsnorm.axis, self->params.rmsnorm.count, self->params.rmsnorm.elementwise_affine, self->super.is_trainable, self->super.name);
2826
0
}
2827
2828
// MARK - Batched Matrix Mul Layer
2829
2830
typedef struct {
2831
  ccv_cnnp_model_t super;
2832
  ccv_nnc_tensor_symbol_t output;
2833
  int transpose_a[2];
2834
  int transpose_b[2];
2835
  int flags;
2836
} ccv_cnnp_model_matmul_t;
2837
2838
// Add a GEMM node multiplying the two inputs to the symbolic graph and return its output symbol.
static void _ccv_cnnp_matmul_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_matmul_build] -\n");
  assert(input_size == 2);
  assert(output_size == 1);
  ccv_cnnp_model_matmul_t* const model = (ccv_cnnp_model_matmul_t*)super;
  ccv_nnc_tensor_param_t operand_params[2];
  operand_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  operand_params[1] = ccv_nnc_tensor_symbol_params(graph, inputs[1]);
  ccv_nnc_cmd_t gemm = CMD_GEMM_FORWARD(model->transpose_a, model->transpose_b);
  gemm.info.blas.flags = model->flags;
  // Let the command derive the product's shape from the two operand shapes.
  ccv_nnc_tensor_param_t product_params;
  ccv_nnc_hint_tensor_auto(gemm, operand_params, 2, ccv_nnc_no_hint, &product_params, 1);
  const ccv_nnc_tensor_symbol_t product = ccv_nnc_tensor_symbol_new(graph, product_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, gemm, inputs, input_size, TENSOR_SYMBOL_LIST(product), "matmul");
  outputs[0] = product;
}
2857
2858
static ccv_cnnp_model_t* _ccv_cnnp_matmul_copy(const ccv_cnnp_model_t* const super, void* const context);
2859
2860
static const ccv_cnnp_model_vtab_t ccv_cnnp_matmul_isa = {
2861
  .build = _ccv_cnnp_matmul_build,
2862
  .copy = _ccv_cnnp_matmul_copy,
2863
};
2864
2865
ccv_cnnp_model_t* ccv_cnnp_matmul(const int transpose_a[2], const int transpose_b[2], const int flags, const char* const name)
2866
10
{
2867
10
  ccv_cnnp_model_matmul_t* const model_matmul = (ccv_cnnp_model_matmul_t*)cccalloc(1, sizeof(ccv_cnnp_model_matmul_t));
2868
10
  model_matmul->super.isa = &ccv_cnnp_matmul_isa;
2869
10
  model_matmul->super.input_size = 2;
2870
10
  model_matmul->super.outputs = &model_matmul->output;
2871
10
  model_matmul->super.output_size = 1;
2872
10
  model_matmul->transpose_a[0] = transpose_a[0];
2873
10
  model_matmul->transpose_a[1] = transpose_a[1];
2874
10
  model_matmul->transpose_b[0] = transpose_b[0];
2875
10
  model_matmul->transpose_b[1] = transpose_b[1];
2876
10
  model_matmul->flags = flags;
2877
10
  ccv_cnnp_model_copy_name(&model_matmul->super, name);
2878
10
  return (ccv_cnnp_model_t*)model_matmul;
2879
10
}
2880
2881
static ccv_cnnp_model_t* _ccv_cnnp_matmul_copy(const ccv_cnnp_model_t* const super, void* const context)
2882
1
{
2883
1
  const ccv_cnnp_model_matmul_t* const self = (const ccv_cnnp_model_matmul_t*)super;
2884
1
  return ccv_cnnp_matmul(self->transpose_a, self->transpose_b, self->flags, self->super.name);
2885
1
}
2886
2887
// MARK - Dropout Layer
2888
2889
typedef struct {
2890
  ccv_cnnp_model_t super;
2891
  ccv_nnc_tensor_symbol_t output;
2892
  ccv_nnc_graph_exec_symbol_t dropout;
2893
  float p;
2894
  int entirety;
2895
} ccv_cnnp_model_dropout_t;
2896
2897
// Add a dropout node to the symbolic graph. The node also produces a "mask" tensor, which is
// not returned through `outputs`. The exec symbol is remembered on the model for set_is_test.
static void _ccv_cnnp_dropout_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_dropout_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_cnnp_model_dropout_t* const model = (ccv_cnnp_model_dropout_t*)super;
  const ccv_nnc_cmd_t cmd = CMD_DROPOUT_FORWARD(model->p, model->entirety);
  // Let the command derive the shapes of the output and the mask from the input shape.
  ccv_nnc_tensor_param_t result_params[2];
  ccv_nnc_hint_tensor_auto(cmd, &input_params, 1, ccv_nnc_no_hint, result_params, 2);
  const ccv_nnc_tensor_symbol_t dropped = ccv_nnc_tensor_symbol_new(graph, result_params[0], 0);
  const ccv_nnc_tensor_symbol_t mask = ccv_nnc_tensor_symbol_new(graph, result_params[1], "mask");
  model->dropout = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(dropped, mask), "dropout");
  outputs[0] = dropped;
}
2914
2915
// Switch the recorded dropout node between test and training behaviour.
// Does nothing when no exec symbol has been recorded yet.
static void _ccv_cnnp_dropout_set_is_test(ccv_cnnp_model_t* const super, const int is_test, const ccv_cnnp_cmd_updater_f updater, void* const context)
{
  ccv_cnnp_model_dropout_t* const model = (ccv_cnnp_model_dropout_t*)super;
  if (!model->dropout.graph)
    return;
  if (!is_test)
  {
    // Training: restore the real dropout command.
    updater(context, model->dropout, CMD_DROPOUT_FORWARD(model->p, model->entirety), ccv_nnc_no_hint);
    return;
  }
  // During test, the dropout is not applied. Data transfer is perfect because if these are the same tensor, it will skip.
  updater(context, model->dropout, CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint);
}
2927
2928
static ccv_cnnp_model_t* _ccv_cnnp_dropout_copy(const ccv_cnnp_model_t* const super, void* const context);
2929
2930
static const ccv_cnnp_model_vtab_t ccv_cnnp_dropout_isa = {
2931
  .build = _ccv_cnnp_dropout_build,
2932
  .set_is_test = _ccv_cnnp_dropout_set_is_test,
2933
  .copy = _ccv_cnnp_dropout_copy,
2934
};
2935
2936
ccv_cnnp_model_t* ccv_cnnp_dropout(const float p, const int entirety, const char* const name)
2937
12
{
2938
12
  ccv_cnnp_model_dropout_t* const model_dropout = (ccv_cnnp_model_dropout_t*)cccalloc(1, sizeof(ccv_cnnp_model_dropout_t));
2939
12
  model_dropout->super.isa = &ccv_cnnp_dropout_isa;
2940
12
  model_dropout->super.input_size = 1;
2941
12
  model_dropout->super.outputs = &model_dropout->output;
2942
12
  model_dropout->super.output_size = 1;
2943
12
  model_dropout->p = p;
2944
12
  model_dropout->entirety = entirety;
2945
12
  ccv_cnnp_model_copy_name(&model_dropout->super, name);
2946
12
  return (ccv_cnnp_model_t*)model_dropout;
2947
12
}
2948
2949
static ccv_cnnp_model_t* _ccv_cnnp_dropout_copy(const ccv_cnnp_model_t* const super, void* const context)
2950
0
{
2951
0
  const ccv_cnnp_model_dropout_t* const self = (const ccv_cnnp_model_dropout_t*)super;
2952
0
  return ccv_cnnp_dropout(self->p, self->entirety, self->super.name);
2953
0
}
2954
2955
// MARK - Masked Fill Layer
2956
2957
typedef struct {
2958
  ccv_cnnp_model_t super;
2959
  ccv_nnc_tensor_symbol_t output;
2960
  float eq;
2961
  float fill;
2962
} ccv_cnnp_model_masked_fill_t;
2963
2964
// Add a masked fill node over the two inputs to the symbolic graph.
// The output symbol is created with the same tensor parameters as the first input.
static void _ccv_cnnp_masked_fill_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_masked_fill_build] -\n");
  assert(input_size == 2);
  assert(output_size == 1);
  const ccv_nnc_tensor_param_t first_input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_cnnp_model_masked_fill_t* const model = (ccv_cnnp_model_masked_fill_t*)super;
  const ccv_nnc_tensor_symbol_t filled = ccv_nnc_tensor_symbol_new(graph, first_input_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, CMD_MASKED_FILL_FORWARD(model->eq, model->fill), TENSOR_SYMBOL_LIST(inputs[0], inputs[1]), TENSOR_SYMBOL_LIST(filled), "masked_fill");
  outputs[0] = filled;
}
2975
2976
static ccv_cnnp_model_t* _ccv_cnnp_masked_fill_copy(const ccv_cnnp_model_t* const super, void* const context);
2977
2978
static const ccv_cnnp_model_vtab_t ccv_cnnp_masked_fill_isa = {
2979
  .build = _ccv_cnnp_masked_fill_build,
2980
  .copy = _ccv_cnnp_masked_fill_copy,
2981
};
2982
2983
ccv_cnnp_model_t* ccv_cnnp_masked_fill(const float eq, const float fill, const char* const name)
2984
4
{
2985
4
  ccv_cnnp_model_masked_fill_t* const model_masked_fill = (ccv_cnnp_model_masked_fill_t*)cccalloc(1, sizeof(ccv_cnnp_model_masked_fill_t));
2986
4
  model_masked_fill->super.isa = &ccv_cnnp_masked_fill_isa;
2987
4
  model_masked_fill->super.input_size = 2;
2988
4
  model_masked_fill->super.outputs = &model_masked_fill->output;
2989
4
  model_masked_fill->super.output_size = 1;
2990
4
  model_masked_fill->eq = eq;
2991
4
  model_masked_fill->fill = fill;
2992
4
  ccv_cnnp_model_copy_name(&model_masked_fill->super, name);
2993
4
  return (ccv_cnnp_model_t*)model_masked_fill;
2994
4
}
2995
2996
static ccv_cnnp_model_t* _ccv_cnnp_masked_fill_copy(const ccv_cnnp_model_t* const super, void* const context)
2997
0
{
2998
0
  const ccv_cnnp_model_masked_fill_t* const self = (const ccv_cnnp_model_masked_fill_t*)super;
2999
0
  return ccv_cnnp_masked_fill(self->eq, self->fill, self->super.name);
3000
0
}
3001
3002
// MARK - Index Select Layer
3003
3004
typedef struct {
3005
  ccv_cnnp_model_t super;
3006
  ccv_nnc_tensor_symbol_t output;
3007
} ccv_cnnp_model_index_select_t;
3008
3009
// Add an index select node to the symbolic graph.
// inputs[0] is the table to select from and inputs[1] holds the indices.
static void _ccv_cnnp_index_select_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_index_select_build] -\n");
  assert(input_size == 2);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t source_params[2];
  source_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  source_params[1] = ccv_nnc_tensor_symbol_params(graph, inputs[1]);
  const ccv_nnc_cmd_t cmd = CMD_INDEX_SELECT_FORWARD();
  // Let the command derive the shape of the selection from the table and index shapes.
  ccv_nnc_tensor_param_t selected_params;
  ccv_nnc_hint_tensor_auto(cmd, source_params, 2, ccv_nnc_no_hint, &selected_params, 1);
  const ccv_nnc_tensor_symbol_t selected = ccv_nnc_tensor_symbol_new(graph, selected_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], inputs[1]), TENSOR_SYMBOL_LIST(selected), "index_select");
  outputs[0] = selected;
}
3026
3027
static ccv_cnnp_model_t* _ccv_cnnp_index_select_copy(const ccv_cnnp_model_t* const super, void* const context);
3028
3029
static const ccv_cnnp_model_vtab_t ccv_cnnp_index_select_isa = {
3030
  .build = _ccv_cnnp_index_select_build,
3031
  .copy = _ccv_cnnp_index_select_copy,
3032
};
3033
3034
ccv_cnnp_model_t* ccv_cnnp_index_select(const char* const name)
3035
2
{
3036
2
  ccv_cnnp_model_index_select_t* const model_index_select = (ccv_cnnp_model_index_select_t*)cccalloc(1, sizeof(ccv_cnnp_model_index_select_t));
3037
2
  model_index_select->super.isa = &ccv_cnnp_index_select_isa;
3038
2
  model_index_select->super.input_size = 2;
3039
2
  model_index_select->super.outputs = &model_index_select->output;
3040
2
  model_index_select->super.output_size = 1;
3041
2
  ccv_cnnp_model_copy_name(&model_index_select->super, name);
3042
2
  return (ccv_cnnp_model_t*)model_index_select;
3043
2
}
3044
3045
static ccv_cnnp_model_t* _ccv_cnnp_index_select_copy(const ccv_cnnp_model_t* const super, void* const context)
3046
0
{
3047
0
  ccv_cnnp_model_index_select_t* const self = (ccv_cnnp_model_index_select_t*)super;
3048
0
  return ccv_cnnp_index_select(self->super.name);
3049
0
}
3050
3051
// MARK - Embedding Layer
3052
3053
typedef struct {
3054
  ccv_cnnp_model_t super;
3055
  ccv_nnc_tensor_symbol_t output;
3056
  ccv_nnc_tensor_symbol_t vocab;
3057
  int datatype;
3058
  int vocab_size;
3059
  int embed_size;
3060
} ccv_cnnp_model_embedding_t;
3061
3062
// Add an embedding lookup to the symbolic graph: an index select over the model-owned
// vocab table using the single input as indices.
static void _ccv_cnnp_embedding_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_embedding_t* const model = (ccv_cnnp_model_embedding_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_embedding_build] vocab_size: %d, embed_size: %d\n", model->vocab_size, model->embed_size);
  assert(input_size == 1);
  assert(output_size == 1);
  const ccv_nnc_tensor_param_t index_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  // The table inherits the remaining tensor parameters from the input, but gets its own
  // datatype and a (vocab_size, embed_size) shape.
  ccv_nnc_tensor_param_t table_params = index_params;
  memset(table_params.dim, 0, sizeof(table_params.dim));
  table_params.datatype = model->datatype;
  table_params.dim[0] = model->vocab_size;
  table_params.dim[1] = model->embed_size;
  // An existing table symbol is kept, so a reused model shares its table.
  if (!model->vocab.graph)
    model->vocab = ccv_nnc_tensor_symbol_new(graph, table_params, "vocab");
  assert(model->vocab.graph == graph);
  const ccv_nnc_cmd_t lookup = CMD_INDEX_SELECT_FORWARD();
  const ccv_nnc_tensor_param_t lookup_inputs[2] = { table_params, index_params };
  ccv_nnc_tensor_param_t embedded_params;
  ccv_nnc_hint_tensor_auto(lookup, lookup_inputs, 2, ccv_nnc_no_hint, &embedded_params, 1);
  const ccv_nnc_tensor_symbol_t embedded = ccv_nnc_tensor_symbol_new(graph, embedded_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, lookup, TENSOR_SYMBOL_LIST(model->vocab, inputs[0]), TENSOR_SYMBOL_LIST(embedded), "embedding");
  outputs[0] = embedded;
}
3087
3088
// Initialize the embedding table with uniform random values in [-bound, bound], where
// bound = sqrt(3) * sqrt(2) / sqrt(vocab_size + embed_size).
static void _ccv_cnnp_embedding_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
{
  ccv_cnnp_model_embedding_t* const model = (ccv_cnnp_model_embedding_t*)super;
  const float stddev = sqrtf(2) / sqrtf(model->vocab_size + model->embed_size);
  const float bound = sqrtf(3) * stddev;
  initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-bound, bound), ccv_nnc_no_hint, 0, 0, model->vocab);
}
3095
3096
// Report the embedding table as the model's only parameter to the caller-supplied collector.
static void _ccv_cnnp_embedding_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
{
  ccv_cnnp_model_embedding_t* const model = (ccv_cnnp_model_embedding_t*)super;
  add_to_array(parameters, model->vocab, is_trainable);
}
3101
3102
static ccv_cnnp_model_t* _ccv_cnnp_embedding_copy(const ccv_cnnp_model_t* const super, void* const context);
3103
3104
static const ccv_cnnp_model_vtab_t ccv_cnnp_embedding_isa = {
3105
  .build = _ccv_cnnp_embedding_build,
3106
  .init_states = _ccv_cnnp_embedding_init_states,
3107
  .add_to_parameter = _ccv_cnnp_embedding_add_to_parameter,
3108
  .copy = _ccv_cnnp_embedding_copy,
3109
};
3110
3111
ccv_cnnp_model_t* ccv_cnnp_embedding(const int datatype, const int vocab_size, const int embed_size, const int is_trainable, const char* const name)
3112
1
{
3113
1
  ccv_cnnp_model_embedding_t* const model_embedding = (ccv_cnnp_model_embedding_t*)cccalloc(1, sizeof(ccv_cnnp_model_embedding_t));
3114
1
  model_embedding->super.isa = &ccv_cnnp_embedding_isa;
3115
1
  model_embedding->super.input_size = 1;
3116
1
  model_embedding->super.outputs = &model_embedding->output;
3117
1
  model_embedding->super.output_size = 1;
3118
1
  model_embedding->super.is_trainable = is_trainable;
3119
1
  ccv_cnnp_model_copy_name(&model_embedding->super, name);
3120
1
  model_embedding->vocab.d = CCV_NNC_NO_TENSOR_SYMBOL;
3121
1
  model_embedding->vocab.graph = 0;
3122
1
  assert(datatype == CCV_32F || datatype == CCV_16F || datatype == CCV_16BF);
3123
1
  model_embedding->datatype = datatype;
3124
1
  assert(vocab_size > 0);
3125
1
  model_embedding->vocab_size = vocab_size;
3126
1
  assert(embed_size > 0);
3127
1
  model_embedding->embed_size = embed_size;
3128
1
  return (ccv_cnnp_model_t*)model_embedding;
3129
1
}
3130
3131
static ccv_cnnp_model_t* _ccv_cnnp_embedding_copy(const ccv_cnnp_model_t* const super, void* const context)
3132
0
{
3133
0
  ccv_cnnp_model_embedding_t* const self = (ccv_cnnp_model_embedding_t*)super;
3134
0
  return ccv_cnnp_embedding(self->datatype, self->vocab_size, self->embed_size, self->super.is_trainable, self->super.name);
3135
0
}
3136
3137
// MARK - Upsample Layer
3138
3139
typedef struct {
3140
  ccv_cnnp_model_t super;
3141
  ccv_nnc_tensor_symbol_t output;
3142
  int type;
3143
  float width_scale;
3144
  float height_scale;
3145
  int align_corners;
3146
} ccv_cnnp_model_upsample_t;
3147
3148
// Add an upsample node for the single input to the symbolic graph and return its output symbol.
static void _ccv_cnnp_upsample_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_upsample_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_cnnp_model_upsample_t* const model = (ccv_cnnp_model_upsample_t*)super;
  const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_nnc_cmd_t upsample = CMD_UPSAMPLE_FORWARD(model->type, model->width_scale, model->height_scale, model->align_corners);
  // Let the command derive the upsampled shape from the input shape.
  ccv_nnc_tensor_param_t upsampled_params;
  ccv_nnc_hint_tensor_auto(upsample, &input_params, 1, ccv_nnc_no_hint, &upsampled_params, 1);
  const ccv_nnc_tensor_symbol_t upsampled = ccv_nnc_tensor_symbol_new(graph, upsampled_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, upsample, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(upsampled), "upsample");
  outputs[0] = upsampled;
}
3162
3163
static ccv_cnnp_model_t* _ccv_cnnp_upsample_copy(const ccv_cnnp_model_t* const super, void* const context);
3164
3165
static const ccv_cnnp_model_vtab_t ccv_cnnp_upsample_isa = {
3166
  .build = _ccv_cnnp_upsample_build,
3167
  .copy = _ccv_cnnp_upsample_copy,
3168
};
3169
3170
ccv_cnnp_model_t* ccv_cnnp_upsample(const int type, const float width_scale, const float height_scale, const int align_corners, const char* const name)
3171
3
{
3172
3
  ccv_cnnp_model_upsample_t* const model_upsample = (ccv_cnnp_model_upsample_t*)cccalloc(1, sizeof(ccv_cnnp_model_upsample_t));
3173
3
  model_upsample->super.isa = &ccv_cnnp_upsample_isa;
3174
3
  model_upsample->super.input_size = 1;
3175
3
  model_upsample->super.outputs = &model_upsample->output;
3176
3
  model_upsample->super.output_size = 1;
3177
3
  ccv_cnnp_model_copy_name(&model_upsample->super, name);
3178
3
  assert(type == CCV_NNC_UPSAMPLE_NEAREST || type == CCV_NNC_UPSAMPLE_BILINEAR);
3179
3
  model_upsample->type = type;
3180
3
  model_upsample->width_scale = width_scale;
3181
3
  model_upsample->height_scale = height_scale;
3182
3
  model_upsample->align_corners = align_corners;
3183
3
  return (ccv_cnnp_model_t*)model_upsample;
3184
3
}
3185
3186
static ccv_cnnp_model_t* _ccv_cnnp_upsample_copy(const ccv_cnnp_model_t* const super, void* const context)
3187
0
{
3188
0
  const ccv_cnnp_model_upsample_t* const self = (const ccv_cnnp_model_upsample_t*)super;
3189
0
  return ccv_cnnp_upsample(self->type, self->width_scale, self->height_scale, self->align_corners, self->super.name);
3190
0
}
3191
3192
// MARK - Reduce Sum Layer
3193
3194
typedef struct {
3195
  ccv_cnnp_model_t super;
3196
  int axis[CCV_NNC_MAX_DIM_ALLOC];
3197
  int count;
3198
  ccv_nnc_tensor_symbol_t output;
3199
} ccv_cnnp_model_reduce_sum_t;
3200
3201
// Build hook for the reduce-sum model: expects exactly one input and one output.
// Configures a REDUCE_SUM forward command with the stored axes, infers the output
// parameters from the input, and adds a "reduce_sum" exec symbol to the graph.
static void _ccv_cnnp_reduce_sum_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_reduce_sum_build] -\n");
  const ccv_cnnp_model_reduce_sum_t* const model = (const ccv_cnnp_model_reduce_sum_t*)super;
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t in_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  // Copy the configured axes into the command before asking it for the output shape.
  ccv_nnc_cmd_t cmd = CMD_REDUCE_SUM_FORWARD();
  cmd.info.reduce.count = model->count;
  int k;
  for (k = 0; k < model->count; k++)
    cmd.info.reduce.axis[k] = model->axis[k];
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(cmd, &in_params, 1, ccv_nnc_no_hint, &out_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, inputs, input_size, outputs, output_size, "reduce_sum");
}
3218
3219
static ccv_cnnp_model_t* _ccv_cnnp_reduce_sum_copy(const ccv_cnnp_model_t* const self, void* const context);
3220
3221
static const ccv_cnnp_model_vtab_t ccv_cnnp_reduce_sum_isa = {
3222
  .build = _ccv_cnnp_reduce_sum_build,
3223
  .copy = _ccv_cnnp_reduce_sum_copy,
3224
};
3225
3226
ccv_cnnp_model_t* ccv_cnnp_reduce_sum(const int* const axis, const int axis_count, const char* const name)
3227
1
{
3228
1
  ccv_cnnp_model_reduce_sum_t* const model_reduce_sum = (ccv_cnnp_model_reduce_sum_t*)cccalloc(1, sizeof(ccv_cnnp_model_reduce_sum_t));
3229
1
  model_reduce_sum->super.isa = &ccv_cnnp_reduce_sum_isa;
3230
1
  model_reduce_sum->super.input_size = 1;
3231
1
  model_reduce_sum->super.outputs = &model_reduce_sum->output;
3232
1
  model_reduce_sum->super.output_size = 1;
3233
1
  ccv_cnnp_model_copy_name(&model_reduce_sum->super, name);
3234
1
  assert(axis_count <= CCV_NNC_MAX_DIM_ALLOC);
3235
1
  int i;
3236
2
  for (i = 0; i < axis_count; 
i++1
)
3237
1
    model_reduce_sum->axis[i] = axis[i];
3238
1
  model_reduce_sum->count = axis_count;
3239
1
  return (ccv_cnnp_model_t*)model_reduce_sum;
3240
1
}
3241
3242
static ccv_cnnp_model_t* _ccv_cnnp_reduce_sum_copy(const ccv_cnnp_model_t* const super, void* const context)
3243
0
{
3244
0
  const ccv_cnnp_model_reduce_sum_t* const self = (const ccv_cnnp_model_reduce_sum_t*)super;
3245
0
  return ccv_cnnp_reduce_sum(self->axis, self->count, self->super.name);
3246
0
}
3247
3248
// MARK - Reduce Mean Layer
3249
3250
typedef struct {
3251
  ccv_cnnp_model_t super;
3252
  int axis[CCV_NNC_MAX_DIM_ALLOC];
3253
  int count;
3254
  ccv_nnc_tensor_symbol_t output;
3255
} ccv_cnnp_model_reduce_mean_t;
3256
3257
// Build hook for the reduce-mean model: expects exactly one input and one output.
// Configures a REDUCE_MEAN forward command with the stored axes, infers the output
// parameters from the input, and adds a "reduce_mean" exec symbol to the graph.
static void _ccv_cnnp_reduce_mean_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_reduce_mean_build] -\n");
  const ccv_cnnp_model_reduce_mean_t* const model = (const ccv_cnnp_model_reduce_mean_t*)super;
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t in_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  // Copy the configured axes into the command before asking it for the output shape.
  ccv_nnc_cmd_t cmd = CMD_REDUCE_MEAN_FORWARD();
  cmd.info.reduce.count = model->count;
  int k;
  for (k = 0; k < model->count; k++)
    cmd.info.reduce.axis[k] = model->axis[k];
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(cmd, &in_params, 1, ccv_nnc_no_hint, &out_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, inputs, input_size, outputs, output_size, "reduce_mean");
}
3274
3275
static ccv_cnnp_model_t* _ccv_cnnp_reduce_mean_copy(const ccv_cnnp_model_t* const self, void* const context);
3276
3277
static const ccv_cnnp_model_vtab_t ccv_cnnp_reduce_mean_isa = {
3278
  .build = _ccv_cnnp_reduce_mean_build,
3279
  .copy = _ccv_cnnp_reduce_mean_copy,
3280
};
3281
3282
ccv_cnnp_model_t* ccv_cnnp_reduce_mean(const int* const axis, const int axis_count, const char* const name)
3283
1
{
3284
1
  ccv_cnnp_model_reduce_mean_t* const model_reduce_mean = (ccv_cnnp_model_reduce_mean_t*)cccalloc(1, sizeof(ccv_cnnp_model_reduce_mean_t));
3285
1
  model_reduce_mean->super.isa = &ccv_cnnp_reduce_mean_isa;
3286
1
  model_reduce_mean->super.input_size = 1;
3287
1
  model_reduce_mean->super.outputs = &model_reduce_mean->output;
3288
1
  model_reduce_mean->super.output_size = 1;
3289
1
  ccv_cnnp_model_copy_name(&model_reduce_mean->super, name);
3290
1
  assert(axis_count <= CCV_NNC_MAX_DIM_ALLOC);
3291
1
  int i;
3292
2
  for (i = 0; i < axis_count; 
i++1
)
3293
1
    model_reduce_mean->axis[i] = axis[i];
3294
1
  model_reduce_mean->count = axis_count;
3295
1
  return (ccv_cnnp_model_t*)model_reduce_mean;
3296
1
}
3297
3298
static ccv_cnnp_model_t* _ccv_cnnp_reduce_mean_copy(const ccv_cnnp_model_t* const super, void* const context)
3299
0
{
3300
0
  const ccv_cnnp_model_reduce_mean_t* const self = (const ccv_cnnp_model_reduce_mean_t*)super;
3301
0
  return ccv_cnnp_reduce_mean(self->axis, self->count, self->super.name);
3302
0
}
3303
3304
// MARK - Reduce Max Layer
3305
3306
typedef struct {
3307
  ccv_cnnp_model_t super;
3308
  int axis[CCV_NNC_MAX_DIM_ALLOC];
3309
  int count;
3310
  ccv_nnc_tensor_symbol_t output;
3311
} ccv_cnnp_model_reduce_max_t;
3312
3313
// Build hook for the reduce-max model: expects exactly one input and one output.
// Configures a REDUCE_MAX forward command with the stored axes, infers the output
// parameters from the input, and adds a "reduce_max" exec symbol to the graph.
static void _ccv_cnnp_reduce_max_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_reduce_max_build] -\n");
  const ccv_cnnp_model_reduce_max_t* const model = (const ccv_cnnp_model_reduce_max_t*)super;
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t in_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  // Copy the configured axes into the command before asking it for the output shape.
  ccv_nnc_cmd_t cmd = CMD_REDUCE_MAX_FORWARD();
  cmd.info.reduce.count = model->count;
  int k;
  for (k = 0; k < model->count; k++)
    cmd.info.reduce.axis[k] = model->axis[k];
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(cmd, &in_params, 1, ccv_nnc_no_hint, &out_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, inputs, input_size, outputs, output_size, "reduce_max");
}
3330
3331
static ccv_cnnp_model_t* _ccv_cnnp_reduce_max_copy(const ccv_cnnp_model_t* const self, void* const context);
3332
3333
static const ccv_cnnp_model_vtab_t ccv_cnnp_reduce_max_isa = {
3334
  .build = _ccv_cnnp_reduce_max_build,
3335
  .copy = _ccv_cnnp_reduce_max_copy,
3336
};
3337
3338
ccv_cnnp_model_t* ccv_cnnp_reduce_max(const int* const axis, const int axis_count, const char* const name)
3339
1
{
3340
1
  ccv_cnnp_model_reduce_max_t* const model_reduce_max = (ccv_cnnp_model_reduce_max_t*)cccalloc(1, sizeof(ccv_cnnp_model_reduce_max_t));
3341
1
  model_reduce_max->super.isa = &ccv_cnnp_reduce_max_isa;
3342
1
  model_reduce_max->super.input_size = 1;
3343
1
  model_reduce_max->super.outputs = &model_reduce_max->output;
3344
1
  model_reduce_max->super.output_size = 1;
3345
1
  ccv_cnnp_model_copy_name(&model_reduce_max->super, name);
3346
1
  assert(axis_count <= CCV_NNC_MAX_DIM_ALLOC);
3347
1
  int i;
3348
2
  for (i = 0; i < axis_count; 
i++1
)
3349
1
    model_reduce_max->axis[i] = axis[i];
3350
1
  model_reduce_max->count = axis_count;
3351
1
  return (ccv_cnnp_model_t*)model_reduce_max;
3352
1
}
3353
3354
static ccv_cnnp_model_t* _ccv_cnnp_reduce_max_copy(const ccv_cnnp_model_t* const super, void* const context)
3355
0
{
3356
0
  const ccv_cnnp_model_reduce_max_t* const self = (const ccv_cnnp_model_reduce_max_t*)super;
3357
0
  return ccv_cnnp_reduce_max(self->axis, self->count, self->super.name);
3358
0
}
3359
3360
// MARK - Reduce Min Layer
3361
3362
typedef struct {
3363
  ccv_cnnp_model_t super;
3364
  int axis[CCV_NNC_MAX_DIM_ALLOC];
3365
  int count;
3366
  ccv_nnc_tensor_symbol_t output;
3367
} ccv_cnnp_model_reduce_min_t;
3368
3369
// Build hook for the reduce-min model: expects exactly one input and one output.
// Configures a REDUCE_MIN forward command with the stored axes, infers the output
// parameters from the input, and adds a "reduce_min" exec symbol to the graph.
static void _ccv_cnnp_reduce_min_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_reduce_min_build] -\n");
  const ccv_cnnp_model_reduce_min_t* const model = (const ccv_cnnp_model_reduce_min_t*)super;
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t in_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  // Copy the configured axes into the command before asking it for the output shape.
  ccv_nnc_cmd_t cmd = CMD_REDUCE_MIN_FORWARD();
  cmd.info.reduce.count = model->count;
  int k;
  for (k = 0; k < model->count; k++)
    cmd.info.reduce.axis[k] = model->axis[k];
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(cmd, &in_params, 1, ccv_nnc_no_hint, &out_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, inputs, input_size, outputs, output_size, "reduce_min");
}
3386
3387
static ccv_cnnp_model_t* _ccv_cnnp_reduce_min_copy(const ccv_cnnp_model_t* const self, void* const context);
3388
3389
static const ccv_cnnp_model_vtab_t ccv_cnnp_reduce_min_isa = {
3390
  .build = _ccv_cnnp_reduce_min_build,
3391
  .copy = _ccv_cnnp_reduce_min_copy,
3392
};
3393
3394
ccv_cnnp_model_t* ccv_cnnp_reduce_min(const int* const axis, const int axis_count, const char* const name)
3395
1
{
3396
1
  ccv_cnnp_model_reduce_min_t* const model_reduce_min = (ccv_cnnp_model_reduce_min_t*)cccalloc(1, sizeof(ccv_cnnp_model_reduce_min_t));
3397
1
  model_reduce_min->super.isa = &ccv_cnnp_reduce_min_isa;
3398
1
  model_reduce_min->super.input_size = 1;
3399
1
  model_reduce_min->super.outputs = &model_reduce_min->output;
3400
1
  model_reduce_min->super.output_size = 1;
3401
1
  ccv_cnnp_model_copy_name(&model_reduce_min->super, name);
3402
1
  assert(axis_count <= CCV_NNC_MAX_DIM_ALLOC);
3403
1
  int i;
3404
2
  for (i = 0; i < axis_count; 
i++1
)
3405
1
    model_reduce_min->axis[i] = axis[i];
3406
1
  model_reduce_min->count = axis_count;
3407
1
  return (ccv_cnnp_model_t*)model_reduce_min;
3408
1
}
3409
3410
static ccv_cnnp_model_t* _ccv_cnnp_reduce_min_copy(const ccv_cnnp_model_t* const super, void* const context)
3411
0
{
3412
0
  const ccv_cnnp_model_reduce_min_t* const self = (const ccv_cnnp_model_reduce_min_t*)super;
3413
0
  return ccv_cnnp_reduce_min(self->axis, self->count, self->super.name);
3414
0
}
3415
3416
// MARK - Reduce Norm2 Layer
3417
3418
typedef struct {
3419
  ccv_cnnp_model_t super;
3420
  int axis[CCV_NNC_MAX_DIM_ALLOC];
3421
  int count;
3422
  ccv_nnc_tensor_symbol_t output;
3423
} ccv_cnnp_model_reduce_norm2_t;
3424
3425
// Build hook for the reduce-norm2 model: expects exactly one input and one output.
// Configures a REDUCE_NORM2 forward command with the stored axes, infers the output
// parameters from the input, and adds a "reduce_norm2" exec symbol to the graph.
static void _ccv_cnnp_reduce_norm2_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  const ccv_cnnp_model_reduce_norm2_t* const model = (const ccv_cnnp_model_reduce_norm2_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_reduce_norm2_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t in_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  // Copy the configured axes into the command before asking it for the output shape.
  ccv_nnc_cmd_t cmd = CMD_REDUCE_NORM2_FORWARD();
  cmd.info.reduce.count = model->count;
  int k;
  for (k = 0; k < model->count; k++)
    cmd.info.reduce.axis[k] = model->axis[k];
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(cmd, &in_params, 1, ccv_nnc_no_hint, &out_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, inputs, input_size, outputs, output_size, "reduce_norm2");
}
3442
3443
static ccv_cnnp_model_t* _ccv_cnnp_reduce_norm2_copy(const ccv_cnnp_model_t* const self, void* const context);
3444
3445
static const ccv_cnnp_model_vtab_t ccv_cnnp_reduce_norm2_isa = {
3446
  .build = _ccv_cnnp_reduce_norm2_build,
3447
  .copy = _ccv_cnnp_reduce_norm2_copy,
3448
};
3449
3450
ccv_cnnp_model_t* ccv_cnnp_reduce_norm2(const int* const axis, const int axis_count, const char* const name)
3451
1
{
3452
1
  ccv_cnnp_model_reduce_norm2_t* const model_reduce_norm2 = (ccv_cnnp_model_reduce_norm2_t*)cccalloc(1, sizeof(ccv_cnnp_model_reduce_norm2_t));
3453
1
  model_reduce_norm2->super.isa = &ccv_cnnp_reduce_norm2_isa;
3454
1
  model_reduce_norm2->super.input_size = 1;
3455
1
  model_reduce_norm2->super.outputs = &model_reduce_norm2->output;
3456
1
  model_reduce_norm2->super.output_size = 1;
3457
1
  ccv_cnnp_model_copy_name(&model_reduce_norm2->super, name);
3458
1
  assert(axis_count <= CCV_NNC_MAX_DIM_ALLOC);
3459
1
  int i;
3460
2
  for (i = 0; i < axis_count; 
i++1
)
3461
1
    model_reduce_norm2->axis[i] = axis[i];
3462
1
  model_reduce_norm2->count = axis_count;
3463
1
  return (ccv_cnnp_model_t*)model_reduce_norm2;
3464
1
}
3465
3466
static ccv_cnnp_model_t* _ccv_cnnp_reduce_norm2_copy(const ccv_cnnp_model_t* const super, void* const context)
3467
0
{
3468
0
  const ccv_cnnp_model_reduce_norm2_t* const self = (const ccv_cnnp_model_reduce_norm2_t*)super;
3469
0
  return ccv_cnnp_reduce_norm2(self->axis, self->count, self->super.name);
3470
0
}
3471
3472
// MARK - Argmax Layer
3473
3474
typedef struct {
3475
  ccv_cnnp_model_t super;
3476
  int axis;
3477
  ccv_nnc_tensor_symbol_t output;
3478
} ccv_cnnp_model_argmax_t;
3479
3480
// Build hook for the argmax model: expects exactly one input and one output.
// Configures an ARGMAX forward command over the stored axis, infers the output
// parameters from the input, and adds an "argmax" exec symbol to the graph.
static void _ccv_cnnp_argmax_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  const ccv_cnnp_model_argmax_t* const model = (const ccv_cnnp_model_argmax_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_argmax_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t in_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  // Exactly one axis is configured for this command.
  ccv_nnc_cmd_t cmd = CMD_ARGMAX_FORWARD();
  cmd.info.reduce.count = 1;
  cmd.info.reduce.axis[0] = model->axis;
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(cmd, &in_params, 1, ccv_nnc_no_hint, &out_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, inputs, input_size, outputs, output_size, "argmax");
}
3495
3496
static ccv_cnnp_model_t* _ccv_cnnp_argmax_copy(const ccv_cnnp_model_t* const self, void* const context);
3497
3498
static const ccv_cnnp_model_vtab_t ccv_cnnp_argmax_isa = {
3499
  .build = _ccv_cnnp_argmax_build,
3500
  .copy = _ccv_cnnp_argmax_copy,
3501
};
3502
3503
ccv_cnnp_model_t* ccv_cnnp_argmax(const int axis, const char* const name)
3504
1
{
3505
1
  ccv_cnnp_model_argmax_t* const model_argmax = (ccv_cnnp_model_argmax_t*)cccalloc(1, sizeof(ccv_cnnp_model_argmax_t));
3506
1
  model_argmax->super.isa = &ccv_cnnp_argmax_isa;
3507
1
  model_argmax->super.input_size = 1;
3508
1
  model_argmax->super.outputs = &model_argmax->output;
3509
1
  model_argmax->super.output_size = 1;
3510
1
  ccv_cnnp_model_copy_name(&model_argmax->super, name);
3511
1
  model_argmax->axis = axis;
3512
1
  return (ccv_cnnp_model_t*)model_argmax;
3513
1
}
3514
3515
static ccv_cnnp_model_t* _ccv_cnnp_argmax_copy(const ccv_cnnp_model_t* const super, void* const context)
3516
0
{
3517
0
  const ccv_cnnp_model_argmax_t* const self = (const ccv_cnnp_model_argmax_t*)super;
3518
0
  return ccv_cnnp_argmax(self->axis, self->super.name);
3519
0
}
3520
3521
// MARK - Argmin Layer
3522
3523
typedef struct {
3524
  ccv_cnnp_model_t super;
3525
  int axis;
3526
  ccv_nnc_tensor_symbol_t output;
3527
} ccv_cnnp_model_argmin_t;
3528
3529
// Build hook for the argmin model: expects exactly one input and one output.
// Configures an ARGMIN forward command over the stored axis, infers the output
// parameters from the input, and adds an "argmin" exec symbol to the graph.
static void _ccv_cnnp_argmin_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  const ccv_cnnp_model_argmin_t* const model = (const ccv_cnnp_model_argmin_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_argmin_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t in_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  // Exactly one axis is configured for this command.
  ccv_nnc_cmd_t cmd = CMD_ARGMIN_FORWARD();
  cmd.info.reduce.count = 1;
  cmd.info.reduce.axis[0] = model->axis;
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(cmd, &in_params, 1, ccv_nnc_no_hint, &out_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, inputs, input_size, outputs, output_size, "argmin");
}
3544
3545
static ccv_cnnp_model_t* _ccv_cnnp_argmin_copy(const ccv_cnnp_model_t* const self, void* const context);
3546
3547
static const ccv_cnnp_model_vtab_t ccv_cnnp_argmin_isa = {
3548
  .build = _ccv_cnnp_argmin_build,
3549
  .copy = _ccv_cnnp_argmin_copy,
3550
};
3551
3552
ccv_cnnp_model_t* ccv_cnnp_argmin(const int axis, const char* const name)
3553
1
{
3554
1
  ccv_cnnp_model_argmin_t* const model_argmin = (ccv_cnnp_model_argmin_t*)cccalloc(1, sizeof(ccv_cnnp_model_argmin_t));
3555
1
  model_argmin->super.isa = &ccv_cnnp_argmin_isa;
3556
1
  model_argmin->super.input_size = 1;
3557
1
  model_argmin->super.outputs = &model_argmin->output;
3558
1
  model_argmin->super.output_size = 1;
3559
1
  ccv_cnnp_model_copy_name(&model_argmin->super, name);
3560
1
  model_argmin->axis = axis;
3561
1
  return (ccv_cnnp_model_t*)model_argmin;
3562
1
}
3563
3564
static ccv_cnnp_model_t* _ccv_cnnp_argmin_copy(const ccv_cnnp_model_t* const super, void* const context)
3565
0
{
3566
0
  const ccv_cnnp_model_argmin_t* const self = (const ccv_cnnp_model_argmin_t*)super;
3567
0
  return ccv_cnnp_argmin(self->axis, self->super.name);
3568
0
}
3569
3570
// MARK - Min Layer
3571
3572
typedef struct {
3573
  ccv_cnnp_model_t super;
3574
  ccv_nnc_tensor_symbol_t output;
3575
} ccv_cnnp_model_min_t;
3576
3577
// Build hook for the min model: expects exactly two inputs and one output.
// Infers the output parameters from both inputs with the MIN forward command
// and adds a "min" exec symbol to the graph.
static void _ccv_cnnp_min_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_min_build] -\n");
  assert(input_size == 2);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t operand_params[2];
  operand_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  operand_params[1] = ccv_nnc_tensor_symbol_params(graph, inputs[1]);
  const ccv_nnc_cmd_t cmd = CMD_MIN_FORWARD();
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(cmd, operand_params, 2, ccv_nnc_no_hint, &out_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, inputs, input_size, outputs, output_size, "min");
}
3592
3593
static ccv_cnnp_model_t* _ccv_cnnp_min_copy(const ccv_cnnp_model_t* const self, void* const context);
3594
3595
static const ccv_cnnp_model_vtab_t ccv_cnnp_min_isa = {
3596
  .build = _ccv_cnnp_min_build,
3597
  .copy = _ccv_cnnp_min_copy,
3598
};
3599
3600
ccv_cnnp_model_t* ccv_cnnp_min(const char* const name)
3601
1
{
3602
1
  ccv_cnnp_model_min_t* const model_min = (ccv_cnnp_model_min_t*)cccalloc(1, sizeof(ccv_cnnp_model_min_t));
3603
1
  model_min->super.isa = &ccv_cnnp_min_isa;
3604
1
  model_min->super.input_size = 2;
3605
1
  model_min->super.outputs = &model_min->output;
3606
1
  model_min->super.output_size = 1;
3607
1
  ccv_cnnp_model_copy_name(&model_min->super, name);
3608
1
  return (ccv_cnnp_model_t*)model_min;
3609
1
}
3610
3611
static ccv_cnnp_model_t* _ccv_cnnp_min_copy(const ccv_cnnp_model_t* const super, void* const context)
3612
0
{
3613
0
  const ccv_cnnp_model_min_t* const self = (const ccv_cnnp_model_min_t*)super;
3614
0
  return ccv_cnnp_min(self->super.name);
3615
0
}
3616
3617
// MARK - Max Layer
3618
3619
typedef struct {
3620
  ccv_cnnp_model_t super;
3621
  ccv_nnc_tensor_symbol_t output;
3622
} ccv_cnnp_model_max_t;
3623
3624
// Build hook for the max model: expects exactly two inputs and one output.
// Infers the output parameters from both inputs with the MAX forward command
// and adds a "max" exec symbol to the graph.
static void _ccv_cnnp_max_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_max_build] -\n");
  assert(input_size == 2);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t operand_params[2];
  operand_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  operand_params[1] = ccv_nnc_tensor_symbol_params(graph, inputs[1]);
  const ccv_nnc_cmd_t cmd = CMD_MAX_FORWARD();
  ccv_nnc_tensor_param_t out_params;
  ccv_nnc_hint_tensor_auto(cmd, operand_params, 2, ccv_nnc_no_hint, &out_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, out_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, inputs, input_size, outputs, output_size, "max");
}
3639
3640
static ccv_cnnp_model_t* _ccv_cnnp_max_copy(const ccv_cnnp_model_t* const self, void* const context);
3641
3642
static const ccv_cnnp_model_vtab_t ccv_cnnp_max_isa = {
3643
  .build = _ccv_cnnp_max_build,
3644
  .copy = _ccv_cnnp_max_copy,
3645
};
3646
3647
ccv_cnnp_model_t* ccv_cnnp_max(const char* const name)
3648
1
{
3649
1
  ccv_cnnp_model_max_t* const model_max = (ccv_cnnp_model_max_t*)cccalloc(1, sizeof(ccv_cnnp_model_max_t));
3650
1
  model_max->super.isa = &ccv_cnnp_max_isa;
3651
1
  model_max->super.input_size = 2;
3652
1
  model_max->super.outputs = &model_max->output;
3653
1
  model_max->super.output_size = 1;
3654
1
  ccv_cnnp_model_copy_name(&model_max->super, name);
3655
1
  return (ccv_cnnp_model_t*)model_max;
3656
1
}
3657
3658
static ccv_cnnp_model_t* _ccv_cnnp_max_copy(const ccv_cnnp_model_t* const super, void* const context)
3659
0
{
3660
0
  const ccv_cnnp_model_max_t* const self = (const ccv_cnnp_model_max_t*)super;
3661
0
  return ccv_cnnp_max(self->super.name);
3662
0
}
3663
3664
// MARK - LSTM Layer
3665
3666
typedef struct {
3667
  ccv_cnnp_model_t super;
3668
  int masked;
3669
  ccv_nnc_tensor_symbol_t output;
3670
  ccv_nnc_tensor_symbol_t weights;
3671
  ccv_nnc_tensor_symbol_t reserves;
3672
  ccv_nnc_cmd_param_t params;
3673
  ccv_nnc_graph_exec_symbol_t lstm;
3674
} ccv_cnnp_model_lstm_t;
3675
3676
// Computes the first dimension of the packed LSTM weight tensor.
// bidirectional: non-zero doubles the direction count (D = 2), otherwise D = 1.
// num_layers, input_size, hidden_size, proj_size: LSTM geometry.
// bias: non-zero adds 8 entries per layer.
// When hidden_size != proj_size, the recurrent terms use proj_size and an extra
// num_layers * proj_size term is added.
static int _ccv_cnnp_lstm_weight_dim(int bidirectional, int num_layers, int input_size, int hidden_size, int proj_size, int bias)
{
  const int directions = bidirectional ? 2 : 1;
  const int bias_rows = num_layers * (bias ? 8 : 0);
  if (hidden_size != proj_size)
  {
    const int upper_layers = (num_layers - 1) * (proj_size * 4 * directions + proj_size * 4);
    const int first_layer = proj_size * 4 + input_size * 4;
    const int projections = num_layers * proj_size;
    return (bias_rows + upper_layers + first_layer + projections) * directions;
  }
  const int upper_layers = (num_layers - 1) * (hidden_size * 4 * directions + hidden_size * 4);
  const int first_layer = input_size * 4 + hidden_size * 4;
  return (bias_rows + upper_layers + first_layer) * directions;
}
3684
3685
// Build hook for the LSTM model.
// Expects input_size to match the model's configured input count (1, or 2 when masked)
// and exactly one output. Creates the output symbol, lazily creates the "weights" and
// "reserves" symbols, and adds an "lstm" exec symbol whose handle is stored on the model.
static void _ccv_cnnp_lstm_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_lstm_t* const self = (ccv_cnnp_model_lstm_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_lstm_build] -\n");
  assert(input_size == self->super.input_size);
  assert(output_size == 1);
  // A proj_size of 0 means "same as hidden_size".
  const int proj_size = self->params.rnn.proj_size == 0 ? self->params.rnn.hidden_size : self->params.rnn.proj_size;
  // Slot layout mirrors the exec symbol below: 0 = x, 1 = optional mask, 2 and 3 = not supplied, 4 = weights.
  // Zero the whole array so the slots that are never assigned are not passed on with indeterminate contents.
  ccv_nnc_tensor_param_t input_params[5];
  memset(input_params, 0, sizeof(input_params));
  input_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  if (input_size == 2)
    input_params[1] = ccv_nnc_tensor_symbol_params(graph, inputs[1]);
  // The weight parameters inherit everything but the shape from the input.
  input_params[4] = input_params[0];
  memset(input_params[4].dim, 0, sizeof(input_params[4].dim));
  const int x_nd = ccv_nnc_tensor_nd(input_params[0].dim);
  assert(x_nd > 0); // The feature count is read from the last dimension, so there must be one.
  const int feature_count = input_params[0].dim[x_nd - 1];
  input_params[4].dim[0] = _ccv_cnnp_lstm_weight_dim(self->params.rnn.bidirectional, self->params.rnn.num_layers, feature_count, self->params.rnn.hidden_size, proj_size, self->params.rnn.bias);
  input_params[4].dim[1] = self->params.rnn.hidden_size;
  const ccv_nnc_cmd_t lstm = ccv_nnc_cmd(CCV_NNC_LSTM_FORWARD, 0, self->params, 0);
  ccv_nnc_tensor_param_t output_params[4];
  ccv_nnc_hint_tensor_auto(lstm, input_params, 5, ccv_nnc_no_hint, output_params, 4);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
  // Reuse previously created symbols when the model has already been bound to a graph.
  if (!self->weights.graph)
    self->weights = ccv_nnc_tensor_symbol_new(graph, input_params[4], "weights");
  if (!self->reserves.graph)
    self->reserves = ccv_nnc_tensor_symbol_new(graph, output_params[3], "reserves");
  const ccv_nnc_tensor_symbol_t mask = input_size == 2 ? inputs[1] : NO_TENSOR_SYMBOL;
  self->lstm = ccv_nnc_graph_exec_symbol_new(graph, lstm, TENSOR_SYMBOL_LIST(inputs[0], mask, NO_TENSOR_SYMBOL, NO_TENSOR_SYMBOL, self->weights), TENSOR_SYMBOL_LIST(outputs[0], NO_TENSOR_SYMBOL, NO_TENSOR_SYMBOL, self->reserves), "lstm");
}
3713
3714
// init_states hook for the LSTM model: when the weight symbol exists, asks the
// initializer to fill it with uniform random values in [-stdv, stdv],
// where stdv = 1 / sqrt(hidden_size). Does nothing if the weights were never created.
static void _ccv_cnnp_lstm_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
{
  ccv_cnnp_model_lstm_t* const model = (ccv_cnnp_model_lstm_t*)super;
  if (!model->weights.graph)
    return;
  const float stdv = 1.0 / sqrt(model->params.rnn.hidden_size);
  initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-stdv, stdv), ccv_nnc_no_hint, 0, 0, model->weights);
}
3723
3724
// add_to_parameter hook for the LSTM model: registers the weight symbol with the
// given trainable flag. Does nothing if the weights were never created.
static void _ccv_cnnp_lstm_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
{
  ccv_cnnp_model_lstm_t* const model = (ccv_cnnp_model_lstm_t*)super;
  if (!model->weights.graph)
    return;
  add_to_array(parameters, model->weights, is_trainable);
}
3730
3731
// set_is_test hook for the LSTM model: records the flag in the stored rnn parameters
// and hands the updater a rebuilt LSTM forward command for the stored exec symbol.
// Does nothing if the exec symbol was never created.
static void _ccv_cnnp_lstm_set_is_test(ccv_cnnp_model_t* const super, const int is_test, const ccv_cnnp_cmd_updater_f updater, void* const context)
{
  ccv_cnnp_model_lstm_t* const model = (ccv_cnnp_model_lstm_t*)super;
  if (!model->lstm.graph)
    return;
  model->params.rnn.is_test = is_test;
  const ccv_nnc_cmd_t refreshed = ccv_nnc_cmd(CCV_NNC_LSTM_FORWARD, 0, model->params, 0);
  updater(context, model->lstm, refreshed, ccv_nnc_no_hint);
}
3740
3741
static ccv_cnnp_model_t* _ccv_cnnp_lstm_copy(const ccv_cnnp_model_t* const self, void* const context);
3742
3743
static const ccv_cnnp_model_vtab_t ccv_cnnp_lstm_isa = {
3744
  .build = _ccv_cnnp_lstm_build,
3745
  .init_states = _ccv_cnnp_lstm_init_states,
3746
  .add_to_parameter = _ccv_cnnp_lstm_add_to_parameter,
3747
  .copy = _ccv_cnnp_lstm_copy,
3748
  .set_is_test = _ccv_cnnp_lstm_set_is_test,
3749
};
3750
3751
ccv_cnnp_model_t* ccv_cnnp_lstm(const int masked, const int hidden_size, const int proj_size, const int num_layers, const int bias, const int batch_first, const int bidirectional, const float dropout, const int is_trainable, const char* const name)
3752
1
{
3753
1
  ccv_cnnp_model_lstm_t* const model_lstm = (ccv_cnnp_model_lstm_t*)cccalloc(1, sizeof(ccv_cnnp_model_lstm_t));
3754
1
  model_lstm->super.isa = &ccv_cnnp_lstm_isa;
3755
1
  model_lstm->super.input_size = masked ? 2 : 
10
;
3756
1
  model_lstm->super.outputs = &model_lstm->output;
3757
1
  model_lstm->super.output_size = 1;
3758
1
  model_lstm->super.is_trainable = is_trainable;
3759
1
  ccv_cnnp_model_copy_name(&model_lstm->super, name);
3760
1
  model_lstm->masked = masked;
3761
1
  model_lstm->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
3762
1
  model_lstm->weights.graph = 0;
3763
1
  model_lstm->params.rnn.hidden_size = hidden_size;
3764
1
  model_lstm->params.rnn.proj_size = proj_size;
3765
1
  model_lstm->params.rnn.num_layers = num_layers;
3766
1
  model_lstm->params.rnn.bias = bias;
3767
1
  model_lstm->params.rnn.batch_first = batch_first;
3768
1
  model_lstm->params.rnn.bidirectional = bidirectional;
3769
1
  model_lstm->params.rnn.dropout = dropout;
3770
1
  return (ccv_cnnp_model_t*)model_lstm;
3771
1
}
3772
3773
static ccv_cnnp_model_t* _ccv_cnnp_lstm_copy(const ccv_cnnp_model_t* const super, void* const context)
3774
0
{
3775
0
  const ccv_cnnp_model_lstm_t* const self = (const ccv_cnnp_model_lstm_t*)super;
3776
0
  return ccv_cnnp_lstm(self->masked, self->params.rnn.hidden_size, self->params.rnn.proj_size, self->params.rnn.num_layers, self->params.rnn.bias, self->params.rnn.batch_first, self->params.rnn.bidirectional, self->params.rnn.dropout, self->super.is_trainable, self->super.name);
3777
0
}
3778
3779
// MARK - Datatype Conversion Layer
3780
3781
typedef struct {
3782
  ccv_cnnp_model_t super;
3783
  ccv_nnc_tensor_symbol_t output;
3784
  int datatype;
3785
  int ref_to_last;
3786
} ccv_cnnp_model_datatype_conversion_t;
3787
3788
// Build hook: creates an output symbol shaped like inputs[0] but with the target datatype,
// then adds the datatype conversion command from inputs[0] to that output.
static void _ccv_cnnp_datatype_conversion_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_datatype_conversion_t* const conversion = (ccv_cnnp_model_datatype_conversion_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_datatype_conversion_build] -\n");
  ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  if (!conversion->ref_to_last)
    output_params.datatype = conversion->datatype;
  else {
    // The last input is only a reference for the datatype, it is not converted.
    assert(input_size > 1);
    const ccv_nnc_tensor_param_t reference_params = ccv_nnc_tensor_symbol_params(graph, inputs[input_size - 1]);
    output_params.datatype = reference_params.datatype;
  }
  assert(output_size == 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  // Only the first input feeds the command, hence output_size (== 1) as the input count.
  ccv_nnc_graph_exec_symbol_new(graph, CMD_DATATYPE_CONVERSION_FORWARD(), inputs, output_size /* intentional */, outputs, output_size, 0);
}
3804
3805
static ccv_cnnp_model_t* _ccv_cnnp_datatype_conversion_copy(const ccv_cnnp_model_t* const self, void* const context);
3806
3807
static const ccv_cnnp_model_vtab_t ccv_cnnp_datatype_conversion_isa = {
3808
  .build = _ccv_cnnp_datatype_conversion_build,
3809
  .copy = _ccv_cnnp_datatype_conversion_copy,
3810
};
3811
3812
ccv_cnnp_model_t* ccv_cnnp_datatype_conversion(const int datatype, const int ref_to_last, const char* const name)
3813
2
{
3814
2
  ccv_cnnp_model_datatype_conversion_t* const model_datatype_conversion = (ccv_cnnp_model_datatype_conversion_t*)cccalloc(1, sizeof(ccv_cnnp_model_datatype_conversion_t));
3815
2
  model_datatype_conversion->super.isa = &ccv_cnnp_datatype_conversion_isa;
3816
2
  model_datatype_conversion->super.input_size = 0;
3817
2
  model_datatype_conversion->super.outputs = &model_datatype_conversion->output;
3818
2
  model_datatype_conversion->super.output_size = 1;
3819
2
  model_datatype_conversion->datatype = datatype;
3820
2
  model_datatype_conversion->ref_to_last = ref_to_last;
3821
2
  ccv_cnnp_model_copy_name(&model_datatype_conversion->super, name);
3822
2
  return (ccv_cnnp_model_t*)model_datatype_conversion;
3823
2
}
3824
3825
static ccv_cnnp_model_t* _ccv_cnnp_datatype_conversion_copy(const ccv_cnnp_model_t* const super, void* const context)
3826
0
{
3827
0
  ccv_cnnp_model_datatype_conversion_t* const self = (ccv_cnnp_model_datatype_conversion_t*)super;
3828
0
  return ccv_cnnp_datatype_conversion(self->datatype, self->ref_to_last, self->super.name);
3829
0
}
3830
3831
/// MARK - Clamp layer.
3832
3833
typedef struct {
3834
  ccv_cnnp_model_t super;
3835
  ccv_nnc_tensor_symbol_t output;
3836
  float min;
3837
  float max;
3838
} ccv_cnnp_model_clamp_t;
3839
3840
// Build hook: the output symbol shares the parameters of inputs[0] and a clamp command with
// the stored min / max is added from inputs[0] to it.
static void _ccv_cnnp_clamp_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_clamp_t* const clamp = (ccv_cnnp_model_clamp_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_clamp_build] -\n");
  ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  assert(output_size == 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  // Only the first input feeds the command, hence output_size (== 1) as the input count.
  ccv_nnc_graph_exec_symbol_new(graph, CMD_CLAMP_FORWARD(clamp->min, clamp->max), inputs, output_size /* intentional */, outputs, output_size, 0);
}
3849
3850
static ccv_cnnp_model_t* _ccv_cnnp_clamp_copy(const ccv_cnnp_model_t* const self, void* const context);
3851
3852
static const ccv_cnnp_model_vtab_t ccv_cnnp_clamp_isa = {
3853
  .build = _ccv_cnnp_clamp_build,
3854
  .copy = _ccv_cnnp_clamp_copy,
3855
};
3856
3857
ccv_cnnp_model_t* ccv_cnnp_clamp(const float min, const float max, const char* const name)
3858
0
{
3859
0
  ccv_cnnp_model_clamp_t* const model_clamp = (ccv_cnnp_model_clamp_t*)cccalloc(1, sizeof(ccv_cnnp_model_clamp_t));
3860
0
  model_clamp->super.isa = &ccv_cnnp_clamp_isa;
3861
0
  model_clamp->super.input_size = 0;
3862
0
  model_clamp->super.outputs = &model_clamp->output;
3863
0
  model_clamp->super.output_size = 1;
3864
0
  model_clamp->min = min;
3865
0
  model_clamp->max = max;
3866
0
  ccv_cnnp_model_copy_name(&model_clamp->super, name);
3867
0
  return (ccv_cnnp_model_t*)model_clamp;
3868
0
}
3869
3870
static ccv_cnnp_model_t* _ccv_cnnp_clamp_copy(const ccv_cnnp_model_t* const super, void* const context)
3871
0
{
3872
0
  ccv_cnnp_model_clamp_t* const self = (ccv_cnnp_model_clamp_t*)super;
3873
0
  return ccv_cnnp_clamp(self->min, self->max, self->super.name);
3874
0
}
3875
3876
// MARK - Parameter Layer
3877
3878
typedef struct {
3879
  ccv_cnnp_model_t super;
3880
  float init_bound;
3881
  ccv_nnc_tensor_symbol_t weights;
3882
  ccv_nnc_tensor_param_t weights_params;
3883
  ccv_nnc_tensor_symbol_t output;
3884
} ccv_cnnp_model_parameter_t;
3885
3886
// Build hook: creates the weights symbol if it is not bound to a graph yet and exposes it as
// the only output. No command is added to the graph.
static void _ccv_cnnp_parameter_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_parameter_build] -\n");
  assert(output_size == 1);
  ccv_cnnp_model_parameter_t* const parameter = (ccv_cnnp_model_parameter_t*)super;
  const int is_bound = (parameter->weights.graph != 0);
  if (!is_bound)
    parameter->weights = ccv_nnc_tensor_symbol_new(graph, parameter->weights_params, "weights");
  // A previously created symbol must belong to the graph we are building into.
  assert(parameter->weights.graph == graph);
  outputs[0] = parameter->weights;
}
3896
3897
// Init hook: uniform random in [-init_bound, init_bound] when init_bound is positive,
// otherwise the weights are set to 0.
static void _ccv_cnnp_parameter_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
{
  ccv_cnnp_model_parameter_t* const parameter = (ccv_cnnp_model_parameter_t*)super;
  if (!(parameter->init_bound > 0))
  {
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, parameter->weights);
    return;
  }
  initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-parameter->init_bound, parameter->init_bound), ccv_nnc_no_hint, 0, 0, parameter->weights);
}
3905
3906
// Reports the weights symbol through the add_to_array callback.
static void _ccv_cnnp_parameter_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
{
  ccv_cnnp_model_parameter_t* const parameter = (ccv_cnnp_model_parameter_t*)super;
  add_to_array(parameters, parameter->weights, is_trainable);
}
3911
3912
static ccv_cnnp_model_t* _ccv_cnnp_parameter_copy(const ccv_cnnp_model_t* const super, void* const context);
3913
3914
static const ccv_cnnp_model_vtab_t ccv_cnnp_parameter_isa = {
3915
  .build = _ccv_cnnp_parameter_build,
3916
  .init_states = _ccv_cnnp_parameter_init_states,
3917
  .add_to_parameter = _ccv_cnnp_parameter_add_to_parameter,
3918
  .copy = _ccv_cnnp_parameter_copy,
3919
};
3920
3921
ccv_cnnp_model_t* ccv_cnnp_parameter(const ccv_nnc_tensor_param_t params, const float init_bound, const int is_trainable, const char* const name)
3922
1
{
3923
1
  ccv_cnnp_model_parameter_t* const model_parameter = (ccv_cnnp_model_parameter_t*)cccalloc(1, sizeof(ccv_cnnp_model_parameter_t));
3924
1
  model_parameter->super.isa = &ccv_cnnp_parameter_isa;
3925
1
  model_parameter->super.input_size = 0;
3926
1
  model_parameter->super.outputs = &model_parameter->output;
3927
1
  model_parameter->super.output_size = 1;
3928
1
  model_parameter->super.is_trainable = is_trainable;
3929
1
  ccv_cnnp_model_copy_name(&model_parameter->super, name);
3930
1
  model_parameter->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
3931
1
  model_parameter->weights.graph = 0;
3932
1
  model_parameter->weights_params = params;
3933
1
  return (ccv_cnnp_model_t*)model_parameter;
3934
1
}
3935
3936
static ccv_cnnp_model_t* _ccv_cnnp_parameter_copy(const ccv_cnnp_model_t* const super, void* const context)
3937
0
{
3938
0
  const ccv_cnnp_model_parameter_t* const self = (const ccv_cnnp_model_parameter_t*)super;
3939
0
  return ccv_cnnp_parameter(self->weights_params, self->init_bound, self->super.is_trainable, self->super.name);
3940
0
}
3941
3942
// MARK - Scalar Layer
3943
3944
typedef struct {
3945
  ccv_cnnp_model_t super;
3946
  int type;
3947
  int format;
3948
  int datatype;
3949
  float value;
3950
  ccv_nnc_tensor_symbol_t output;
3951
} ccv_cnnp_model_scalar_t;
3952
3953
// Build hook: creates a tensor with dim {1} and fills it with the stored value. When an input
// is given, type / format / datatype are taken from inputs[0] instead of the stored ones.
static void _ccv_cnnp_scalar_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_scalar_build] -\n");
  assert(output_size == 1);
  ccv_cnnp_model_scalar_t* const scalar = (ccv_cnnp_model_scalar_t*)super;
  ccv_nnc_tensor_param_t scalar_params = {0};
  scalar_params.dim[0] = 1;
  if (input_size > 0)
  {
    const ccv_nnc_tensor_param_t reference_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
    scalar_params.type = reference_params.type;
    scalar_params.format = reference_params.format;
    scalar_params.datatype = reference_params.datatype;
  } else {
    scalar_params.type = scalar->type;
    scalar_params.format = scalar->format;
    scalar_params.datatype = scalar->datatype;
  }
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, scalar_params, 0);
  // The set command takes no inputs, it only writes the output.
  ccv_nnc_graph_exec_symbol_new(graph, CMD_SET_FORWARD(scalar->value), 0, 0, outputs, 1, 0);
}
3976
3977
static ccv_cnnp_model_t* _ccv_cnnp_scalar_copy(const ccv_cnnp_model_t* const super, void* const context);
3978
3979
static const ccv_cnnp_model_vtab_t ccv_cnnp_scalar_isa = {
3980
  .build = _ccv_cnnp_scalar_build,
3981
  .copy = _ccv_cnnp_scalar_copy,
3982
};
3983
3984
ccv_cnnp_model_t* ccv_cnnp_scalar(const int type, const int format, const int datatype, const float value, const char* const name)
3985
2
{
3986
2
  ccv_cnnp_model_scalar_t* const model_scalar = (ccv_cnnp_model_scalar_t*)cccalloc(1, sizeof(ccv_cnnp_model_scalar_t));
3987
2
  model_scalar->super.isa = &ccv_cnnp_scalar_isa;
3988
2
  model_scalar->super.input_size = 0;
3989
2
  model_scalar->super.outputs = &model_scalar->output;
3990
2
  model_scalar->super.output_size = 1;
3991
2
  ccv_cnnp_model_copy_name(&model_scalar->super, name);
3992
2
  model_scalar->type = type;
3993
2
  model_scalar->format = format;
3994
2
  model_scalar->datatype = datatype;
3995
2
  model_scalar->value = value;
3996
2
  return (ccv_cnnp_model_t*)model_scalar;
3997
2
}
3998
3999
static ccv_cnnp_model_t* _ccv_cnnp_scalar_copy(const ccv_cnnp_model_t* const super, void* const context)
4000
0
{
4001
0
  const ccv_cnnp_model_scalar_t* const self = (const ccv_cnnp_model_scalar_t*)super;
4002
0
  return ccv_cnnp_scalar(self->type, self->format, self->datatype, self->value, self->super.name);
4003
0
}
4004
4005
// MARK - Variable Layer
4006
4007
typedef struct {
4008
  ccv_cnnp_model_t super;
4009
  ccv_nnc_tensor_param_t params;
4010
  ccv_nnc_tensor_symbol_t output;
4011
} ccv_cnnp_model_variable_t;
4012
4013
// Build hook: takes no input and outputs a new tensor symbol with the stored parameters.
// No command is added to the graph.
static void _ccv_cnnp_variable_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_variable_build] -\n");
  assert(input_size == 0);
  assert(output_size == 1);
  const ccv_cnnp_model_variable_t* const variable = (const ccv_cnnp_model_variable_t*)super;
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, variable->params, 0);
}
4021
4022
static ccv_cnnp_model_t* _ccv_cnnp_variable_copy(const ccv_cnnp_model_t* const super, void* const context);
4023
4024
static const ccv_cnnp_model_vtab_t ccv_cnnp_variable_isa = {
4025
  .build = _ccv_cnnp_variable_build,
4026
  .copy = _ccv_cnnp_variable_copy,
4027
};
4028
4029
ccv_cnnp_model_t* ccv_cnnp_variable(const ccv_nnc_tensor_param_t params, const char* const name)
4030
1
{
4031
1
  ccv_cnnp_model_variable_t* const model_variable = (ccv_cnnp_model_variable_t*)cccalloc(1, sizeof(ccv_cnnp_model_variable_t));
4032
1
  model_variable->super.isa = &ccv_cnnp_variable_isa;
4033
1
  model_variable->super.input_size = 0;
4034
1
  model_variable->super.outputs = &model_variable->output;
4035
1
  model_variable->super.output_size = 1;
4036
1
  ccv_cnnp_model_copy_name(&model_variable->super, name);
4037
1
  model_variable->params = params;
4038
1
  return (ccv_cnnp_model_t*)model_variable;
4039
1
}
4040
4041
static ccv_cnnp_model_t* _ccv_cnnp_variable_copy(const ccv_cnnp_model_t* const super, void* const context)
4042
0
{
4043
0
  const ccv_cnnp_model_variable_t* const self = (const ccv_cnnp_model_variable_t*)super;
4044
0
  return ccv_cnnp_variable(self->params, self->super.name);
4045
0
}
4046
4047
// MARK - Move Layer
4048
4049
typedef struct {
4050
  ccv_cnnp_model_t super;
4051
  ccv_nnc_tensor_symbol_t output;
4052
} ccv_cnnp_model_move_t;
4053
4054
// Build hook: adds a format transform that reads inputs[0] and writes into the symbol given
// as inputs[1]; that second input is also what the layer exposes as its output.
static void _ccv_cnnp_move_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_move_build] -\n");
  assert(input_size == 2);
  assert(output_size == 1);
  const ccv_nnc_tensor_symbol_t destination = inputs[1];
  outputs[0] = destination;
  ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), inputs, 1, outputs, 1, "move");
}
4062
4063
static ccv_cnnp_model_t* _ccv_cnnp_move_copy(const ccv_cnnp_model_t* const super, void* const context);
4064
4065
static const ccv_cnnp_model_vtab_t ccv_cnnp_move_isa = {
4066
  .build = _ccv_cnnp_move_build,
4067
  .copy = _ccv_cnnp_move_copy,
4068
};
4069
4070
ccv_cnnp_model_t* ccv_cnnp_move(const char* const name)
4071
3
{
4072
3
  ccv_cnnp_model_move_t* const model_move = (ccv_cnnp_model_move_t*)cccalloc(1, sizeof(ccv_cnnp_model_move_t));
4073
3
  model_move->super.isa = &ccv_cnnp_move_isa;
4074
3
  model_move->super.input_size = 2;
4075
3
  model_move->super.outputs = &model_move->output;
4076
3
  model_move->super.output_size = 1;
4077
3
  ccv_cnnp_model_copy_name(&model_move->super, name);
4078
3
  return (ccv_cnnp_model_t*)model_move;
4079
3
}
4080
4081
static ccv_cnnp_model_t* _ccv_cnnp_move_copy(const ccv_cnnp_model_t* const super, void* const context)
4082
0
{
4083
0
  const ccv_cnnp_model_move_t* const self = (const ccv_cnnp_model_move_t*)super;
4084
0
  return ccv_cnnp_move(self->super.name);
4085
0
}
4086
4087
// MARK - "Making" Contiguous Layer
4088
4089
typedef struct {
4090
  ccv_cnnp_model_t super;
4091
  ccv_nnc_tensor_symbol_t output;
4092
} ccv_cnnp_model_contiguous_t;
4093
4094
// Build hook: passes the input through unchanged when it is not an alias or when its stride
// is already packed. Otherwise a new symbol is created and a format transform copies into it.
static void _ccv_cnnp_contiguous_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_contiguous_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  const ccv_nnc_tensor_symbol_t alias_source = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
  // A symbol that is not an alias needs no work.
  if (alias_source.d == CCV_NNC_NO_TENSOR_SYMBOL)
  {
    outputs[0] = inputs[0];
    return;
  }
  // For an alias, inspect its stride to find out whether it is contiguous. This also catches
  // a "permute" expressed through reshape rather than through ccv_cnnp_permute.
  int alias_stride[CCV_NNC_MAX_DIM_ALLOC];
  ccv_nnc_tensor_symbol_alias_params(graph, inputs[0], 0, alias_stride);
  const int is_packed = ccv_nnc_is_tensor_stride_packed(alias_stride, input_params.dim);
  if (is_packed)
  {
    outputs[0] = inputs[0];
    return;
  }
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, input_params, 0);
  const ccv_nnc_graph_exec_symbol_t transform = ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), inputs, 1, outputs, 1, "contiguous");
  // Flag the transform with DISABLE_OPT.
  ccv_nnc_graph_exec_symbol_set_flags(graph, transform, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
}
4120
4121
static ccv_cnnp_model_t* _ccv_cnnp_contiguous_copy(const ccv_cnnp_model_t* const super, void* const context);
4122
4123
static const ccv_cnnp_model_vtab_t ccv_cnnp_contiguous_isa = {
4124
  .build = _ccv_cnnp_contiguous_build,
4125
  .copy = _ccv_cnnp_contiguous_copy,
4126
};
4127
4128
ccv_cnnp_model_t* ccv_cnnp_contiguous(const char* const name)
4129
5
{
4130
5
  ccv_cnnp_model_contiguous_t* const model_contiguous = (ccv_cnnp_model_contiguous_t*)cccalloc(1, sizeof(ccv_cnnp_model_contiguous_t));
4131
5
  model_contiguous->super.isa = &ccv_cnnp_contiguous_isa;
4132
5
  model_contiguous->super.input_size = 1;
4133
5
  model_contiguous->super.outputs = &model_contiguous->output;
4134
5
  model_contiguous->super.output_size = 1;
4135
5
  ccv_cnnp_model_copy_name(&model_contiguous->super, name);
4136
5
  return (ccv_cnnp_model_t*)model_contiguous;
4137
5
}
4138
4139
static ccv_cnnp_model_t* _ccv_cnnp_contiguous_copy(const ccv_cnnp_model_t* const super, void* const context)
4140
0
{
4141
0
  const ccv_cnnp_model_contiguous_t* const self = (const ccv_cnnp_model_contiguous_t*)super;
4142
0
  return ccv_cnnp_contiguous(self->super.name);
4143
0
}
4144
4145
// MARK - "Making" Copy Layer
4146
4147
typedef struct {
4148
  ccv_cnnp_model_t super;
4149
  ccv_nnc_tensor_symbol_t output;
4150
} ccv_cnnp_model_copy_t;
4151
4152
// Build hook: passes the input through unchanged when it is not an alias. For an alias a new
// symbol is always created and a format transform copies into it (no stride check here).
static void _ccv_cnnp_copy_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_copy_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  const ccv_nnc_tensor_symbol_t alias_source = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
  // A symbol that is not an alias needs no work.
  if (alias_source.d == CCV_NNC_NO_TENSOR_SYMBOL)
  {
    outputs[0] = inputs[0];
    return;
  }
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, input_params, 0);
  // The exec symbol is named "contiguous" here as well.
  const ccv_nnc_graph_exec_symbol_t transform = ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), inputs, 1, outputs, 1, "contiguous");
  // Flag the transform with DISABLE_OPT.
  ccv_nnc_graph_exec_symbol_set_flags(graph, transform, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
}
4168
4169
static ccv_cnnp_model_t* _ccv_cnnp_copy_copy(const ccv_cnnp_model_t* const super, void* const context);
4170
4171
static const ccv_cnnp_model_vtab_t ccv_cnnp_copy_isa = {
4172
  .build = _ccv_cnnp_copy_build,
4173
  .copy = _ccv_cnnp_copy_copy,
4174
};
4175
4176
ccv_cnnp_model_t* ccv_cnnp_copy(const char* const name)
4177
0
{
4178
0
  ccv_cnnp_model_copy_t* const model_copy = (ccv_cnnp_model_copy_t*)cccalloc(1, sizeof(ccv_cnnp_model_copy_t));
4179
0
  model_copy->super.isa = &ccv_cnnp_copy_isa;
4180
0
  model_copy->super.input_size = 1;
4181
0
  model_copy->super.outputs = &model_copy->output;
4182
0
  model_copy->super.output_size = 1;
4183
0
  ccv_cnnp_model_copy_name(&model_copy->super, name);
4184
0
  return (ccv_cnnp_model_t*)model_copy;
4185
0
}
4186
4187
static ccv_cnnp_model_t* _ccv_cnnp_copy_copy(const ccv_cnnp_model_t* const super, void* const context)
4188
0
{
4189
0
  const ccv_cnnp_model_copy_t* const self = (const ccv_cnnp_model_copy_t*)super;
4190
0
  return ccv_cnnp_copy(self->super.name);
4191
0
}
4192
4193
// MARK - Scaled-Dot Product Attention Layer
4194
4195
typedef struct {
4196
  ccv_cnnp_model_t super;
4197
  ccv_nnc_tensor_symbol_t output;
4198
  ccv_nnc_tensor_symbol_t weights;
4199
  ccv_nnc_tensor_symbol_t bias;
4200
  float scale;
4201
  int is_causal;
4202
  int has_attn_mask;
4203
  int flags;
4204
  int fused_unify_head_weights;
4205
  int no_bias;
4206
} ccv_cnnp_model_scaled_dot_product_attention_t;
4207
4208
// Build hook: inputs are q, k, v and, when has_attn_mask is set, the mask as inputs[3].
// The fused variant additionally owns a weights tensor (hEv x hEv) and an optional bias (hEv)
// and saves a third output from the command. Only the attention output is exposed.
static void _ccv_cnnp_scaled_dot_product_attention_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_scaled_dot_product_attention_build] -\n");
  assert(input_size == 3 || input_size == 4);
  assert(output_size == 1);
  ccv_cnnp_model_scaled_dot_product_attention_t* const attention = (ccv_cnnp_model_scaled_dot_product_attention_t*)super;
  const ccv_nnc_tensor_param_t q_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  const ccv_nnc_tensor_param_t k_params = ccv_nnc_tensor_symbol_params(graph, inputs[1]);
  const ccv_nnc_tensor_param_t v_params = ccv_nnc_tensor_symbol_params(graph, inputs[2]);
  const int v_nd = ccv_nnc_tensor_nd(v_params.dim);
  assert(v_nd == 3 || v_nd == 4);
  // Command configuration.
  ccv_nnc_cmd_t cmd = {0};
  cmd.cmd = CCV_NNC_SCALED_DOT_PRODUCT_ATTENTION_FORWARD;
  cmd.info.scaled_dot_product_attention.scale = attention->scale;
  cmd.info.scaled_dot_product_attention.is_causal = attention->is_causal;
  cmd.info.scaled_dot_product_attention.flags = attention->flags;
  // Optional operands default to "no tensor".
  ccv_nnc_tensor_symbol_t attn_mask = NO_TENSOR_SYMBOL;
  ccv_nnc_tensor_symbol_t weights = NO_TENSOR_SYMBOL;
  ccv_nnc_tensor_symbol_t bias = NO_TENSOR_SYMBOL;
  ccv_nnc_tensor_symbol_t saved_v_proj = NO_TENSOR_SYMBOL;
  if (attention->has_attn_mask)
    attn_mask = inputs[3];
  ccv_nnc_tensor_param_t output_params[3];
  if (attention->fused_unify_head_weights)
  {
    // dim[2] * last dim of v for a 4-d v, just the last dim for a 3-d v.
    const int head_count = (v_nd == 3) ? 1 : v_params.dim[2];
    const int hEv = head_count * v_params.dim[v_nd - 1];
    ccv_nnc_tensor_param_t weights_params = q_params;
    memset(weights_params.dim, 0, sizeof(weights_params.dim));
    weights_params.dim[0] = hEv;
    weights_params.dim[1] = hEv;
    ccv_nnc_tensor_param_t bias_params = q_params;
    memset(bias_params.dim, 0, sizeof(bias_params.dim));
    bias_params.dim[0] = hEv;
    if (!attention->weights.graph)
      attention->weights = ccv_nnc_tensor_symbol_new(graph, weights_params, "weights");
    weights = attention->weights;
    if (!attention->no_bias)
    {
      if (!attention->bias.graph)
        attention->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
      bias = attention->bias;
    }
    // The 4th slot (attention mask) is passed as empty parameters for shape inference.
    ccv_nnc_hint_tensor_auto(cmd, (ccv_nnc_tensor_param_t []){
        q_params,
        k_params,
        v_params,
        (ccv_nnc_tensor_param_t){},
        weights_params,
        bias_params,
      }, 6, ccv_nnc_no_hint, output_params, 3);
  } else {
    ccv_nnc_hint_tensor_auto(cmd, (ccv_nnc_tensor_param_t []){
        q_params,
        k_params,
        v_params,
      }, 3, ccv_nnc_no_hint, output_params, 2);
  }
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
  const ccv_nnc_tensor_symbol_t saved_softmax_lse = ccv_nnc_tensor_symbol_new(graph, output_params[1], 0);
  // output_params[2] is only filled in by the fused variant.
  if (attention->fused_unify_head_weights)
    saved_v_proj = ccv_nnc_tensor_symbol_new(graph, output_params[2], 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], inputs[1], inputs[2], attn_mask, weights, bias), TENSOR_SYMBOL_LIST(output, saved_softmax_lse, saved_v_proj), "scaled_dot_product_attention");
  outputs[0] = output;
}
4275
4276
// Init hook: when weights exist they are initialized uniformly in [-bound, bound] with
// bound = sqrt(3) * sqrt(2) / sqrt(dim[1] of the weights); an existing bias is set to 0.
static void _ccv_cnnp_scaled_dot_product_attention_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
{
  ccv_cnnp_model_scaled_dot_product_attention_t* const attention = (ccv_cnnp_model_scaled_dot_product_attention_t*)super;
  // Nothing to initialize if the weights were never created.
  if (!attention->weights.graph)
    return;
  assert(attention->fused_unify_head_weights);
  const ccv_nnc_tensor_param_t weight_params = ccv_nnc_tensor_symbol_params(graph, attention->weights);
  const int fan = weight_params.dim[1];
  const float deviation = sqrtf(2) / sqrtf(fan);
  const float limit = sqrtf(3) * deviation;
  initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-limit, limit), ccv_nnc_no_hint, 0, 0, attention->weights);
  if (attention->bias.graph)
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, attention->bias);
}
4291
4292
// Reports the weights and, if present, the bias through the add_to_array callback.
static void _ccv_cnnp_scaled_dot_product_attention_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
{
  ccv_cnnp_model_scaled_dot_product_attention_t* const attention = (ccv_cnnp_model_scaled_dot_product_attention_t*)super;
  // Nothing to report if the weights were never created.
  if (!attention->weights.graph)
    return;
  assert(attention->fused_unify_head_weights);
  add_to_array(parameters, attention->weights, is_trainable);
  if (attention->bias.graph)
    add_to_array(parameters, attention->bias, is_trainable);
}
4303
4304
static ccv_cnnp_model_t* _ccv_cnnp_scaled_dot_product_attention_copy(const ccv_cnnp_model_t* const super, void* const context);
4305
4306
static const ccv_cnnp_model_vtab_t ccv_cnnp_scaled_dot_product_attention_isa = {
4307
  .build = _ccv_cnnp_scaled_dot_product_attention_build,
4308
  .copy = _ccv_cnnp_scaled_dot_product_attention_copy,
4309
};
4310
4311
static const ccv_cnnp_model_vtab_t ccv_cnnp_scaled_dot_product_attention_fused_isa = {
4312
  .build = _ccv_cnnp_scaled_dot_product_attention_build,
4313
  .init_states = _ccv_cnnp_scaled_dot_product_attention_init_states,
4314
  .add_to_parameter = _ccv_cnnp_scaled_dot_product_attention_add_to_parameter,
4315
  .copy = _ccv_cnnp_scaled_dot_product_attention_copy,
4316
};
4317
4318
ccv_cnnp_model_t* ccv_cnnp_scaled_dot_product_attention(const float scale, const int is_causal, const int has_attn_mask, const int flags, const int fused_unify_head_weights, const int no_bias, const int is_trainable, const char* const name)
4319
3
{
4320
3
  ccv_cnnp_model_scaled_dot_product_attention_t* const model_scaled_dot_product_attention = (ccv_cnnp_model_scaled_dot_product_attention_t*)cccalloc(1, sizeof(ccv_cnnp_model_scaled_dot_product_attention_t));
4321
3
  model_scaled_dot_product_attention->super.isa = fused_unify_head_weights ? 
&ccv_cnnp_scaled_dot_product_attention_fused_isa1
:
&ccv_cnnp_scaled_dot_product_attention_isa2
;
4322
3
  model_scaled_dot_product_attention->super.input_size = has_attn_mask ? 
41
:
32
;
4323
3
  model_scaled_dot_product_attention->super.outputs = &model_scaled_dot_product_attention->output;
4324
3
  model_scaled_dot_product_attention->super.output_size = 1;
4325
3
  model_scaled_dot_product_attention->super.is_trainable = is_trainable;
4326
3
  ccv_cnnp_model_copy_name(&model_scaled_dot_product_attention->super, name);
4327
3
  model_scaled_dot_product_attention->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
4328
3
  model_scaled_dot_product_attention->weights.graph = 0;
4329
3
  model_scaled_dot_product_attention->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
4330
3
  model_scaled_dot_product_attention->bias.graph = 0;
4331
3
  model_scaled_dot_product_attention->scale = scale;
4332
3
  model_scaled_dot_product_attention->is_causal = is_causal;
4333
3
  model_scaled_dot_product_attention->has_attn_mask = has_attn_mask;
4334
3
  model_scaled_dot_product_attention->flags = flags;
4335
3
  model_scaled_dot_product_attention->fused_unify_head_weights = fused_unify_head_weights;
4336
3
  model_scaled_dot_product_attention->no_bias = no_bias;
4337
3
  return (ccv_cnnp_model_t*)model_scaled_dot_product_attention;
4338
3
}
4339
4340
static ccv_cnnp_model_t* _ccv_cnnp_scaled_dot_product_attention_copy(const ccv_cnnp_model_t* const super, void* const context)
4341
0
{
4342
0
  const ccv_cnnp_model_scaled_dot_product_attention_t* const self = (const ccv_cnnp_model_scaled_dot_product_attention_t*)super;
4343
0
  return ccv_cnnp_scaled_dot_product_attention(self->scale, self->is_causal, self->has_attn_mask, self->flags, self->fused_unify_head_weights, self->no_bias, self->super.is_trainable, self->super.name);
4344
0
}
4345
4346
// MARK - Debug Layer
4347
4348
typedef struct {
4349
  ccv_cnnp_model_t super;
4350
  ccv_nnc_tensor_symbol_t output;
4351
  ccv_cnnp_model_debug_f debugger;
4352
  ccv_cnnp_model_debug_context_deinit_f debug_deinit;
4353
  ccv_cnnp_model_debug_context_copy_f debug_copy;
4354
  void* debug_context;
4355
} ccv_cnnp_model_debug_t;
4356
4357
// Custom command body: forwards the input tensors to the user supplied debugger. The model is
// recovered from cmd.data. The backward pass is not supported and asserts.
static int _ccv_cnnp_debug_exec(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  const int is_backward = (cmd.cmd == CCV_NNC_CUSTOM_BACKWARD);
  if (is_backward)
  {
    assert(0 && "don't support debug backward pass yet");
  }
  ccv_cnnp_model_debug_t* const debug = (ccv_cnnp_model_debug_t*)cmd.data;
  debug->debugger(inputs, input_size, stream_context, debug->debug_context);
  return CCV_NNC_EXEC_SUCCESS;
}
4367
4368
static ccv_nnc_cmd_vtab_t ccv_cnnp_debug_exec_isa = {
4369
  .exec = _ccv_cnnp_debug_exec
4370
};
4371
4372
// Build hook: the output is an alias of the first input (or of what the first input itself
// aliases), and a custom command carrying this model in cmd.data is added over all inputs.
static void _ccv_cnnp_debug_build(ccv_cnnp_model_t* const self, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_debug_build] -\n");
  assert(input_size >= 1);
  assert(output_size == 1);
  const ccv_nnc_tensor_symbol_t alias_source = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
  ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  if (alias_source.d != CCV_NNC_NO_TENSOR_SYMBOL)
  {
    // The input is already an alias: alias the same source with the same offset and stride.
    int alias_ofs[CCV_NNC_MAX_DIM_ALLOC];
    int alias_stride[CCV_NNC_MAX_DIM_ALLOC];
    ccv_nnc_tensor_symbol_alias_params(graph, inputs[0], alias_ofs, alias_stride);
    outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, alias_source, alias_ofs, alias_stride, output_params, 0);
  } else {
    // Plain symbol: alias it at offset 0 with the stride computed from its own dimensions.
    int zero_ofs[CCV_NNC_MAX_DIM_ALLOC] = {0};
    int full_stride[CCV_NNC_MAX_DIM_ALLOC];
    ccv_nnc_tensor_get_stride(output_params.dim, full_stride);
    outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], zero_ofs, full_stride, output_params, 0);
  }
  ccv_nnc_cmd_t debug_cmd = ccv_nnc_cmd(CCV_NNC_CUSTOM_FORWARD, (ccv_nnc_cmd_vtab_t*)&ccv_cnnp_debug_exec_isa, (ccv_nnc_cmd_param_t){}, 0);
  debug_cmd.data = self;
  const ccv_nnc_graph_exec_symbol_t debug_exec = ccv_nnc_graph_exec_symbol_new(graph, debug_cmd, inputs, input_size, outputs, 1, "debug");
  // Disable any optimizations.
  ccv_nnc_graph_exec_symbol_set_flags(graph, debug_exec, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
}
4397
4398
// Deinit hook: hands the debug context to the user supplied deinit callback, if both exist.
static void _ccv_cnnp_debug_deinit(ccv_cnnp_model_t* const super)
{
  const ccv_cnnp_model_debug_t* const debug = (const ccv_cnnp_model_debug_t*)super;
  if (!debug->debug_deinit)
    return;
  if (!debug->debug_context)
    return;
  debug->debug_deinit(debug->debug_context);
}
4404
4405
static ccv_cnnp_model_t* _ccv_cnnp_debug_copy(const ccv_cnnp_model_t* const super, void* const context);
4406
4407
static const ccv_cnnp_model_vtab_t ccv_cnnp_debug_isa = {
4408
  .build = _ccv_cnnp_debug_build,
4409
  .deinit = _ccv_cnnp_debug_deinit,
4410
  .copy = _ccv_cnnp_debug_copy,
4411
};
4412
4413
// Create a debug layer.
//
// func    - stored as the layer's debugger callback.
// context - opaque pointer stored alongside it; passed to `deinit` when the
//           model is torn down and to `copy` when the model is copied.
// deinit  - optional callback that releases `context` (may be NULL).
// copy    - optional callback that duplicates `context` (may be NULL).
// name    - model name, copied via ccv_cnnp_model_copy_name.
//
// Returns a zero-initialized, heap-allocated model with a single output.
ccv_cnnp_model_t* ccv_cnnp_debug(ccv_cnnp_model_debug_f func, void* const context, ccv_cnnp_model_debug_context_deinit_f deinit, ccv_cnnp_model_debug_context_copy_f copy, const char* const name)
{
  ccv_cnnp_model_debug_t* const layer = (ccv_cnnp_model_debug_t*)cccalloc(1, sizeof(*layer));
  // User-provided debugging hooks.
  layer->debugger = func;
  layer->debug_context = context;
  layer->debug_deinit = deinit;
  layer->debug_copy = copy;
  // Generic model header.
  layer->super.isa = &ccv_cnnp_debug_isa;
  layer->super.input_size = 0;
  layer->super.output_size = 1;
  layer->super.outputs = &layer->output;
  ccv_cnnp_model_copy_name(&layer->super, name);
  return (ccv_cnnp_model_t*)layer;
}
4427
4428
static ccv_cnnp_model_t* _ccv_cnnp_debug_copy(const ccv_cnnp_model_t* const super, void* const context)
4429
0
{
4430
0
  const ccv_cnnp_model_debug_t* const self = (const ccv_cnnp_model_debug_t*)super;
4431
0
  void* debug_context = self->debug_context;
4432
0
  if (self->debug_copy && self->debug_context)
4433
0
    debug_context = self->debug_copy(self->debug_context);
4434
0
  return ccv_cnnp_debug(self->debugger, debug_context, self->debug_deinit, self->debug_copy, self->super.name);
4435
0
}
4436
4437
/// MARK - Sort layer.
4438
4439
typedef struct {
4440
  ccv_cnnp_model_t super;
4441
  ccv_nnc_tensor_symbol_t outputs[2];
4442
  int along_axis;
4443
  int descending;
4444
} ccv_cnnp_model_sort_t;
4445
4446
// Build step of the sort layer.
//
// Creates two outputs shaped like inputs[0]: outputs[0] keeps the input's
// datatype, outputs[1] is CCV_32S (presumably the sort indices; confirm against
// the SORT command). Both are produced by a single "sort" exec node.
// Requires exactly two outputs.
static void _ccv_cnnp_sort_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_sort_t* const self = (ccv_cnnp_model_sort_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_sort_build] - along_axis: %d, descending: %d\n", self->along_axis, self->descending);
  const ccv_nnc_tensor_param_t value_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  assert(output_size == 2);
  // Same shape as the values, but always 32-bit integers.
  ccv_nnc_tensor_param_t index_params = value_params;
  index_params.datatype = CCV_32S;
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, value_params, 0);
  outputs[1] = ccv_nnc_tensor_symbol_new(graph, index_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, CMD_SORT_FORWARD(self->along_axis, self->descending), inputs, input_size, outputs, output_size, "sort");
}
4457
4458
static ccv_cnnp_model_t* _ccv_cnnp_sort_copy(const ccv_cnnp_model_t* const self, void* const context);
4459
4460
static const ccv_cnnp_model_vtab_t ccv_cnnp_sort_isa = {
4461
  .build = _ccv_cnnp_sort_build,
4462
  .copy = _ccv_cnnp_sort_copy,
4463
};
4464
4465
ccv_cnnp_model_t* ccv_cnnp_sort(const int along_axis, const int descending, const char* const name)
4466
1
{
4467
1
  ccv_cnnp_model_sort_t* const model_sort = (ccv_cnnp_model_sort_t*)cccalloc(1, sizeof(ccv_cnnp_model_sort_t));
4468
1
  model_sort->super.isa = &ccv_cnnp_sort_isa;
4469
1
  model_sort->super.input_size = 0;
4470
1
  model_sort->super.outputs = model_sort->outputs;
4471
1
  model_sort->super.output_size = 2;
4472
1
  model_sort->along_axis = along_axis;
4473
1
  model_sort->descending = descending;
4474
1
  ccv_cnnp_model_copy_name(&model_sort->super, name);
4475
1
  return (ccv_cnnp_model_t*)model_sort;
4476
1
}
4477
4478
static ccv_cnnp_model_t* _ccv_cnnp_sort_copy(const ccv_cnnp_model_t* const super, void* const context)
4479
0
{
4480
0
  ccv_cnnp_model_sort_t* const self = (ccv_cnnp_model_sort_t*)super;
4481
0
  return ccv_cnnp_sort(self->along_axis, self->descending, self->super.name);
4482
0
}
4483
4484
/// MARK - Partition layer.
4485
4486
typedef struct {
4487
  ccv_cnnp_model_t super;
4488
  ccv_nnc_tensor_symbol_t outputs[2];
4489
  int kth;
4490
  int along_axis;
4491
  int descending;
4492
} ccv_cnnp_model_partition_t;
4493
4494
// Build step of the partition layer.
//
// Creates two outputs derived from inputs[0]'s parameters. When kth > 0 the
// extent along `along_axis` is replaced by kth; otherwise the input shape is
// kept. outputs[0] keeps the input's datatype, outputs[1] is CCV_32S. Both
// are produced by a single "partition" exec node.
// Requires exactly two outputs.
static void _ccv_cnnp_partition_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_partition_t* const self = (ccv_cnnp_model_partition_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_partition_build] - kth: %d, along_axis: %d, descending: %d\n", self->kth, self->along_axis, self->descending);
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  assert(output_size == 2);
  if (self->kth > 0)
  {
    // along_axis indexes params.dim directly, so an out-of-range value would
    // write outside the array. Catch that here rather than corrupting memory.
    assert(self->along_axis >= 0 && self->along_axis < CCV_NNC_MAX_DIM_ALLOC);
    params.dim[self->along_axis] = self->kth;
  }
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, params, 0);
  // Second output has the same shape, but always 32-bit integers.
  params.datatype = CCV_32S;
  outputs[1] = ccv_nnc_tensor_symbol_new(graph, params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, CMD_PARTITION_FORWARD(self->kth, self->along_axis, self->descending), inputs, input_size, outputs, output_size, "partition");
}
4507
4508
static ccv_cnnp_model_t* _ccv_cnnp_partition_copy(const ccv_cnnp_model_t* const self, void* const context);
4509
4510
static const ccv_cnnp_model_vtab_t ccv_cnnp_partition_isa = {
4511
  .build = _ccv_cnnp_partition_build,
4512
  .copy = _ccv_cnnp_partition_copy,
4513
};
4514
4515
ccv_cnnp_model_t* ccv_cnnp_partition(const int kth, const int along_axis, const int descending, const char* const name)
4516
1
{
4517
1
  ccv_cnnp_model_partition_t* const model_partition = (ccv_cnnp_model_partition_t*)cccalloc(1, sizeof(ccv_cnnp_model_partition_t));
4518
1
  model_partition->super.isa = &ccv_cnnp_partition_isa;
4519
1
  model_partition->super.input_size = 0;
4520
1
  model_partition->super.outputs = model_partition->outputs;
4521
1
  model_partition->super.output_size = 2;
4522
1
  model_partition->kth = kth;
4523
1
  model_partition->along_axis = along_axis;
4524
1
  model_partition->descending = descending;
4525
1
  ccv_cnnp_model_copy_name(&model_partition->super, name);
4526
1
  return (ccv_cnnp_model_t*)model_partition;
4527
1
}
4528
4529
static ccv_cnnp_model_t* _ccv_cnnp_partition_copy(const ccv_cnnp_model_t* const super, void* const context)
4530
0
{
4531
0
  ccv_cnnp_model_partition_t* const self = (ccv_cnnp_model_partition_t*)super;
4532
0
  return ccv_cnnp_partition(self->kth, self->along_axis, self->descending, self->super.name);
4533
0
}
4534
4535
/// MARK - Unique consecutive layer.
4536
4537
typedef struct {
4538
  ccv_cnnp_model_t super;
4539
  ccv_nnc_tensor_symbol_t outputs[2];
4540
  int bincount;
4541
} ccv_cnnp_model_unique_consecutive_t;
4542
4543
// Build step of the unique-consecutive layer.
//
// Creates two outputs derived from inputs[0]'s parameters. When bincount > 0,
// dim[0] is capped at bincount; otherwise the input shape is kept. outputs[0]
// keeps the input's datatype, outputs[1] is CCV_32S. Both are produced by a
// single "unique_consecutive" exec node.
// Requires exactly two outputs.
static void _ccv_cnnp_unique_consecutive_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_unique_consecutive_t* const self = (ccv_cnnp_model_unique_consecutive_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_unique_consecutive_build] - bincount: %d\n", self->bincount);
  ccv_nnc_tensor_param_t unique_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  assert(output_size == 2);
  // Cap the leading dimension: only shrink it when bincount is positive and smaller.
  if (self->bincount > 0 && self->bincount < unique_params.dim[0])
    unique_params.dim[0] = self->bincount;
  ccv_nnc_tensor_param_t count_params = unique_params;
  count_params.datatype = CCV_32S;
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, unique_params, 0);
  outputs[1] = ccv_nnc_tensor_symbol_new(graph, count_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, CMD_UNIQUE_CONSECUTIVE_FORWARD(self->bincount), inputs, input_size, outputs, output_size, "unique_consecutive");
}
4556
4557
static ccv_cnnp_model_t* _ccv_cnnp_unique_consecutive_copy(const ccv_cnnp_model_t* const self, void* const context);
4558
4559
static const ccv_cnnp_model_vtab_t ccv_cnnp_unique_consecutive_isa = {
4560
  .build = _ccv_cnnp_unique_consecutive_build,
4561
  .copy = _ccv_cnnp_unique_consecutive_copy,
4562
};
4563
4564
ccv_cnnp_model_t* ccv_cnnp_unique_consecutive(const int bincount, const char* const name)
4565
1
{
4566
1
  ccv_cnnp_model_unique_consecutive_t* const model_unique_consecutive = (ccv_cnnp_model_unique_consecutive_t*)cccalloc(1, sizeof(ccv_cnnp_model_unique_consecutive_t));
4567
1
  model_unique_consecutive->super.isa = &ccv_cnnp_unique_consecutive_isa;
4568
1
  model_unique_consecutive->super.input_size = 0;
4569
1
  model_unique_consecutive->super.outputs = model_unique_consecutive->outputs;
4570
1
  model_unique_consecutive->super.output_size = 2;
4571
1
  model_unique_consecutive->bincount = bincount;
4572
1
  ccv_cnnp_model_copy_name(&model_unique_consecutive->super, name);
4573
1
  return (ccv_cnnp_model_t*)model_unique_consecutive;
4574
1
}
4575
4576
static ccv_cnnp_model_t* _ccv_cnnp_unique_consecutive_copy(const ccv_cnnp_model_t* const super, void* const context)
4577
0
{
4578
0
  ccv_cnnp_model_unique_consecutive_t* const self = (ccv_cnnp_model_unique_consecutive_t*)super;
4579
0
  return ccv_cnnp_unique_consecutive(self->bincount, self->super.name);
4580
0
}
4581
4582
/// MARK - Scatter add layer.
4583
4584
typedef struct {
4585
  ccv_cnnp_model_t super;
4586
  ccv_nnc_tensor_symbol_t output;
4587
  int bincount;
4588
} ccv_cnnp_model_scatter_add_t;
4589
4590
// Build step of the scatter-add layer.
//
// The output takes inputs[0]'s parameters with dim[0] replaced by bincount,
// and is produced by a single "scatter_add" exec node fed with all inputs.
// Requires exactly one output and a positive bincount.
static void _ccv_cnnp_scatter_add_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_scatter_add_t* const self = (ccv_cnnp_model_scatter_add_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_scatter_add_build] - bincount: %d\n", self->bincount);
  ccv_nnc_tensor_param_t scattered_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  assert(output_size == 1);
  const int bins = self->bincount;
  assert(bins > 0);
  // Only the leading dimension changes; everything else follows inputs[0].
  scattered_params.dim[0] = bins;
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, scattered_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, CMD_SCATTER_ADD_FORWARD(bins), inputs, input_size, outputs, output_size, "scatter_add");
}
4601
4602
static ccv_cnnp_model_t* _ccv_cnnp_scatter_add_copy(const ccv_cnnp_model_t* const self, void* const context);
4603
4604
static const ccv_cnnp_model_vtab_t ccv_cnnp_scatter_add_isa = {
4605
  .build = _ccv_cnnp_scatter_add_build,
4606
  .copy = _ccv_cnnp_scatter_add_copy,
4607
};
4608
4609
ccv_cnnp_model_t* ccv_cnnp_scatter_add(const int bincount, const char* const name)
4610
1
{
4611
1
  assert(bincount > 0);
4612
1
  ccv_cnnp_model_scatter_add_t* const model_scatter_add = (ccv_cnnp_model_scatter_add_t*)cccalloc(1, sizeof(ccv_cnnp_model_scatter_add_t));
4613
1
  model_scatter_add->super.isa = &ccv_cnnp_scatter_add_isa;
4614
1
  model_scatter_add->super.input_size = 0;
4615
1
  model_scatter_add->super.outputs = &model_scatter_add->output;
4616
1
  model_scatter_add->super.output_size = 1;
4617
1
  model_scatter_add->bincount = bincount;
4618
1
  ccv_cnnp_model_copy_name(&model_scatter_add->super, name);
4619
1
  return (ccv_cnnp_model_t*)model_scatter_add;
4620
1
}
4621
4622
static ccv_cnnp_model_t* _ccv_cnnp_scatter_add_copy(const ccv_cnnp_model_t* const super, void* const context)
4623
0
{
4624
0
  ccv_cnnp_model_scatter_add_t* const self = (ccv_cnnp_model_scatter_add_t*)super;
4625
0
  return ccv_cnnp_scatter_add(self->bincount, self->super.name);
4626
0
}
4627
4628
// MARK - Segmented Dense Layer
4629
4630
typedef struct {
4631
  ccv_cnnp_model_t super;
4632
  ccv_nnc_tensor_symbol_t output;
4633
  ccv_nnc_tensor_symbol_t weights;
4634
  ccv_nnc_tensor_symbol_t bias;
4635
  int segments;
4636
  int count;
4637
  int no_bias;
4638
  int flags;
4639
} ccv_cnnp_model_segmented_dense_t;
4640
4641
// Build step of the segmented dense layer.
//
// Takes exactly three inputs (the data tensor, then two tensors the code
// calls "indices" and "counts") and produces one output. A weights
// parameter of shape (segments, count, last dim of inputs[0]) is created on
// first use, and, unless no_bias is set, so is a bias parameter of shape
// (segments, count). The output parameters are inferred from a
// CCV_NNC_SEGMENTED_GEMM_FORWARD command via ccv_nnc_hint_tensor_auto.
static void _ccv_cnnp_segmented_dense_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_segmented_dense_t* const self = (ccv_cnnp_model_segmented_dense_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_segmented_dense_build] -\n");
  assert(input_size == 3);
  assert(output_size == 1);
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  const ccv_nnc_tensor_param_t indices_params = ccv_nnc_tensor_symbol_params(graph, inputs[1]);
  const ccv_nnc_tensor_param_t counts_params = ccv_nnc_tensor_symbol_params(graph, inputs[2]);

  // Weights: everything but the shape is inherited from the data input.
  const int last_axis = ccv_nnc_tensor_nd(params.dim) - 1;
  ccv_nnc_tensor_param_t weights_params = params;
  memset(weights_params.dim, 0, sizeof(weights_params.dim));
  weights_params.dim[0] = self->segments;
  weights_params.dim[1] = self->count;
  weights_params.dim[2] = params.dim[last_axis];
  if (!self->weights.graph)
    self->weights = ccv_nnc_tensor_symbol_new(graph, weights_params, "weights");
  // A weights symbol carried over from an earlier build must belong to this graph.
  assert(self->weights.graph == graph);

  // Bias: one row of `count` values per segment.
  ccv_nnc_tensor_param_t bias_params = params;
  memset(bias_params.dim, 0, sizeof(bias_params.dim));
  bias_params.dim[0] = self->segments;
  bias_params.dim[1] = self->count;

  // Segmented GEMM with both scaling factors set to 1 and the last two axes
  // of the weights (1 and 2) transposed.
  ccv_nnc_cmd_t gemm = {0};
  gemm.cmd = CCV_NNC_SEGMENTED_GEMM_FORWARD;
  gemm.info.blas.a[0] = 1;
  gemm.info.blas.a[1] = 1;
  gemm.info.blas.transpose_b[0] = 1;
  gemm.info.blas.transpose_b[1] = 2;
  gemm.info.blas.flags = self->flags;

  // Shape inference always sees the bias parameters, even when no_bias is set.
  ccv_nnc_tensor_param_t output_params;
  ccv_nnc_hint_tensor_auto(gemm, (ccv_nnc_tensor_param_t []){
      params, indices_params, counts_params,
      weights_params,
      bias_params,
    }, 5, ccv_nnc_no_hint, &output_params, 1);
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);

  if (!self->no_bias)
  {
    if (!self->bias.graph)
      self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
    ccv_nnc_graph_exec_symbol_new(graph, gemm, TENSOR_SYMBOL_LIST(inputs[0], inputs[1], inputs[2], self->weights, self->bias), TENSOR_SYMBOL_LIST(output), "segmented_dense");
  } else
    ccv_nnc_graph_exec_symbol_new(graph, gemm, TENSOR_SYMBOL_LIST(inputs[0], inputs[1], inputs[2], self->weights), TENSOR_SYMBOL_LIST(output), "segmented_dense");
  outputs[0] = output;
}
4685
4686
// Initialize the layer's parameters through the supplied initializer.
//
// Weights are drawn uniformly from [-bound, bound] where
// bound = sqrt(3) * sqrt(2) / sqrt(c) and c is dim[1] of the weights tensor.
// The bias, when it exists, is set to 0.
//
// NOTE(review): the build step lays the weights out as
// (segments, count, input feature size), so dim[1] is `count` while the input
// feature size is dim[2]. Confirm that scaling by `count` is the intended fan.
static void _ccv_cnnp_segmented_dense_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
{
  ccv_cnnp_model_segmented_dense_t* const self = (ccv_cnnp_model_segmented_dense_t*)super;
  const ccv_nnc_tensor_param_t weight_params = ccv_nnc_tensor_symbol_params(graph, self->weights);
  const int fan = weight_params.dim[1];
  const float stddev = sqrtf(2) / sqrtf(fan);
  const float limit = sqrtf(3) * stddev;
  initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-limit, limit), ccv_nnc_no_hint, 0, 0, self->weights);
  // The bias symbol only has a graph once the build step created it.
  if (!self->bias.graph)
    return;
  initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
}
4697
4698
// Register the layer's parameters with the caller's collection: always the
// weights, followed by the bias when a bias symbol was created.
static void _ccv_cnnp_segmented_dense_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
{
  ccv_cnnp_model_segmented_dense_t* const self = (ccv_cnnp_model_segmented_dense_t*)super;
  const int has_bias = (self->bias.graph != 0);
  add_to_array(parameters, self->weights, is_trainable);
  if (has_bias)
    add_to_array(parameters, self->bias, is_trainable);
}
4705
4706
static ccv_cnnp_model_t* _ccv_cnnp_segmented_dense_copy(const ccv_cnnp_model_t* const super, void* const context);
4707
4708
static const ccv_cnnp_model_vtab_t ccv_cnnp_segmented_dense_isa = {
4709
  .build = _ccv_cnnp_segmented_dense_build,
4710
  .init_states = _ccv_cnnp_segmented_dense_init_states,
4711
  .add_to_parameter = _ccv_cnnp_segmented_dense_add_to_parameter,
4712
  .copy = _ccv_cnnp_segmented_dense_copy,
4713
};
4714
4715
ccv_cnnp_model_t* ccv_cnnp_segmented_dense(const int segments, const int count, const int no_bias, const int flags, const int is_trainable, const char* const name)
4716
2
{
4717
2
  ccv_cnnp_model_segmented_dense_t* const model_segmented_dense = (ccv_cnnp_model_segmented_dense_t*)cccalloc(1, sizeof(ccv_cnnp_model_segmented_dense_t));
4718
2
  model_segmented_dense->super.isa = &ccv_cnnp_segmented_dense_isa;
4719
2
  model_segmented_dense->super.input_size = 3;
4720
2
  model_segmented_dense->super.outputs = &model_segmented_dense->output;
4721
2
  model_segmented_dense->super.output_size = 1;
4722
2
  model_segmented_dense->super.is_trainable = is_trainable;
4723
2
  ccv_cnnp_model_copy_name(&model_segmented_dense->super, name);
4724
2
  model_segmented_dense->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
4725
2
  model_segmented_dense->weights.graph = 0;
4726
2
  model_segmented_dense->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
4727
2
  model_segmented_dense->bias.graph = 0;
4728
2
  model_segmented_dense->segments = segments;
4729
2
  model_segmented_dense->count = count;
4730
2
  model_segmented_dense->no_bias = no_bias;
4731
2
  model_segmented_dense->flags = flags;
4732
2
  return (ccv_cnnp_model_t*)model_segmented_dense;
4733
2
}
4734
4735
static ccv_cnnp_model_t* _ccv_cnnp_segmented_dense_copy(const ccv_cnnp_model_t* const super, void* const context)
4736
0
{
4737
0
  const ccv_cnnp_model_segmented_dense_t* const self = (const ccv_cnnp_model_segmented_dense_t*)super;
4738
0
  return ccv_cnnp_segmented_dense(self->segments, self->count, self->no_bias, self->flags, self->super.is_trainable, self->super.name);
4739
0
}