Coverage Report

Created: 2024-08-19 11:27

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_cnnp_model_addons.c
Line
Count
Source
1
#include "ccv_nnc.h"
2
#include "ccv_nnc_easy.h"
3
#include "ccv_nnc_internal.h"
4
#include "ccv_internal.h"
5
#include "_ccv_cnnp_model.h"
6
7
// MARK - Add-on Functions
8
9
static int _ccv_cnnp_model_clip_grad_norm_reduce_norm2(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
10
2
{
11
2
  const int device_id = CCV_TENSOR_GET_DEVICE_ID(inputs[0]->info.type);
12
2
  ccv_nnc_tensor_t* const old_norm2 = outputs[1 + device_id * 2];
13
2
  ccv_nnc_tensor_t* const norm2 = outputs[1 + device_id * 2 + 1];
14
2
  const int tensor_count = ccv_nnc_tensor_count(inputs[0]->info);
15
2
  if (tensor_count == 1)
16
2
    ccv_nnc_cmd_exec(CMD_MUL_FORWARD(1), hint, flags, TENSOR_LIST(inputs[0], inputs[0]), TENSOR_LIST(norm2), stream_context);
17
0
  else {
18
0
    ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(), hint, flags, TENSOR_LIST(inputs[0]), TENSOR_LIST(norm2), stream_context);
19
0
    ccv_nnc_cmd_exec(CMD_MUL_FORWARD(1), hint, flags, TENSOR_LIST(norm2, norm2), TENSOR_LIST(norm2), stream_context);
20
0
  }
21
2
  ccv_nnc_cmd_exec(CMD_ADD_FORWARD(1, 1), hint, flags, TENSOR_LIST(old_norm2, norm2), TENSOR_LIST(old_norm2), stream_context);
22
2
  return CCV_NNC_EXEC_SUCCESS;
23
2
}
24
25
static ccv_nnc_cmd_vtab_t clip_grad_norm_reduce_norm2_vtab = {
26
  .exec = _ccv_cnnp_model_clip_grad_norm_reduce_norm2
27
};
28
29
static int _ccv_cnnp_model_clip_grad_norm_scatter_norm2(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
30
2
{
31
2
  const int device_id = CCV_TENSOR_GET_DEVICE_ID(inputs[0]->info.type);
32
2
  ccv_nnc_tensor_t* const norm2 = inputs[1 + device_id * 2];
33
2
  ccv_nnc_cmd_exec(CMD_MUL_FORWARD(1), hint, flags, TENSOR_LIST(inputs[0], norm2), TENSOR_LIST(outputs[0]), stream_context);
34
2
  return CCV_NNC_EXEC_SUCCESS;
35
2
}
36
37
static ccv_nnc_cmd_vtab_t clip_grad_norm_scatter_norm2_vtab = {
38
  .exec = _ccv_cnnp_model_clip_grad_norm_scatter_norm2
39
};
40
41
void ccv_cnnp_model_parameters_clip_grad_norm(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int norm_type, float max_norm, ccv_nnc_stream_context_t* const stream_context)
42
2
{
43
2
  assert(norm_type == 2);
44
2
  ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
45
2
  assert(compiled_data);
46
2
  const int parallel_count = ccv_max(model->parallel_count, 1);
47
2
  ccv_nnc_tensor_t* norm2[parallel_count * 2];
48
2
  ccv_nnc_tensor_t* max_normt[parallel_count];
49
2
  const int stream_type = model->compiled_data->stream_type;
50
2
  int i;
51
2
  if (stream_type == CCV_STREAM_CONTEXT_GPU)
52
0
  {
53
0
    for (i = 0; i < parallel_count; i++)
54
0
    {
55
0
      ccv_nnc_tensor_param_t info = {
56
0
        .type = CCV_TENSOR_GPU_MEMORY,
57
0
        .format = CCV_TENSOR_FORMAT_NHWC,
58
0
        .datatype = CCV_32F,
59
0
        .dim = {1},
60
0
      };
61
0
      CCV_TENSOR_SET_DEVICE_ID(info.type, i);
62
0
      norm2[i * 2] = ccv_nnc_tensor_new(ccv_nnc_xpu_alloc(&compiled_data->xpu_alloc, i, stream_context, ccv_nnc_tensor_data_size(info)), info, 0);
63
0
      norm2[i * 2 + 1] = ccv_nnc_tensor_new(ccv_nnc_xpu_alloc(&compiled_data->xpu_alloc, i, stream_context, ccv_nnc_tensor_data_size(info)), info, 0);
64
0
      max_normt[i] = ccv_nnc_tensor_new(ccv_nnc_xpu_alloc(&compiled_data->xpu_alloc, i, stream_context, ccv_nnc_tensor_data_size(info)), info, 0);
65
0
    }
66
2
  } else {
67
4
    for (i = 0; i < parallel_count; i++)
68
2
    {
69
2
      ccv_nnc_tensor_param_t info = {
70
2
        .type = CCV_TENSOR_CPU_MEMORY,
71
2
        .format = CCV_TENSOR_FORMAT_NHWC,
72
2
        .datatype = CCV_32F,
73
2
        .dim = {1},
74
2
      };
75
2
      norm2[i * 2] = ccv_nnc_tensor_new(0, info, 0);
76
2
      norm2[i * 2 + 1] = ccv_nnc_tensor_new(0, info, 0);
77
2
      max_normt[i] = ccv_nnc_tensor_new(0, info, 0);
78
2
    }
79
2
  }
80
  // zero out old norm2.
81
2
  if (parallel_count > 1)
82
0
  {
83
0
    ccv_nnc_stream_context_t* streams[parallel_count];
84
0
    ccv_nnc_stream_signal_t* signal;
85
0
    if (stream_context)
86
0
      signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
87
0
    for (i = 0; i < parallel_count; i++)
88
0
    {
89
0
      const int stream_type = CCV_TENSOR_GET_MEMORY(norm2[i * 2]->info.type) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
90
0
      const int device_id = CCV_TENSOR_GET_DEVICE_ID(norm2[i * 2]->info.type);
91
0
      int type = stream_type;
92
0
      CCV_STREAM_SET_DEVICE_ID(type, device_id);
93
0
      ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(compiled_data, type);
94
      // Wait for the signal before proceeding.
95
0
      if (stream_context)
96
0
        ccv_nnc_stream_context_wait_signal(stream_0, signal);
97
0
      ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(norm2[i * 2]), stream_0);
98
0
      if (stream_context)
99
0
      {
100
0
        ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
101
0
        ccv_nnc_stream_context_wait_signal(stream_context, signal);
102
0
      }
103
0
      streams[i] = stream_0;
104
0
    }
105
    // If this should be blocking, block it.
106
0
    if (!stream_context)
107
0
      for (i = 0; i < parallel_count; i++)
108
0
        if (streams[i])
109
0
          ccv_nnc_stream_context_wait(streams[i]);
110
2
  } else {
111
2
    ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(norm2[0]), stream_context);
112
2
  }
113
  // Gather norm2.
114
2
  ccv_nnc_cmd_t reduce_cmd = {
115
2
    .cmd = CCV_NNC_CUSTOM_FORWARD,
116
2
    .isa = &clip_grad_norm_reduce_norm2_vtab,
117
2
  };
118
2
  ccv_cnnp_model_parameter_gradients_map(model, parameters, reduce_cmd, ccv_nnc_no_hint, 0, 0, 0, norm2, parallel_count * 2, stream_context);
119
  // Now compute max(max_norm / norm2, 1.0).
120
2
  if (parallel_count > 1)
121
0
  {
122
0
    ccv_nnc_stream_context_t* streams[parallel_count];
123
0
    ccv_nnc_stream_signal_t* signal;
124
0
    if (stream_context)
125
0
      signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
126
0
    for (i = 0; i < parallel_count; i++)
127
0
    {
128
0
      const int stream_type = CCV_TENSOR_GET_MEMORY(norm2[i * 2]->info.type) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
129
0
      const int device_id = CCV_TENSOR_GET_DEVICE_ID(norm2[i * 2]->info.type);
130
0
      int type = stream_type;
131
0
      CCV_STREAM_SET_DEVICE_ID(type, device_id);
132
0
      ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(compiled_data, type);
133
      // Wait for the signal before proceeding.
134
0
      if (stream_context)
135
0
        ccv_nnc_stream_context_wait_signal(stream_0, signal);
136
0
      ccv_nnc_cmd_exec(CMD_EWSQRT_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(norm2[i * 2]), TENSOR_LIST(norm2[i * 2]), stream_0);
137
0
      ccv_nnc_cmd_exec(CMD_SET_FORWARD(max_norm), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(max_normt[i]), stream_0);
138
0
      ccv_nnc_cmd_exec(CMD_EWDIV_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(max_normt[i], norm2[i * 2]), TENSOR_LIST(norm2[i * 2]), stream_0);
139
0
      ccv_nnc_cmd_exec(CMD_CLAMP_FORWARD(NAN, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(norm2[i * 2]), TENSOR_LIST(norm2[i * 2]), stream_0);
140
0
      if (stream_context)
141
0
      {
142
0
        ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
143
0
        ccv_nnc_stream_context_wait_signal(stream_context, signal);
144
0
      }
145
0
      streams[i] = stream_0;
146
0
    }
147
    // If this should be blocking, block it.
148
0
    if (!stream_context)
149
0
      for (i = 0; i < parallel_count; i++)
150
0
        if (streams[i])
151
0
          ccv_nnc_stream_context_wait(streams[i]);
152
2
  } else {
153
2
    ccv_nnc_cmd_exec(CMD_EWSQRT_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(norm2[0]), TENSOR_LIST(norm2[0]), stream_context);
154
2
    ccv_nnc_cmd_exec(CMD_SET_FORWARD(max_norm), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(max_normt[0]), stream_context);
155
2
    ccv_nnc_cmd_exec(CMD_EWDIV_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(max_normt[0], norm2[0]), TENSOR_LIST(norm2[0]), stream_context);
156
2
    ccv_nnc_cmd_exec(CMD_CLAMP_FORWARD(NAN, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(norm2[0]), TENSOR_LIST(norm2[0]), stream_context);
157
2
  }
158
2
  ccv_nnc_cmd_t scatter_cmd = {
159
2
    .cmd = CCV_NNC_CUSTOM_FORWARD,
160
2
    .isa = &clip_grad_norm_scatter_norm2_vtab,
161
2
  };
162
2
  ccv_cnnp_model_parameter_gradients_map(model, parameters, scatter_cmd, ccv_nnc_no_hint, 0, norm2, parallel_count * 2, 0, 0, stream_context);
163
2
  if (stream_type == CCV_STREAM_CONTEXT_GPU)
164
0
    for (i = 0; i < parallel_count; i++)
165
0
    {
166
0
      ccv_nnc_xpu_free(&compiled_data->xpu_alloc, norm2[i * 2]->data.u8);
167
0
      ccv_nnc_xpu_free(&compiled_data->xpu_alloc, norm2[i * 2 + 1]->data.u8);
168
0
      ccv_nnc_xpu_free(&compiled_data->xpu_alloc, max_normt[i]->data.u8);
169
0
    }
170
4
  for (i = 0; i < parallel_count; i++)
171
2
  {
172
2
    ccv_nnc_tensor_free(norm2[i * 2]);
173
2
    ccv_nnc_tensor_free(norm2[i * 2 + 1]);
174
2
    ccv_nnc_tensor_free(max_normt[i]);
175
2
  }
176
2
}
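A minimal usage sketch (assuming the public ccv_cnnp_model_parameters() selector and the ALL_PARAMETERS macro from the cnnp API; `model` is compiled and a backward pass has already produced gradients):

  // Clip all parameter gradients to a total L2 norm of 1.0 before applying them.
  // norm_type must be 2 (asserted above); passing a null stream makes the call blocking.
  const ccv_cnnp_model_io_t all_parameters = ccv_cnnp_model_parameters(model, ALL_PARAMETERS, ALL_PARAMETERS);
  ccv_cnnp_model_parameters_clip_grad_norm(model, all_parameters, 2, 1.0, 0);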
177
178
// MARK - Add-on Functions
179
180
static int _ccv_cnnp_model_isnan(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
181
0
{
182
0
  const int device_id = CCV_TENSOR_GET_DEVICE_ID(inputs[0]->info.type);
183
0
  ccv_nnc_tensor_t* const old_isnanr = outputs[1 + device_id * 2];
184
0
  ccv_nnc_tensor_t* const isnanr = outputs[1 + device_id * 2 + 1];
185
0
  ccv_nnc_cmd_t reduce_cmd = CMD_REDUCE_ISNAN_FORWARD();
186
0
  reduce_cmd.info.reduce.count = ccv_nnc_tensor_nd(inputs[0]->info.dim);
187
0
  int i;
188
0
  for (i = 0; i < reduce_cmd.info.reduce.count; i++)
189
0
    reduce_cmd.info.reduce.axis[i] = i;
190
0
  ccv_nnc_cmd_exec(reduce_cmd, hint, flags, TENSOR_LIST(inputs[0]), TENSOR_LIST(isnanr), stream_context);
191
0
  ccv_nnc_cmd_exec(CMD_EWSUM_FORWARD(), hint, flags, TENSOR_LIST(old_isnanr, isnanr), TENSOR_LIST(old_isnanr), stream_context);
192
0
  return CCV_NNC_EXEC_SUCCESS;
193
0
}
194
195
static ccv_nnc_cmd_vtab_t reduce_isnan_vtab = {
196
  .exec = _ccv_cnnp_model_isnan
197
};
198
199
int ccv_cnnp_model_parameter_gradients_isnan(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, ccv_nnc_stream_context_t* const stream_context)
200
0
{
201
0
  ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
202
0
  assert(compiled_data);
203
0
  const int parallel_count = ccv_max(model->parallel_count, 1);
204
0
  ccv_nnc_tensor_t* isnanr[parallel_count * 2];
205
0
  const int stream_type = model->compiled_data->stream_type;
206
0
  int i;
207
0
  if (stream_type == CCV_STREAM_CONTEXT_GPU)
208
0
  {
209
0
    for (i = 0; i < parallel_count; i++)
210
0
    {
211
0
      ccv_nnc_tensor_param_t info = {
212
0
        .type = CCV_TENSOR_GPU_MEMORY,
213
0
        .format = CCV_TENSOR_FORMAT_NHWC,
214
0
        .datatype = CCV_32S,
215
0
        .dim = {1},
216
0
      };
217
0
      CCV_TENSOR_SET_DEVICE_ID(info.type, i);
218
0
      isnanr[i * 2] = ccv_nnc_tensor_new(ccv_nnc_xpu_alloc(&compiled_data->xpu_alloc, i, stream_context, ccv_nnc_tensor_data_size(info)), info, 0);
219
0
      isnanr[i * 2 + 1] = ccv_nnc_tensor_new(ccv_nnc_xpu_alloc(&compiled_data->xpu_alloc, i, stream_context, ccv_nnc_tensor_data_size(info)), info, 0);
220
0
    }
221
0
  } else {
222
0
    for (i = 0; i < parallel_count; i++)
223
0
    {
224
0
      ccv_nnc_tensor_param_t info = {
225
0
        .type = CCV_TENSOR_CPU_MEMORY,
226
0
        .format = CCV_TENSOR_FORMAT_NHWC,
227
0
        .datatype = CCV_32S,
228
0
        .dim = {1},
229
0
      };
230
0
      isnanr[i * 2] = ccv_nnc_tensor_new(0, info, 0);
231
0
      isnanr[i * 2 + 1] = ccv_nnc_tensor_new(0, info, 0);
232
0
    }
233
0
  }
234
  // zero out old isnanr.
235
0
  if (parallel_count > 1)
236
0
  {
237
0
    ccv_nnc_stream_context_t* streams[parallel_count];
238
0
    ccv_nnc_stream_signal_t* signal;
239
0
    if (stream_context)
240
0
      signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
241
0
    for (i = 0; i < parallel_count; i++)
242
0
    {
243
0
      const int stream_type = CCV_TENSOR_GET_MEMORY(isnanr[i * 2]->info.type) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
244
0
      const int device_id = CCV_TENSOR_GET_DEVICE_ID(isnanr[i * 2]->info.type);
245
0
      int type = stream_type;
246
0
      CCV_STREAM_SET_DEVICE_ID(type, device_id);
247
0
      ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(compiled_data, type);
248
      // Wait for the signal before proceeding.
249
0
      if (stream_context)
250
0
        ccv_nnc_stream_context_wait_signal(stream_0, signal);
251
0
      ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(isnanr[i * 2]), stream_0);
252
0
      if (stream_context)
253
0
      {
254
0
        ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
255
0
        ccv_nnc_stream_context_wait_signal(stream_context, signal);
256
0
      }
257
0
      streams[i] = stream_0;
258
0
    }
259
    // If this should be blocking, block it.
260
0
    if (!stream_context)
261
0
      for (i = 0; i < parallel_count; i++)
262
0
        if (streams[i])
263
0
          ccv_nnc_stream_context_wait(streams[i]);
264
0
  } else
265
0
    ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(isnanr[0]), stream_context);
266
  // Gather isnanr.
267
0
  ccv_nnc_cmd_t reduce_cmd = {
268
0
    .cmd = CCV_NNC_CUSTOM_FORWARD,
269
0
    .isa = &reduce_isnan_vtab,
270
0
  };
271
0
  ccv_cnnp_model_parameter_gradients_map(model, parameters, reduce_cmd, ccv_nnc_no_hint, 0, 0, 0, isnanr, parallel_count * 2, stream_context);
272
0
  for (i = 0; i < parallel_count; i++)
273
0
    ccv_nnc_tensor_free(isnanr[i * 2 + 1]);
274
0
  int retval = 0;
275
0
  if (stream_type == CCV_STREAM_CONTEXT_GPU)
276
0
  {
277
0
    ccv_nnc_tensor_param_t info = {
278
0
      .type = CCV_TENSOR_CPU_MEMORY,
279
0
      .format = CCV_TENSOR_FORMAT_NHWC,
280
0
      .datatype = CCV_32S,
281
0
      .dim = {1},
282
0
    };
283
0
    ccv_nnc_tensor_t* checknan = ccv_nnc_tensor_new(0, info, 0);
284
0
    for (i = 0; i < parallel_count; i++)
285
0
    {
286
0
      ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(isnanr[i * 2]), TENSOR_LIST(checknan), 0);
287
0
      if (checknan->data.i32[0] > 0)
288
0
      {
289
0
        retval = 1;
290
0
        break;
291
0
      }
292
0
    }
293
0
    ccv_nnc_tensor_free(checknan);
294
0
  } else {
295
0
    for (i = 0; i < parallel_count; i++)
296
0
      if (isnanr[i * 2]->data.i32[0] > 0)
297
0
      {
298
0
        retval = 1;
299
0
        break;
300
0
      }
301
0
  }
302
0
  for (i = 0; i < parallel_count; i++)
303
0
    ccv_nnc_tensor_free(isnanr[i * 2]);
304
0
  return retval;
305
0
}
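A hedged sketch of how this check could sit in a training loop (the apply-gradients step around it is an assumption, not part of this file):

  // Skip the optimizer step when any gradient contains NaN, e.g. under loss-scaled FP16 training.
  const ccv_cnnp_model_io_t all_parameters = ccv_cnnp_model_parameters(model, ALL_PARAMETERS, ALL_PARAMETERS);
  if (!ccv_cnnp_model_parameter_gradients_isnan(model, all_parameters, 0))
    ; // safe to apply gradients here; otherwise reduce the loss scale and retry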
306
307
// MARK - Core Layers
308
309
static void _ccv_cnnp_sum_build(ccv_cnnp_model_t* const self, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
310
62
{
311
62
  PRINT(CCV_CLI_VERBOSE, "[cnnp_sum_build] -\n");
312
62
  assert(output_size == 1);
313
62
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, ccv_nnc_tensor_symbol_params(graph, inputs[0]), 0);
314
62
  ccv_nnc_graph_exec_symbol_new(graph, CMD_EWSUM_FORWARD(), inputs, input_size, outputs, output_size, 0);
315
62
}
316
317
static ccv_cnnp_model_t* _ccv_cnnp_sum_copy(const ccv_cnnp_model_t* const self, void* const context);
318
319
static const ccv_cnnp_model_vtab_t ccv_cnnp_sum_isa = {
320
  .build = _ccv_cnnp_sum_build,
321
  .copy = _ccv_cnnp_sum_copy,
322
};
323
324
typedef struct {
325
  ccv_cnnp_model_t super;
326
  ccv_nnc_tensor_symbol_t output;
327
} ccv_cnnp_model_sum_t;
328
329
ccv_cnnp_model_t* ccv_cnnp_sum(const char* const name)
330
61
{
331
61
  ccv_cnnp_model_sum_t* const model_sum = (ccv_cnnp_model_sum_t*)cccalloc(1, sizeof(ccv_cnnp_model_sum_t));
332
61
  model_sum->super.isa = &ccv_cnnp_sum_isa;
333
61
  model_sum->super.input_size = 0;
334
61
  model_sum->super.outputs = &model_sum->output;
335
61
  model_sum->super.output_size = 1;
336
61
  ccv_cnnp_model_copy_name(&model_sum->super, name);
337
61
  return (ccv_cnnp_model_t*)model_sum;
338
61
}
339
340
static ccv_cnnp_model_t* _ccv_cnnp_sum_copy(const ccv_cnnp_model_t* const self, void* const context)
341
3
{
342
3
  return ccv_cnnp_sum(self->name);
343
3
}
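A short wiring sketch (assuming ccv_cnnp_input(), ccv_cnnp_model_apply() and the MODEL_IO_LIST() macro from the cnnp API):

  // EWSUM is element-wise, so both inputs must share a shape; the output
  // inherits inputs[0]'s tensor parameters, as in the build above.
  const ccv_cnnp_model_io_t x = ccv_cnnp_input();
  const ccv_cnnp_model_io_t y = ccv_cnnp_input();
  const ccv_cnnp_model_io_t z = ccv_cnnp_model_apply(ccv_cnnp_sum("sum"), MODEL_IO_LIST(x, y));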
344
345
typedef struct {
346
  ccv_cnnp_model_t super;
347
  int axis;
348
  ccv_nnc_tensor_symbol_t output;
349
} ccv_cnnp_model_concat_t;
350
351
static void _ccv_cnnp_concat_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
352
4
{
353
4
  const ccv_cnnp_model_concat_t* const self = (const ccv_cnnp_model_concat_t*)super;
354
4
  PRINT(CCV_CLI_VERBOSE, "[cnnp_concat_build] 1. -\n");
355
4
  assert(output_size == 1);
356
4
  ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
357
4
  int i, j;
358
4
  if (output_params.dim[0] == 0)
359
0
    for (i = 1; i < input_size; i++)
360
0
    {
361
0
      output_params = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
362
0
      if (output_params.dim[0] != 0)
363
0
        break;
364
0
    }
365
4
  const int nd = ccv_nnc_tensor_nd(output_params.dim);
366
4
  const int axis = self->axis;
367
4
  assert(axis < nd);
368
4
  output_params.dim[axis] = 0;
369
4
  int input_is_contiguous = 1;
370
12
  for (i = 0; i < input_size; i++)
371
8
  {
372
8
    const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
373
8
    const int input_nd = ccv_nnc_tensor_nd(input_params.dim);
374
8
    if (input_nd == 0)
375
0
    {
376
0
      PRINT(CCV_CLI_VERBOSE, "[cnnp_concat_build] %d. input[%d]: -\n", i + 2, i);
377
0
      input_is_contiguous = 0;
378
0
      continue;
379
0
    }
380
8
    if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE))
381
0
    {
382
0
      PRINT(CCV_CLI_VERBOSE, "[cnnp_concat_build] %d. input[%d]: (%d", i + 2, i, input_params.dim[0]);
383
0
      int i;
384
0
      for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC && input_params.dim[i] > 0; i++)
385
0
        PRINT(CCV_CLI_VERBOSE, ", %d", input_params.dim[i]);
386
0
      PRINT(CCV_CLI_VERBOSE, ")\n");
387
0
    }
388
8
    assert(input_nd == nd);
389
16
    for (j = 0; j < nd; j++)
390
8
      if (j != axis)
391
0
        { assert(input_params.dim[j] == output_params.dim[j]); }
392
8
    output_params.dim[axis] += input_params.dim[axis];
393
8
  }
394
4
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
395
4
  int ofs[CCV_NNC_MAX_DIM_ALLOC] = {};
396
4
  int stride[CCV_NNC_MAX_DIM_ALLOC] = {};
397
4
  ccv_nnc_tensor_get_stride(output_params.dim, stride);
398
4
  if (input_is_contiguous)
399
4
  {
400
4
    ccv_nnc_tensor_symbol_t aliases[input_size];
401
12
    for (i = 0; i < input_size; i++)
402
8
    {
403
8
      const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
404
8
      aliases[i] = ccv_nnc_tensor_symbol_alias_new(graph, outputs[0], ofs, stride, input_params, 0);
405
8
      ofs[axis] += input_params.dim[axis];
406
8
    }
407
    // Format transform is more flexible.
408
4
    ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), inputs, input_size, aliases, input_size, "concat");
409
4
  } else {
410
0
    ccv_nnc_tensor_symbol_t aliases[input_size];
411
0
    for (i = 0; i < input_size; i++)
412
0
    {
413
0
      const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
414
0
      if (input_params.dim[0] == 0)
415
0
      {
416
        // Create a new alias anyway even though we are not going to use it; this way the alias count will match during absorb.
417
0
        aliases[i] = ccv_nnc_tensor_symbol_alias_new(graph, outputs[0], ofs, stride, input_params, 0);
418
0
        continue;
419
0
      }
420
0
      aliases[i] = ccv_nnc_tensor_symbol_alias_new(graph, outputs[0], ofs, stride, input_params, 0);
421
0
      ofs[axis] += input_params.dim[axis];
422
0
    }
423
    // Format transform is more flexible.
424
0
    ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), inputs, input_size, aliases, input_size, "concat");
425
0
  }
426
4
}
427
428
static ccv_cnnp_model_t* _ccv_cnnp_concat_copy(const ccv_cnnp_model_t* const self, void* const context);
429
430
static const ccv_cnnp_model_vtab_t ccv_cnnp_concat_isa = {
431
  .build = _ccv_cnnp_concat_build,
432
  .copy = _ccv_cnnp_concat_copy,
433
};
434
435
ccv_cnnp_model_t* ccv_cnnp_concat(const int axis, const char* const name)
436
4
{
437
4
  ccv_cnnp_model_concat_t* const model_concat = (ccv_cnnp_model_concat_t*)cccalloc(1, sizeof(ccv_cnnp_model_concat_t));
438
4
  model_concat->super.isa = &ccv_cnnp_concat_isa;
439
4
  model_concat->super.input_size = 0;
440
4
  model_concat->super.outputs = &model_concat->output;
441
4
  model_concat->super.output_size = 1;
442
4
  model_concat->axis = axis;
443
4
  ccv_cnnp_model_copy_name(&model_concat->super, name);
444
4
  return (ccv_cnnp_model_t*)model_concat;
445
4
}
446
447
static ccv_cnnp_model_t* _ccv_cnnp_concat_copy(const ccv_cnnp_model_t* const super, void* const context)
448
0
{
449
0
  const ccv_cnnp_model_concat_t* const self = (const ccv_cnnp_model_concat_t*)super;
450
0
  return ccv_cnnp_concat(self->axis, self->super.name);
451
0
}
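A shape example for the build logic above: concatenating a (1, 4, 8) input with a (1, 6, 8) input along axis 1 yields (1, 10, 8), since every non-axis dimension must match (asserted in the build) and dim[axis] accumulates:

  // The inputs are written through aliases into the output, so no extra copy pass is needed.
  ccv_cnnp_model_t* const concat = ccv_cnnp_concat(1, "concat");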
452
453
typedef struct {
454
  ccv_cnnp_model_t super;
455
  int axis;
456
  ccv_nnc_tensor_symbol_t outputs[1];
457
} ccv_cnnp_model_chunk_t;
458
459
static void _ccv_cnnp_chunk_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
460
2
{
461
2
  const ccv_cnnp_model_chunk_t* const self = (const ccv_cnnp_model_chunk_t*)super;
462
2
  PRINT(CCV_CLI_VERBOSE, "[cnnp_chunk_build] 1. axis: %d\n", self->axis);
463
2
  assert(input_size == 1);
464
2
  const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
465
2
  if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE))
466
0
  {
467
0
    PRINT(CCV_CLI_VERBOSE, "[cnnp_chunk_build] 2. input: (%d", input_params.dim[0]);
468
0
    int i;
469
0
    for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC && input_params.dim[i] > 0; i++)
470
0
      PRINT(CCV_CLI_VERBOSE, ", %d", input_params.dim[i]);
471
0
    PRINT(CCV_CLI_VERBOSE, ")\n");
472
0
  }
473
2
  ccv_nnc_tensor_param_t output_params = input_params;
474
2
  int i;
475
2
  const int nd = ccv_nnc_tensor_nd(output_params.dim);
476
2
  const int axis = self->axis;
477
2
  assert(axis < nd);
478
2
  const int n = self->super.output_size;
479
2
  assert(n == output_size);
480
2
  assert(output_params.dim[axis] % n == 0);
481
2
  output_params.dim[axis] = output_params.dim[axis] / n;
482
2
  int ofs[CCV_NNC_MAX_DIM_ALLOC] = {};
483
2
  int stride[CCV_NNC_MAX_DIM_ALLOC] = {};
484
2
  ccv_nnc_tensor_get_stride(input_params.dim, stride);
485
2
  ccv_nnc_tensor_symbol_t to = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
486
2
  if (to.d == CCV_NNC_NO_TENSOR_SYMBOL) // If we are not reshaping an alias, it is straightforward.
487
2
  {
488
6
    for (i = 0; i < output_size; i++)
489
4
    {
490
4
      outputs[i] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], ofs, stride, output_params, 0);
491
4
      ofs[axis] += output_params.dim[axis];
492
4
    }
493
2
  } else {
494
    // Otherwise, we need to check if it is a permute. For a permute, we cannot alias directly.
495
    // We need to first materialize the permute and then reshape on top of it; otherwise the result will be wrong.
496
0
    int old_stride[CCV_NNC_MAX_DIM_ALLOC];
497
0
    ccv_nnc_tensor_symbol_alias_params(graph, inputs[0], 0, old_stride);
498
    // We identify a permute by checking whether the stride is not in descending order.
499
    // This also covers a "permute" done through reshape, rather than through ccv_cnnp_permute directly.
500
0
    int i, no_permute = 1;
501
0
    for (i = 1; no_permute && i < nd; i++)
502
0
      if (old_stride[i - 1] < old_stride[i])
503
0
        no_permute = 0;
504
0
    if (no_permute)
505
0
    { // Just a straightforward reshape if there is no permute.
506
0
      for (i = 0; i < output_size; i++)
507
0
      {
508
0
        outputs[i] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], ofs, old_stride, output_params, 0);
509
0
        ofs[axis] += output_params.dim[axis];
510
0
      }
511
0
    } else {
512
      // Otherwise, we first do a format transform to a plain tensor and then do the reshape.
513
0
      ccv_nnc_tensor_symbol_t permuted = ccv_nnc_tensor_symbol_new(graph, input_params, 0);
514
0
      ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(permuted), "reshape");
515
0
      for (i = 0; i < output_size; i++)
516
0
      {
517
0
        outputs[i] = ccv_nnc_tensor_symbol_alias_new(graph, permuted, ofs, stride, output_params, 0);
518
0
        ofs[axis] += output_params.dim[axis];
519
0
      }
520
0
    }
521
0
  }
522
2
}
523
524
static ccv_cnnp_model_t* _ccv_cnnp_chunk_copy(const ccv_cnnp_model_t* const self, void* const context);
525
526
static const ccv_cnnp_model_vtab_t ccv_cnnp_chunk_isa = {
527
  .build = _ccv_cnnp_chunk_build,
528
  .copy = _ccv_cnnp_chunk_copy,
529
};
530
531
ccv_cnnp_model_t* ccv_cnnp_chunk(const int n, const int axis, const char* const name)
532
2
{
533
2
  assert(n >= 1);
534
2
  ccv_cnnp_model_chunk_t* const model_chunk = (ccv_cnnp_model_chunk_t*)cccalloc(1, sizeof(ccv_cnnp_model_chunk_t) + sizeof(ccv_nnc_tensor_symbol_t) * (n - 1));
535
2
  model_chunk->super.isa = &ccv_cnnp_chunk_isa;
536
2
  model_chunk->super.input_size = 1;
537
2
  model_chunk->super.outputs = model_chunk->outputs;
538
2
  model_chunk->super.output_size = n;
539
2
  model_chunk->axis = axis;
540
2
  ccv_cnnp_model_copy_name(&model_chunk->super, name);
541
2
  return (ccv_cnnp_model_t*)model_chunk;
542
2
}
543
544
static ccv_cnnp_model_t* _ccv_cnnp_chunk_copy(const ccv_cnnp_model_t* const super, void* const context)
545
0
{
546
0
  const ccv_cnnp_model_chunk_t* const self = (const ccv_cnnp_model_chunk_t*)super;
547
0
  return ccv_cnnp_chunk(self->super.output_size, self->axis, self->super.name);
548
0
}
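A sketch of the inverse of concat: splitting a (N, 8) tensor along axis 1 into two (N, 4) views; dim[axis] must be divisible by n, as asserted in the build above:

  // The model ends up with output_size == 2; each output stays a zero-copy alias
  // unless the input turns out to be permuted.
  ccv_cnnp_model_t* const halves = ccv_cnnp_chunk(2, 1, "halves");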
549
550
typedef struct {
551
  ccv_cnnp_model_t super;
552
  ccv_nnc_tensor_symbol_t output;
553
  int format;
554
  int dim[CCV_NNC_MAX_DIM_ALLOC];
555
  int ofs[CCV_NNC_MAX_DIM_ALLOC];
556
  int stride[CCV_NNC_MAX_DIM_ALLOC];
557
} ccv_cnnp_model_reshape_t;
558
559
static void _ccv_cnnp_reshape_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
560
1.06k
{
561
1.06k
  assert(input_size == 1);
562
1.06k
  assert(output_size == 1);
563
1.06k
  ccv_cnnp_model_reshape_t* const self = (ccv_cnnp_model_reshape_t*)super;
564
1.06k
  if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE))
565
0
  {
566
0
    PRINT(CCV_CLI_VERBOSE, "[cnnp_reshape_build] 1. dim: (%d", self->dim[0]);
567
0
    int i;
568
0
    for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC && self->dim[i] > 0; i++)
569
0
      PRINT(CCV_CLI_VERBOSE, ", %d", self->dim[i]);
570
0
    const int count = i;
571
0
    PRINT(CCV_CLI_VERBOSE, "), ofs: (%d", self->ofs[0]);
572
0
    for (i = 1; i < count; i++)
573
0
      PRINT(CCV_CLI_VERBOSE, ", %d", self->ofs[i]);
574
0
    PRINT(CCV_CLI_VERBOSE, "), stride: (%d", self->stride[0]);
575
0
    for (i = 1; i < count; i++)
576
0
      PRINT(CCV_CLI_VERBOSE, ", %d", self->stride[i]);
577
0
    PRINT(CCV_CLI_VERBOSE, ")\n");
578
0
  }
579
1.06k
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
580
1.06k
  if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE))
581
0
  {
582
0
    PRINT(CCV_CLI_VERBOSE, "[cnnp_reshape_build] 2. input: (%d", params.dim[0]);
583
0
    int i;
584
0
    for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC && params.dim[i] > 0; i++)
585
0
      PRINT(CCV_CLI_VERBOSE, ", %d", params.dim[i]);
586
0
    PRINT(CCV_CLI_VERBOSE, ")\n");
587
0
  }
588
1.06k
  if (self->format > 0)
589
5
    params.format = self->format;
590
1.06k
  assert(ccv_nnc_dimension_count(self->dim) <= ccv_nnc_tensor_count(params));
591
1.06k
  ccv_nnc_tensor_symbol_t to = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
592
1.06k
  int stride_from_dim[CCV_NNC_MAX_DIM_ALLOC];
593
1.06k
  if (to.d == CCV_NNC_NO_TENSOR_SYMBOL) // If we are not reshaping an alias, it is straightforward.
594
1.06k
  {
595
1.06k
    memcpy(params.dim, self->dim, sizeof(params.dim));
596
1.06k
    int* stride;
597
1.06k
    if (self->stride[0] == 0)
598
1.06k
    {
599
1.06k
      ccv_nnc_tensor_get_stride(self->dim, stride_from_dim);
600
1.06k
      stride = stride_from_dim;
601
1.06k
    } else
602
5
      stride = self->stride;
603
1.06k
    outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], self->ofs, stride, params, 0);
604
1.06k
  } else {
605
    // Otherwise, we need to check if it is a permute. For a permute, we cannot alias directly.
606
    // We need to first materialize the permute and then reshape on top of it; otherwise the result will be wrong.
607
1
    int old_stride[CCV_NNC_MAX_DIM_ALLOC];
608
1
    ccv_nnc_tensor_symbol_alias_params(graph, inputs[0], 0, old_stride);
609
    // We identify a permute by checking whether the stride is not in descending order.
610
    // This also covers a "permute" done through reshape, rather than through ccv_cnnp_permute directly.
611
1
    const int nd = ccv_nnc_tensor_nd(params.dim);
612
1
    const int new_nd = ccv_nnc_tensor_nd(self->dim);
613
1
    int i, no_permute = 1;
614
    // If the new dim has a different nd, or we actually have a stride, we need to check whether there is a permute.
615
1
  if (new_nd != nd || (self->stride[0] != 0 && memcmp(self->stride, old_stride, sizeof(self->stride)) != 0))
616
2
      for (i = 1; no_permute && i < nd; i++)
617
1
        if (old_stride[i - 1] < old_stride[i])
618
1
          no_permute = 0;
619
1
    if (no_permute)
620
0
    { // Just a straightforward reshape if there is no permute.
621
0
      memcpy(params.dim, self->dim, sizeof(params.dim));
622
0
      int* stride;
623
0
      if (self->stride[0] == 0)
624
0
      {
625
0
        if (new_nd != nd) // Cannot use old stride.
626
0
        {
627
0
          ccv_nnc_tensor_get_stride(self->dim, stride_from_dim);
628
0
          stride = stride_from_dim;
629
0
        } else
630
0
          stride = old_stride;
631
0
      } else
632
0
        stride = self->stride;
633
0
      outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], self->ofs, stride, params, 0);
634
1
    } else {
635
      // Otherwise, we first do a format transform to a plain tensor and then do the reshape.
636
1
      ccv_nnc_tensor_symbol_t permuted = ccv_nnc_tensor_symbol_new(graph, params, 0);
637
1
      ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(permuted), "reshape");
638
1
      memcpy(params.dim, self->dim, sizeof(params.dim));
639
1
      int* stride;
640
1
      if (self->stride[0] == 0)
641
1
      {
642
1
        ccv_nnc_tensor_get_stride(self->dim, stride_from_dim);
643
1
        stride = stride_from_dim;
644
1
      } else
645
0
        stride = self->stride;
646
      // And then we create an alias against the permuted one.
647
1
      outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, permuted, self->ofs, stride, params, 0);
648
1
    }
649
1
  }
650
1.06k
}
651
652
static ccv_cnnp_model_t* _ccv_cnnp_reshape_copy(const ccv_cnnp_model_t* const super, void* const context);
653
654
static const ccv_cnnp_model_vtab_t ccv_cnnp_reshape_isa = {
655
  .build = _ccv_cnnp_reshape_build,
656
  .copy = _ccv_cnnp_reshape_copy,
657
};
658
659
ccv_cnnp_model_t* ccv_cnnp_reshape(const int format, const int dim[CCV_NNC_MAX_DIM_ALLOC], const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC], const char* const name)
660
1.06k
{
661
1.06k
  ccv_cnnp_model_reshape_t* const model_reshape = (ccv_cnnp_model_reshape_t*)cccalloc(1, sizeof(ccv_cnnp_model_reshape_t));
662
1.06k
  model_reshape->super.isa = &ccv_cnnp_reshape_isa;
663
1.06k
  model_reshape->super.input_size = 1;
664
1.06k
  model_reshape->super.outputs = &model_reshape->output;
665
1.06k
  model_reshape->super.output_size = 1;
666
1.06k
  ccv_cnnp_model_copy_name(&model_reshape->super, name);
667
1.06k
  model_reshape->format = format;
668
1.06k
  memcpy(model_reshape->dim, dim, sizeof(model_reshape->dim));
669
1.06k
  memcpy(model_reshape->ofs, ofs, sizeof(model_reshape->ofs));
670
1.06k
  if (stride[0] != 0)
671
5
    memcpy(model_reshape->stride, stride, sizeof(model_reshape->stride));
672
1.06k
  return (ccv_cnnp_model_t*)model_reshape;
673
1.06k
}
674
675
static ccv_cnnp_model_t* _ccv_cnnp_reshape_copy(const ccv_cnnp_model_t* const super, void* const context)
676
1.00k
{
677
1.00k
  const ccv_cnnp_model_reshape_t* const self = (const ccv_cnnp_model_reshape_t*)super;
678
1.00k
  return ccv_cnnp_reshape(self->format, self->dim, self->ofs, self->stride, self->super.name);
679
1.00k
}
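A hedged constructor example (format 0 keeps the input's format; an all-zero ofs and stride mean "start at the origin and derive the stride from dim", per the build above):

  // View a contiguous 64-element tensor as 8x8 without copying.
  ccv_cnnp_model_t* const as_8x8 = ccv_cnnp_reshape(0, DIM_ALLOC(8, 8), DIM_ALLOC(), DIM_ALLOC(), "as_8x8");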
680
681
typedef struct {
682
  ccv_cnnp_model_t super;
683
  ccv_nnc_tensor_symbol_t output;
684
  int type;
685
  int begin[CCV_NNC_MAX_DIM_ALLOC];
686
  int end[CCV_NNC_MAX_DIM_ALLOC];
687
} ccv_cnnp_model_pad_t;
688
689
static void _ccv_cnnp_pad_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
690
1
{
691
1
  assert(input_size == 1);
692
1
  assert(output_size == 1);
693
1
  ccv_cnnp_model_pad_t* const self = (ccv_cnnp_model_pad_t*)super;
694
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_pad_build] -\n");
695
1
  const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
696
1
  const int nd = ccv_nnc_tensor_nd(input_params.dim);
697
1
  ccv_nnc_tensor_param_t params = input_params;
698
1
  int i;
699
5
  for (i = 0; i < nd; i++)
700
4
    params.dim[i] += self->begin[i] + self->end[i];
701
1
  const ccv_nnc_tensor_symbol_t padded = ccv_nnc_tensor_symbol_new(graph, params, 0);
702
1
  ccv_nnc_cmd_t pad = CMD_PAD_FORWARD(self->type, (), ());
703
1
  memcpy(pad.info.size.dim, self->begin, sizeof(pad.info.size.dim));
704
1
  memcpy(pad.info.pad.end, self->end, sizeof(pad.info.pad.end));
705
1
  ccv_nnc_graph_exec_symbol_new(graph, pad, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(padded), "pad");
706
1
  outputs[0] = padded;
707
1
}
708
709
static ccv_cnnp_model_t* _ccv_cnnp_pad_copy(const ccv_cnnp_model_t* const super, void* const context);
710
711
static const ccv_cnnp_model_vtab_t ccv_cnnp_pad_isa = {
712
  .build = _ccv_cnnp_pad_build,
713
  .copy = _ccv_cnnp_pad_copy,
714
};
715
716
ccv_cnnp_model_t* ccv_cnnp_pad(const int type, const int begin[CCV_NNC_MAX_DIM_ALLOC], const int end[CCV_NNC_MAX_DIM_ALLOC], const char* const name)
717
1
{
718
1
  ccv_cnnp_model_pad_t* const model_pad = (ccv_cnnp_model_pad_t*)cccalloc(1, sizeof(ccv_cnnp_model_pad_t));
719
1
  model_pad->super.isa = &ccv_cnnp_pad_isa;
720
1
  model_pad->super.input_size = 1;
721
1
  model_pad->super.outputs = &model_pad->output;
722
1
  model_pad->super.output_size = 1;
723
1
  ccv_cnnp_model_copy_name(&model_pad->super, name);
724
1
  model_pad->type = type;
725
1
  memcpy(model_pad->begin, begin, sizeof(model_pad->begin));
726
1
  memcpy(model_pad->end, end, sizeof(model_pad->end));
727
1
  return (ccv_cnnp_model_t*)model_pad;
728
1
}
729
730
static ccv_cnnp_model_t* _ccv_cnnp_pad_copy(const ccv_cnnp_model_t* const super, void* const context)
731
0
{
732
0
  const ccv_cnnp_model_pad_t* const self = (const ccv_cnnp_model_pad_t*)super;
733
0
  return ccv_cnnp_pad(self->type, self->begin, self->end, self->super.name);
734
0
}
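A sketch of padding an NHWC tensor by one element on each side of its H and W axes (CCV_NNC_PAD_ZERO is assumed to be nnc's zero-fill pad type):

  // Output dim[i] = input dim[i] + begin[i] + end[i], as computed in the build above.
  ccv_cnnp_model_t* const pad = ccv_cnnp_pad(CCV_NNC_PAD_ZERO, DIM_ALLOC(0, 1, 1, 0), DIM_ALLOC(0, 1, 1, 0), "pad");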
735
736
typedef struct {
737
  ccv_cnnp_model_t super;
738
  ccv_nnc_tensor_symbol_t output;
739
} ccv_cnnp_model_identity_t;
740
741
static void _ccv_cnnp_identity_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
742
0
{
743
0
  assert(input_size == 1);
744
0
  assert(output_size == 1);
745
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_identity_build] -\n");
746
0
  outputs[0] = inputs[0];
747
0
}
748
749
static ccv_cnnp_model_t* _ccv_cnnp_identity_copy(const ccv_cnnp_model_t* const super, void* const context);
750
751
static const ccv_cnnp_model_vtab_t ccv_cnnp_identity_isa = {
752
  .build = _ccv_cnnp_identity_build,
753
  .copy = _ccv_cnnp_identity_copy,
754
};
755
756
ccv_cnnp_model_t* ccv_cnnp_identity(const char* const name)
757
0
{
758
0
  ccv_cnnp_model_identity_t* const model_identity = (ccv_cnnp_model_identity_t*)cccalloc(1, sizeof(ccv_cnnp_model_identity_t));
759
0
  model_identity->super.isa = &ccv_cnnp_identity_isa;
760
0
  model_identity->super.input_size = 1;
761
0
  model_identity->super.outputs = &model_identity->output;
762
0
  model_identity->super.output_size = 1;
763
0
  ccv_cnnp_model_copy_name(&model_identity->super, name);
764
0
  return (ccv_cnnp_model_t*)model_identity;
765
0
}
766
767
static ccv_cnnp_model_t* _ccv_cnnp_identity_copy(const ccv_cnnp_model_t* const super, void* const context)
768
0
{
769
0
  const ccv_cnnp_model_identity_t* const self = (const ccv_cnnp_model_identity_t*)super;
770
0
  return ccv_cnnp_identity(self->super.name);
771
0
}
772
773
typedef struct {
774
  ccv_cnnp_model_t super;
775
  ccv_nnc_tensor_symbol_t output;
776
  int index[CCV_NNC_MAX_DIM_ALLOC];
777
} ccv_cnnp_model_permute_t;
778
779
static void _ccv_cnnp_permute_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
780
1
{
781
1
  assert(input_size == 1);
782
1
  assert(output_size == 1);
783
1
  ccv_cnnp_model_permute_t* const self = (ccv_cnnp_model_permute_t*)super;
784
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_permute_build] -\n");
785
1
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
786
1
  ccv_nnc_tensor_symbol_t to = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
787
1
  const int nd = ccv_nnc_tensor_nd(params.dim);
788
1
  int input_dim[CCV_NNC_MAX_DIM_ALLOC];
789
1
  memcpy(input_dim, params.dim, sizeof(params.dim));
790
1
  int input_stride[CCV_NNC_MAX_DIM_ALLOC] = {};
791
1
  int output_stride[CCV_NNC_MAX_DIM_ALLOC] = {};
792
1
  if (to.d == CCV_NNC_NO_TENSOR_SYMBOL) // If it is not an alias, find the stride and permute.
793
0
  {
794
0
    ccv_nnc_tensor_get_stride(input_dim, input_stride);
795
0
    int i;
796
0
    for (i = 0; i < nd; i++)
797
0
    {
798
0
      const int idx = self->index[i];
799
0
      assert(idx >= 0 && idx < nd);
800
0
      params.dim[i] = input_dim[idx];
801
0
      output_stride[i] = input_stride[idx];
802
0
    }
803
0
    outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], ccv_nnc_no_ofs, output_stride, params, 0);
804
1
  } else {
805
    // If it is an alias, we can get the stride from it and use that.
806
1
    int input_ofs[CCV_NNC_MAX_DIM_ALLOC];
807
1
    ccv_nnc_tensor_symbol_alias_params(graph, inputs[0], input_ofs, input_stride);
808
1
    assert(input_stride[0] != 0);
809
1
    int output_ofs[CCV_NNC_MAX_DIM_ALLOC] = {};
810
1
    int i;
811
4
    for (i = 0; i < nd; i++)
812
3
    {
813
3
      const int idx = self->index[i];
814
3
      assert(idx >= 0 && idx < nd);
815
3
      params.dim[i] = input_dim[idx];
816
3
      output_stride[i] = input_stride[idx];
817
3
      output_ofs[i] = input_ofs[idx];
818
3
    }
819
1
    outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], output_ofs, output_stride, params, 0);
820
1
  }
821
1
}
822
823
static ccv_cnnp_model_t* _ccv_cnnp_permute_copy(const ccv_cnnp_model_t* const super, void* const context);
824
825
static const ccv_cnnp_model_vtab_t ccv_cnnp_permute_isa = {
826
  .build = _ccv_cnnp_permute_build,
827
  .copy = _ccv_cnnp_permute_copy,
828
};
829
830
ccv_cnnp_model_t* ccv_cnnp_permute(const int index[CCV_NNC_MAX_DIM_ALLOC], const char* const name)
831
1
{
832
1
  ccv_cnnp_model_permute_t* const model_permute = (ccv_cnnp_model_permute_t*)cccalloc(1, sizeof(ccv_cnnp_model_permute_t));
833
1
  model_permute->super.isa = &ccv_cnnp_permute_isa;
834
1
  model_permute->super.input_size = 1;
835
1
  model_permute->super.outputs = &model_permute->output;
836
1
  model_permute->super.output_size = 1;
837
1
  ccv_cnnp_model_copy_name(&model_permute->super, name);
838
1
  memcpy(model_permute->index, index, sizeof(model_permute->index));
839
1
  return (ccv_cnnp_model_t*)model_permute;
840
1
}
841
842
static ccv_cnnp_model_t* _ccv_cnnp_permute_copy(const ccv_cnnp_model_t* const super, void* const context)
843
0
{
844
0
  const ccv_cnnp_model_permute_t* const self = (const ccv_cnnp_model_permute_t*)super;
845
0
  return ccv_cnnp_permute(self->index, self->super.name);
846
0
}
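A sketch: index[i] names which input axis becomes output axis i, so an NHWC-to-NCHW transpose reads:

  // Alias-only: params.dim[i] = input_dim[index[i]] and the stride is carried along, so no data moves.
  ccv_cnnp_model_t* const to_nchw = ccv_cnnp_permute(DIM_ALLOC(0, 3, 1, 2), "to_nchw");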
847
848
typedef struct {
849
  ccv_cnnp_model_t super;
850
  int index;
851
  ccv_nnc_tensor_symbol_t output;
852
} ccv_cnnp_model_extract_t;
853
854
static void _ccv_cnnp_extract_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
855
6
{
856
6
  assert(output_size == 1);
857
6
  ccv_cnnp_model_extract_t* const self = (ccv_cnnp_model_extract_t*)super;
858
6
  PRINT(CCV_CLI_VERBOSE, "[cnnp_extract_build] index: %d\n", self->index);
859
6
  outputs[0] = inputs[self->index];
860
6
}
861
862
static ccv_cnnp_model_t* _ccv_cnnp_extract_copy(const ccv_cnnp_model_t* const self, void* const context);
863
864
static const ccv_cnnp_model_vtab_t ccv_cnnp_extract_isa = {
865
  .build = _ccv_cnnp_extract_build,
866
  .copy = _ccv_cnnp_extract_copy,
867
};
868
869
ccv_cnnp_model_t* ccv_cnnp_extract(const int index, const char* const name)
870
6
{
871
6
  ccv_cnnp_model_extract_t* const model_extract = (ccv_cnnp_model_extract_t*)cccalloc(1, sizeof(ccv_cnnp_model_extract_t));
872
6
  model_extract->index = index;
873
6
  model_extract->super.isa = &ccv_cnnp_extract_isa;
874
6
  model_extract->super.input_size = 0;
875
6
  model_extract->super.outputs = &model_extract->output;
876
6
  model_extract->super.output_size = 1;
877
6
  ccv_cnnp_model_copy_name(&model_extract->super, name);
878
6
  return (ccv_cnnp_model_t*)model_extract;
879
6
}
880
881
static ccv_cnnp_model_t* _ccv_cnnp_extract_copy(const ccv_cnnp_model_t* const super, void* const context)
882
0
{
883
0
  ccv_cnnp_model_extract_t* const self = (ccv_cnnp_model_extract_t*)super;
884
0
  return ccv_cnnp_extract(self->index, self->super.name);
885
0
}
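A one-line sketch: the build simply forwards inputs[index] to outputs[0], so this picks the second of the wired inputs:

  ccv_cnnp_model_t* const second = ccv_cnnp_extract(1, "second");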
886
887
typedef struct {
888
  ccv_cnnp_model_t super;
889
  ccv_nnc_tensor_symbol_t output;
890
} ccv_cnnp_model_flatten_t;
891
892
static void _ccv_cnnp_flatten_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
893
10
{
894
10
  PRINT(CCV_CLI_VERBOSE, "[cnnp_flatten_build] -\n");
895
10
  assert(input_size == 1);
896
10
  assert(output_size == 1);
897
10
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
898
10
  ccv_nnc_tensor_param_t output_params = params;
899
10
  memset(output_params.dim, 0, sizeof(output_params.dim));
900
10
  output_params.dim[0] = ccv_nnc_tensor_get_n(params);
901
10
  assert(output_params.dim[0] > 0);
902
10
  output_params.dim[1] = ccv_nnc_tensor_count(params) / output_params.dim[0];
903
10
  int stride[CCV_NNC_MAX_DIM_ALLOC] = {};
904
10
  ccv_nnc_tensor_get_stride(output_params.dim, stride);
905
10
  outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], DIM_ALLOC(), stride, output_params, 0);
906
10
}
907
908
static ccv_cnnp_model_t* _ccv_cnnp_flatten_copy(const ccv_cnnp_model_t* const self, void* const context);
909
910
static const ccv_cnnp_model_vtab_t ccv_cnnp_flatten_isa = {
911
  .build = _ccv_cnnp_flatten_build,
912
  .copy = _ccv_cnnp_flatten_copy,
913
};
914
915
ccv_cnnp_model_t* ccv_cnnp_flatten(const char* const name)
916
12
{
917
12
  ccv_cnnp_model_flatten_t* const model_flatten = (ccv_cnnp_model_flatten_t*)cccalloc(1, sizeof(ccv_cnnp_model_flatten_t));
918
12
  model_flatten->super.isa = &ccv_cnnp_flatten_isa;
919
12
  model_flatten->super.input_size = 1;
920
12
  model_flatten->super.outputs = &model_flatten->output;
921
12
  model_flatten->super.output_size = 1;
922
12
  ccv_cnnp_model_copy_name(&model_flatten->super, name);
923
12
  return (ccv_cnnp_model_t*)model_flatten;
924
12
}
925
926
static ccv_cnnp_model_t* _ccv_cnnp_flatten_copy(const ccv_cnnp_model_t* const self, void* const context)
927
2
{
928
2
  return ccv_cnnp_flatten(self->name);
929
2
}
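A shape example for the build above: a (32, 8, 8, 4) input flattens to (32, 256), since dim[0] keeps n = 32 and dim[1] = 8192 / 32 = 256:

  // Implemented as a zero-copy alias with a freshly computed stride.
  ccv_cnnp_model_t* const flatten = ccv_cnnp_flatten("flatten");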
930
931
// MARK - Batch Norm Layer
932
933
typedef struct {
934
  ccv_cnnp_model_t super;
935
  ccv_nnc_tensor_symbol_t output;
936
  ccv_nnc_tensor_symbol_t bias;
937
  ccv_nnc_tensor_symbol_t scale;
938
  ccv_nnc_graph_exec_symbol_t batch_norm;
939
  ccv_nnc_cmd_param_t params;
940
  ccv_array_t* zero_inits;
941
  ccv_array_t* retainables;
942
} ccv_cnnp_model_batch_norm_t;
943
944
static void _ccv_cnnp_batch_norm_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
945
75
{
946
75
  assert(input_size == 1);
947
75
  assert(output_size == 1);
948
75
  ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
949
75
  PRINT(CCV_CLI_VERBOSE, "[cnnp_batch_norm_build] -\n");
950
75
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
951
75
  const int nd = ccv_nnc_tensor_nd(params.dim);
952
75
  ccv_nnc_tensor_param_t bias_params = params;
953
75
  memset(bias_params.dim, 0, sizeof(bias_params.dim));
954
  // If the precision is not enough, bump it to 32-bit floating point.
955
75
  if (bias_params.datatype != CCV_32F && bias_params.datatype != CCV_64F)
956
16
    bias_params.datatype = CCV_32F;
957
75
  bias_params.dim[0] = nd > 1 ? ccv_nnc_tensor_get_c(params) : params.dim[0];
958
75
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, params, 0);
959
  // Both scale and bias are shared if this model is reused.
960
75
  if (!self->scale.graph)
961
75
    self->scale = ccv_nnc_tensor_symbol_new(graph, bias_params, "scale");
962
75
  if (!self->bias.graph)
963
75
    self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
964
75
  const ccv_nnc_tensor_symbol_t mean = ccv_nnc_tensor_symbol_new(graph, bias_params, "mean");
965
75
  const ccv_nnc_tensor_symbol_t var = ccv_nnc_tensor_symbol_new(graph, bias_params, "var");
966
  // By contrast, mean, var, saved_mean and saved_inv_std are not reused.
967
75
  if (!self->zero_inits)
968
75
    self->zero_inits = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
969
75
  ccv_array_push(self->zero_inits, &mean);
970
75
  ccv_array_push(self->zero_inits, &var);
971
75
  const ccv_nnc_tensor_symbol_t out_mean = ccv_nnc_tensor_symbol_new(graph, bias_params, "out_mean");
972
75
  const ccv_nnc_tensor_symbol_t out_var = ccv_nnc_tensor_symbol_new(graph, bias_params, "out_var");
973
75
  if (!self->retainables)
974
75
    self->retainables = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
975
75
  ccv_array_push(self->retainables, &out_mean);
976
75
  ccv_array_push(self->retainables, &out_var);
977
75
  const ccv_nnc_tensor_symbol_t saved_mean = ccv_nnc_tensor_symbol_new(graph, bias_params, "saved_mean");
978
75
  const ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(graph, bias_params, "saved_inv_std");
979
75
  const int hw = ccv_nnc_tensor_hw(params, ccv_nnc_tensor_nd(params.dim));
980
75
  ccv_nnc_cmd_param_t batch_norm = self->params;
981
75
  batch_norm.bnorm.count = hw >= 0 ? CCV_NNC_MAX_DIM + 1 : 1;
982
75
  int i;
983
75
  batch_norm.bnorm.axis[0] = (params.format == CCV_TENSOR_FORMAT_CHWN) ? 3 : 0;
984
75
  if (hw >= 0)
985
225
    for (i = 0; i < CCV_NNC_MAX_DIM; i++)
986
150
      batch_norm.bnorm.axis[i + 1] = i + hw;
987
75
  self->params = batch_norm;
988
75
  self->batch_norm = ccv_nnc_graph_exec_symbol_new(graph, ccv_nnc_cmd(CCV_NNC_BATCH_NORM_FORWARD, 0, batch_norm, 0), TENSOR_SYMBOL_LIST(inputs[0], self->scale, self->bias, mean, var), TENSOR_SYMBOL_LIST(output, out_mean, out_var, saved_mean, saved_inv_std), "batch_norm");
989
75
  outputs[0] = output;
990
75
}
991
992
static void _ccv_cnnp_batch_norm_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
993
24
{
994
24
  ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
995
24
  if (self->scale.graph)
996
24
    initializer(context, CMD_RANDOM_UNIFORM_FORWARD(0, 1), ccv_nnc_no_hint, 0, 0, self->scale);
997
24
  if (self->bias.graph)
998
24
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
999
24
  int i;
1000
24
  if (self->zero_inits)
1001
72
    for (i = 0; i < self->zero_inits->rnum; i++)
1002
48
      initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, *(ccv_nnc_tensor_symbol_t*)ccv_array_get(self->zero_inits, i));
1003
24
}
1004
1005
static void _ccv_cnnp_batch_norm_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
1006
75
{
1007
75
  ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
1008
75
  if (self->scale.graph)
1009
75
    add_to_array(parameters, self->scale, is_trainable);
1010
75
  if (self->bias.graph)
1011
75
    add_to_array(parameters, self->bias, is_trainable);
1012
75
}
1013
1014
static void _ccv_cnnp_batch_norm_add_to_output(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const outputs)
1015
75
{
1016
75
  ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
1017
75
  int i;
1018
75
  if (self->retainables)
1019
225
    for (i = 0; i < self->retainables->rnum; i++)
1020
150
    {
1021
150
      const ccv_nnc_tensor_symbol_t symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(self->retainables, i);
1022
150
      add_to_array(outputs, symbol, 0);
1023
150
    }
1024
75
}
1025
1026
static void _ccv_cnnp_batch_norm_set_is_test(ccv_cnnp_model_t* const super, const int is_test, const ccv_cnnp_cmd_updater_f updater, void* const context)
1027
32
{
1028
32
  ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
1029
32
  if (self->batch_norm.graph)
1030
32
  {
1031
32
    self->params.bnorm.is_test = is_test;
1032
32
    updater(context, self->batch_norm, ccv_nnc_cmd(CCV_NNC_BATCH_NORM_FORWARD, 0, self->params, 0), ccv_nnc_no_hint);
1033
32
  }
1034
32
}
1035
1036
static void _ccv_cnnp_batch_norm_deinit(ccv_cnnp_model_t* const super)
1037
83
{
1038
83
  ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
1039
83
  if (self->zero_inits)
1040
75
    ccv_array_free(self->zero_inits);
1041
83
  if (self->retainables)
1042
75
    ccv_array_free(self->retainables);
1043
83
}
1044
1045
static ccv_cnnp_model_t* _ccv_cnnp_batch_norm_copy(const ccv_cnnp_model_t* const super, void* const context);
1046
1047
static const ccv_cnnp_model_vtab_t ccv_cnnp_batch_norm_isa = {
1048
  .build = _ccv_cnnp_batch_norm_build,
1049
  .init_states = _ccv_cnnp_batch_norm_init_states,
1050
  .add_to_parameter = _ccv_cnnp_batch_norm_add_to_parameter,
1051
  .add_to_output = _ccv_cnnp_batch_norm_add_to_output,
1052
  .copy = _ccv_cnnp_batch_norm_copy,
1053
  .set_is_test = _ccv_cnnp_batch_norm_set_is_test,
1054
  .deinit = _ccv_cnnp_batch_norm_deinit,
1055
};
1056
1057
ccv_cnnp_model_t* ccv_cnnp_batch_norm(const float momentum, const float epsilon, const int is_trainable, const char* const name)
1058
83
{
1059
83
  ccv_cnnp_model_batch_norm_t* const model_batch_norm = (ccv_cnnp_model_batch_norm_t*)cccalloc(1, sizeof(ccv_cnnp_model_batch_norm_t));
1060
83
  model_batch_norm->super.isa = &ccv_cnnp_batch_norm_isa;
1061
83
  model_batch_norm->super.input_size = 1;
1062
83
  model_batch_norm->super.outputs = &model_batch_norm->output;
1063
83
  model_batch_norm->super.output_size = 1;
1064
83
  model_batch_norm->super.is_trainable = is_trainable;
1065
83
  ccv_cnnp_model_copy_name(&model_batch_norm->super, name);
1066
83
  model_batch_norm->scale.d = CCV_NNC_NO_TENSOR_SYMBOL;
1067
83
  model_batch_norm->scale.graph = 0;
1068
83
  model_batch_norm->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
1069
83
  model_batch_norm->bias.graph = 0;
1070
83
  model_batch_norm->params.bnorm.momentum = momentum;
1071
83
  model_batch_norm->params.bnorm.epsilon = epsilon;
1072
83
  return (ccv_cnnp_model_t*)model_batch_norm;
1073
83
}
1074
1075
static ccv_cnnp_model_t* _ccv_cnnp_batch_norm_copy(const ccv_cnnp_model_t* const super, void* const context)
1076
8
{
1077
8
  const ccv_cnnp_model_batch_norm_t* const self = (const ccv_cnnp_model_batch_norm_t*)super;
1078
8
  return ccv_cnnp_batch_norm(self->params.bnorm.momentum, self->params.bnorm.epsilon, self->super.is_trainable, self->super.name);
1079
8
}
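A constructor sketch; the momentum and epsilon values are illustrative, not library defaults:

  // scale starts uniform in [0, 1) and bias at 0, per _ccv_cnnp_batch_norm_init_states above;
  // the running mean/var start at 0 and are updated with momentum 0.9 each step.
  ccv_cnnp_model_t* const bn = ccv_cnnp_batch_norm(0.9, 1e-5, 1, "bn");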
1080
1081
// MARK - Convolution Layer
1082
1083
typedef struct {
1084
  ccv_cnnp_model_t super;
1085
  ccv_nnc_tensor_symbol_t output;
1086
  ccv_nnc_tensor_symbol_t weights;
1087
  ccv_nnc_tensor_symbol_t bias;
1088
  int groups;
1089
  int filters;
1090
  int kdim[CCV_NNC_MAX_DIM_ALLOC];
1091
  int dilation[CCV_NNC_MAX_DIM_ALLOC];
1092
  int no_bias;
1093
  int format;
1094
  ccv_nnc_hint_t hint;
1095
} ccv_cnnp_model_convolution_t;
1096
1097
static void _ccv_cnnp_convolution_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1098
114
{
1099
114
  ccv_cnnp_model_convolution_t* const self = (ccv_cnnp_model_convolution_t*)super;
1100
114
  PRINT(CCV_CLI_VERBOSE, "[cnnp_convolution_build] -\n");
1101
114
  assert(input_size == 1);
1102
114
  assert(output_size == 1);
1103
114
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1104
114
  int i;
1105
114
  const int nd = CCV_NNC_MAX_DIM + 2;
1106
114
  ccv_nnc_tensor_param_t weights_params = params;
1107
114
  if (self->format)
1108
0
    weights_params.format = self->format;
1109
114
  ccv_nnc_tensor_set_n(&weights_params, self->filters);
1110
114
  assert(ccv_nnc_tensor_get_c(params) % self->groups == 0);
1111
114
  ccv_nnc_tensor_set_c(&weights_params, nd, ccv_nnc_tensor_get_c(params) / self->groups);
1112
114
  const int hw = ccv_nnc_tensor_hw(weights_params, nd);
1113
114
  assert(hw >= 0);
1114
342
  for (i = 0; i < CCV_NNC_MAX_DIM; i++)
1115
228
    weights_params.dim[i + hw] = self->kdim[i];
1116
114
  if (!self->weights.graph)
1117
110
    self->weights = ccv_nnc_tensor_symbol_new(graph, weights_params, "weights");
1118
114
  assert(self->weights.graph == graph);
1119
114
  ccv_nnc_tensor_param_t bias_params = params;
1120
114
  if (self->format)
1121
0
    bias_params.format = self->format;
1122
114
  memset(bias_params.dim, 0, sizeof(bias_params.dim));
1123
114
  bias_params.dim[0] = self->filters;
1124
114
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(self->groups, self->filters);
1125
342
  for (i = 0; i < CCV_NNC_MAX_DIM; i++)
1126
228
    cmd.info.size.dim[i] = self->kdim[i];
1127
114
  memcpy(cmd.info.convolution.dilation, self->dilation, sizeof(self->dilation));
1128
114
  ccv_nnc_tensor_param_t output_params;
1129
  // Dilate weight size based on the dilation factor.
1130
342
  for (i = 0; i < CCV_NNC_MAX_DIM; i++)
1131
228
    weights_params.dim[i + hw] = (self->kdim[i] - 1) * ccv_max(self->dilation[i], 1) + 1;
1132
114
  ccv_nnc_hint_tensor_auto(cmd, (ccv_nnc_tensor_param_t []){
1133
114
      params,
1134
114
      weights_params,
1135
114
      bias_params,
1136
114
    }, 3, self->hint, &output_params, 1);
1137
114
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1138
114
  ccv_nnc_graph_exec_symbol_t convolution;
1139
114
  if (self->no_bias)
1140
10
    convolution = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], self->weights), TENSOR_SYMBOL_LIST(output), "convolution");
1141
104
  else {
1142
104
    if (!self->bias.graph)
1143
100
      self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
1144
104
    convolution = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], self->weights, self->bias), TENSOR_SYMBOL_LIST(output), "convolution");
1145
104
  }
1146
114
  ccv_nnc_graph_exec_symbol_set_hint(graph, convolution, self->hint);
1147
114
  outputs[0] = output;
1148
114
}
1149
1150
static void _ccv_cnnp_convolution_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
1151
36
{
1152
36
  ccv_cnnp_model_convolution_t* const self = (ccv_cnnp_model_convolution_t*)super;
1153
36
  const ccv_nnc_tensor_param_t weight_params = ccv_nnc_tensor_symbol_params(graph, self->weights);
1154
36
  const int n = ccv_max(ccv_nnc_tensor_get_n(weight_params), 1);
1155
36
  const int count = ccv_nnc_tensor_count(weight_params);
1156
36
  const float std = sqrtf(2) / sqrtf(count / n);
1157
36
  const float bound = sqrtf(3) * std;
1158
36
  initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-bound, bound), ccv_nnc_no_hint, 0, 0, self->weights);
1159
36
  if (self->bias.graph)
1160
36
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
1161
36
}
1162
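// Note on the initializer above: with fan_in = count / n (elements per output
// filter), std = sqrt(2 / fan_in) and bound = sqrt(3) * std amount to Kaiming
// (He) uniform initialization, since a uniform draw on [-b, b] has standard
// deviation b / sqrt(3).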
1163
static void _ccv_cnnp_convolution_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
1164
114
{
1165
114
  ccv_cnnp_model_convolution_t* const self = (ccv_cnnp_model_convolution_t*)super;
1166
114
  add_to_array(parameters, self->weights, is_trainable);
1167
114
  if (self->bias.graph)
1168
104
    add_to_array(parameters, self->bias, is_trainable);
1169
114
}
1170
1171
static ccv_cnnp_model_t* _ccv_cnnp_convolution_copy(const ccv_cnnp_model_t* const super, void* const context);
1172
1173
static const ccv_cnnp_model_vtab_t ccv_cnnp_convolution_isa = {
1174
  .build = _ccv_cnnp_convolution_build,
1175
  .init_states = _ccv_cnnp_convolution_init_states,
1176
  .add_to_parameter = _ccv_cnnp_convolution_add_to_parameter,
1177
  .copy = _ccv_cnnp_convolution_copy,
1178
};
1179
1180
ccv_cnnp_model_t* ccv_cnnp_convolution(const int groups, const int filters, const int kdim[CCV_NNC_MAX_DIM_ALLOC], const int dilation[CCV_NNC_MAX_DIM_ALLOC], const int no_bias, ccv_nnc_hint_t hint, const int format, const int is_trainable, const char* const name)
1181
126
{
1182
126
  ccv_cnnp_model_convolution_t* const model_convolution = (ccv_cnnp_model_convolution_t*)cccalloc(1, sizeof(ccv_cnnp_model_convolution_t));
1183
126
  model_convolution->super.isa = &ccv_cnnp_convolution_isa;
1184
126
  model_convolution->super.input_size = 1;
1185
126
  model_convolution->super.outputs = &model_convolution->output;
1186
126
  model_convolution->super.output_size = 1;
1187
126
  model_convolution->super.is_trainable = is_trainable;
1188
126
  ccv_cnnp_model_copy_name(&model_convolution->super, name);
1189
126
  model_convolution->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
1190
126
  model_convolution->weights.graph = 0;
1191
126
  model_convolution->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
1192
126
  model_convolution->bias.graph = 0;
1193
126
  model_convolution->groups = groups;
1194
126
  model_convolution->filters = filters;
1195
126
  memcpy(model_convolution->kdim, kdim, sizeof(model_convolution->kdim));
1196
126
  memcpy(model_convolution->dilation, dilation, sizeof(model_convolution->dilation));
1197
126
  model_convolution->no_bias = no_bias;
1198
126
  model_convolution->hint = hint;
1199
126
  model_convolution->format = format;
1200
126
  return (ccv_cnnp_model_t*)model_convolution;
1201
126
}
1202
1203
static ccv_cnnp_model_t* _ccv_cnnp_convolution_copy(const ccv_cnnp_model_t* const super, void* const context)
1204
16
{
1205
16
  ccv_cnnp_model_convolution_t* const self = (ccv_cnnp_model_convolution_t*)super;
1206
16
  return ccv_cnnp_convolution(self->groups, self->filters, self->kdim, self->dilation, self->no_bias, self->hint, self->format, self->super.is_trainable, self->super.name);
1207
16
}
1208
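// A minimal usage sketch, assuming the DIM_ALLOC and HINT convenience macros
// from ccv_nnc_easy.h; the concrete sizes are illustrative:
//   ccv_cnnp_model_t* const conv = ccv_cnnp_convolution(
//     1, 64, DIM_ALLOC(3, 3), DIM_ALLOC(), 0, HINT((1, 1), (1, 1)), 0, 1, "conv3x3");
// Passing no_bias = 1 instead would skip the bias symbol, as the build above shows.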
1209
// MARK - Convolution Transpose Layer
1210
1211
typedef struct {
1212
  ccv_cnnp_model_t super;
1213
  ccv_nnc_tensor_symbol_t output;
1214
  ccv_nnc_tensor_symbol_t weights;
1215
  ccv_nnc_tensor_symbol_t bias;
1216
  int groups;
1217
  int filters;
1218
  int kdim[CCV_NNC_MAX_DIM_ALLOC];
1219
  int dilation[CCV_NNC_MAX_DIM_ALLOC];
1220
  int output_padding;
1221
  int no_bias;
1222
  int format;
1223
  ccv_nnc_hint_t hint;
1224
} ccv_cnnp_model_convolution_transpose_t;
1225
1226
static void _ccv_cnnp_convolution_transpose_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1227
0
{
1228
0
  ccv_cnnp_model_convolution_transpose_t* const self = (ccv_cnnp_model_convolution_transpose_t*)super;
1229
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_convolution_transpose_build] -\n");
1230
0
  assert(input_size == 1);
1231
0
  assert(output_size == 1);
1232
0
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1233
0
  int i;
1234
0
  const int nd = CCV_NNC_MAX_DIM + 2;
1235
0
  ccv_nnc_tensor_param_t weights_params = params;
1236
0
  if (self->format)
1237
0
    weights_params.format = self->format;
1238
0
  ccv_nnc_tensor_set_n(&weights_params, ccv_nnc_tensor_get_c(params));
1239
0
  assert(ccv_nnc_tensor_get_c(params) % self->groups == 0);
1240
0
  ccv_nnc_tensor_set_c(&weights_params, nd, self->filters / self->groups);
1241
0
  const int hw = ccv_nnc_tensor_hw(weights_params, nd);
1242
0
  assert(hw >= 0);
1243
0
  for (i = 0; i < CCV_NNC_MAX_DIM; i++)
1244
0
    weights_params.dim[i + hw] = self->kdim[i];
1245
0
  if (!self->weights.graph)
1246
0
    self->weights = ccv_nnc_tensor_symbol_new(graph, weights_params, "weights");
1247
0
  assert(self->weights.graph == graph);
1248
0
  ccv_nnc_tensor_param_t bias_params = params;
1249
0
  if (self->format)
1250
0
    bias_params.format = self->format;
1251
0
  memset(bias_params.dim, 0, sizeof(bias_params.dim));
1252
0
  bias_params.dim[0] = self->filters;
1253
0
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(self->groups, self->filters, self->output_padding);
1254
0
  for (i = 0; i < CCV_NNC_MAX_DIM; i++)
1255
0
    cmd.info.size.dim[i] = self->kdim[i];
1256
0
  memcpy(cmd.info.convolution_transpose.dilation, self->dilation, sizeof(self->dilation));
1257
0
  ccv_nnc_tensor_param_t output_params;
1258
  // Dilate weight size based on the dilation factor.
1259
0
  for (i = 0; i < CCV_NNC_MAX_DIM; i++)
1260
0
    weights_params.dim[i + hw] = (self->kdim[i] - 1) * ccv_max(self->dilation[i], 1) + 1;
1261
0
  ccv_nnc_hint_tensor_auto(cmd, (ccv_nnc_tensor_param_t []){
1262
0
      params,
1263
0
      weights_params,
1264
0
      bias_params,
1265
0
    }, 3, self->hint, &output_params, 1);
1266
0
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1267
0
  ccv_nnc_graph_exec_symbol_t convolution_transpose;
1268
0
  if (self->no_bias)
1269
0
    convolution_transpose = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], self->weights), TENSOR_SYMBOL_LIST(output), "convolution_transpose");
1270
0
  else {
1271
0
    if (!self->bias.graph)
1272
0
      self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
1273
0
    convolution_transpose = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], self->weights, self->bias), TENSOR_SYMBOL_LIST(output), "convolution_transpose");
1274
0
  }
1275
0
  ccv_nnc_graph_exec_symbol_set_hint(graph, convolution_transpose, self->hint);
1276
0
  outputs[0] = output;
1277
0
}
1278
1279
static void _ccv_cnnp_convolution_transpose_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
1280
0
{
1281
0
  ccv_cnnp_model_convolution_transpose_t* const self = (ccv_cnnp_model_convolution_transpose_t*)super;
1282
0
  const ccv_nnc_tensor_param_t weight_params = ccv_nnc_tensor_symbol_params(graph, self->weights);
1283
0
  const int n = ccv_max(ccv_nnc_tensor_get_n(weight_params), 1);
1284
0
  const int count = ccv_nnc_tensor_count(weight_params);
1285
0
  const float std = sqrtf(2) / sqrtf(count / n);
1286
0
  const float bound = sqrtf(3) * std;
1287
0
  initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-bound, bound), ccv_nnc_no_hint, 0, 0, self->weights);
1288
0
  if (self->bias.graph)
1289
0
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
1290
0
}
1291
1292
static void _ccv_cnnp_convolution_transpose_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
1293
0
{
1294
0
  ccv_cnnp_model_convolution_transpose_t* const self = (ccv_cnnp_model_convolution_transpose_t*)super;
1295
0
  add_to_array(parameters, self->weights, is_trainable);
1296
0
  if (self->bias.graph)
1297
0
    add_to_array(parameters, self->bias, is_trainable);
1298
0
}
1299
1300
static ccv_cnnp_model_t* _ccv_cnnp_convolution_transpose_copy(const ccv_cnnp_model_t* const super, void* const context);
1301
1302
static const ccv_cnnp_model_vtab_t ccv_cnnp_convolution_transpose_isa = {
1303
  .build = _ccv_cnnp_convolution_transpose_build,
1304
  .init_states = _ccv_cnnp_convolution_transpose_init_states,
1305
  .add_to_parameter = _ccv_cnnp_convolution_transpose_add_to_parameter,
1306
  .copy = _ccv_cnnp_convolution_transpose_copy,
1307
};
1308
1309
ccv_cnnp_model_t* ccv_cnnp_convolution_transpose(const int groups, const int filters, const int kdim[CCV_NNC_MAX_DIM_ALLOC], const int dilation[CCV_NNC_MAX_DIM_ALLOC], const int output_padding, const int no_bias, ccv_nnc_hint_t hint, const int format, const int is_trainable, const char* const name)
1310
0
{
1311
0
  ccv_cnnp_model_convolution_transpose_t* const model_convolution_transpose = (ccv_cnnp_model_convolution_transpose_t*)cccalloc(1, sizeof(ccv_cnnp_model_convolution_transpose_t));
1312
0
  model_convolution_transpose->super.isa = &ccv_cnnp_convolution_transpose_isa;
1313
0
  model_convolution_transpose->super.input_size = 1;
1314
0
  model_convolution_transpose->super.outputs = &model_convolution_transpose->output;
1315
0
  model_convolution_transpose->super.output_size = 1;
1316
0
  model_convolution_transpose->super.is_trainable = is_trainable;
1317
0
  ccv_cnnp_model_copy_name(&model_convolution_transpose->super, name);
1318
0
  model_convolution_transpose->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
1319
0
  model_convolution_transpose->weights.graph = 0;
1320
0
  model_convolution_transpose->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
1321
0
  model_convolution_transpose->bias.graph = 0;
1322
0
  model_convolution_transpose->groups = groups;
1323
0
  model_convolution_transpose->filters = filters;
1324
0
  memcpy(model_convolution_transpose->kdim, kdim, sizeof(model_convolution_transpose->kdim));
1325
0
  memcpy(model_convolution_transpose->dilation, dilation, sizeof(model_convolution_transpose->dilation));
1326
0
  model_convolution_transpose->output_padding = output_padding;
1327
0
  model_convolution_transpose->no_bias = no_bias;
1328
0
  model_convolution_transpose->hint = hint;
1329
0
  model_convolution_transpose->format = format;
1330
0
  return (ccv_cnnp_model_t*)model_convolution_transpose;
1331
0
}
1332
1333
static ccv_cnnp_model_t* _ccv_cnnp_convolution_transpose_copy(const ccv_cnnp_model_t* const super, void* const context)
1334
0
{
1335
0
  ccv_cnnp_model_convolution_transpose_t* const self = (ccv_cnnp_model_convolution_transpose_t*)super;
1336
0
  return ccv_cnnp_convolution_transpose(self->groups, self->filters, self->kdim, self->dilation, self->output_padding, self->no_bias, self->hint, self->format, self->super.is_trainable, self->super.name);
1337
0
}
1338
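// Unlike the convolution above, the transpose variant sets the weight n to the
// input channel count and c to filters / groups, and takes an extra
// output_padding argument. A hypothetical call mirroring the sketch above:
//   ccv_cnnp_model_t* const deconv = ccv_cnnp_convolution_transpose(
//     1, 32, DIM_ALLOC(4, 4), DIM_ALLOC(), 0, 0, HINT((2, 2), (1, 1)), 0, 1, "up2x");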
1339
// MARK - Dense Layer
1340
1341
typedef struct {
1342
  ccv_cnnp_model_t super;
1343
  ccv_nnc_tensor_symbol_t output;
1344
  ccv_nnc_tensor_symbol_t weights;
1345
  ccv_nnc_tensor_symbol_t bias;
1346
  int count;
1347
  int no_bias;
1348
  int flags;
1349
} ccv_cnnp_model_dense_t;
1350
1351
static void _ccv_cnnp_dense_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1352
2.33k
{
1353
2.33k
  ccv_cnnp_model_dense_t* const self = (ccv_cnnp_model_dense_t*)super;
1354
2.33k
  PRINT(CCV_CLI_VERBOSE, "[cnnp_dense_build] -\n");
1355
2.33k
  assert(input_size == 1);
1356
2.33k
  assert(output_size == 1);
1357
2.33k
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1358
2.33k
  ccv_nnc_tensor_param_t weights_params = params;
1359
2.33k
  memset(weights_params.dim, 0, sizeof(weights_params.dim));
1360
2.33k
  weights_params.dim[0] = self->count;
1361
2.33k
  weights_params.dim[1] = params.dim[ccv_nnc_tensor_nd(params.dim) - 1];
1362
2.33k
  if (!self->weights.graph)
1363
2.31k
    self->weights = ccv_nnc_tensor_symbol_new(graph, weights_params, "weights");
1364
2.33k
  assert(self->weights.graph == graph);
1365
2.33k
  ccv_nnc_tensor_param_t bias_params = params;
1366
2.33k
  memset(bias_params.dim, 0, sizeof(bias_params.dim));
1367
2.33k
  bias_params.dim[0] = self->count;
1368
2.33k
  ccv_nnc_cmd_t cmd = {0};
1369
2.33k
  cmd.cmd = CCV_NNC_GEMM_FORWARD;
1370
2.33k
  cmd.info.blas.a[0] = 1;
1371
2.33k
  cmd.info.blas.a[1] = 1;
1372
2.33k
  cmd.info.blas.transpose_b[0] = 0;
1373
2.33k
  cmd.info.blas.transpose_b[1] = 1;
1374
2.33k
  cmd.info.blas.flags = self->flags;
1375
2.33k
  ccv_nnc_tensor_param_t output_params;
1376
2.33k
  ccv_nnc_hint_tensor_auto(cmd, (ccv_nnc_tensor_param_t []){
1377
2.33k
      params,
1378
2.33k
      weights_params,
1379
2.33k
      bias_params,
1380
2.33k
    }, 3, ccv_nnc_no_hint, &output_params, 1);
1381
2.33k
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1382
2.33k
  if (self->no_bias)
1383
2.08k
    ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], self->weights), TENSOR_SYMBOL_LIST(output), "dense");
1384
246
  else {
1385
246
    if (!self->bias.graph)
1386
243
      self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
1387
246
    ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], self->weights, self->bias), TENSOR_SYMBOL_LIST(output), "dense");
1388
246
  }
1389
2.33k
  outputs[0] = output;
1390
2.33k
}
1391
1392
static void _ccv_cnnp_dense_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
1393
79
{
1394
79
  ccv_cnnp_model_dense_t* const self = (ccv_cnnp_model_dense_t*)super;
1395
79
  const ccv_nnc_tensor_param_t weight_params = ccv_nnc_tensor_symbol_params(graph, self->weights);
1396
79
  const int c = weight_params.dim[1];
1397
79
  const float std = sqrtf(2) / sqrtf(c);
1398
79
  const float bound = sqrtf(3) * std;
1399
79
  initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-bound, bound), ccv_nnc_no_hint, 0, 0, self->weights);
1400
79
  if (self->bias.graph)
1401
33
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
1402
79
}
1403
1404
static void _ccv_cnnp_dense_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
1405
2.33k
{
1406
2.33k
  ccv_cnnp_model_dense_t* const self = (ccv_cnnp_model_dense_t*)super;
1407
2.33k
  add_to_array(parameters, self->weights, is_trainable);
1408
2.33k
  if (self->bias.graph)
1409
246
    add_to_array(parameters, self->bias, is_trainable);
1410
2.33k
}
1411
1412
static ccv_cnnp_model_t* _ccv_cnnp_dense_copy(const ccv_cnnp_model_t* const super, void* const context);
1413
1414
static const ccv_cnnp_model_vtab_t ccv_cnnp_dense_isa = {
1415
  .build = _ccv_cnnp_dense_build,
1416
  .init_states = _ccv_cnnp_dense_init_states,
1417
  .add_to_parameter = _ccv_cnnp_dense_add_to_parameter,
1418
  .copy = _ccv_cnnp_dense_copy,
1419
};
1420
1421
ccv_cnnp_model_t* ccv_cnnp_dense(const int count, const int no_bias, const int flags, const int is_trainable, const char* const name)
1422
2.31k
{
1423
2.31k
  ccv_cnnp_model_dense_t* const model_dense = (ccv_cnnp_model_dense_t*)cccalloc(1, sizeof(ccv_cnnp_model_dense_t));
1424
2.31k
  model_dense->super.isa = &ccv_cnnp_dense_isa;
1425
2.31k
  model_dense->super.input_size = 1;
1426
2.31k
  model_dense->super.outputs = &model_dense->output;
1427
2.31k
  model_dense->super.output_size = 1;
1428
2.31k
  model_dense->super.is_trainable = is_trainable;
1429
2.31k
  ccv_cnnp_model_copy_name(&model_dense->super, name);
1430
2.31k
  model_dense->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
1431
2.31k
  model_dense->weights.graph = 0;
1432
2.31k
  model_dense->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
1433
2.31k
  model_dense->bias.graph = 0;
1434
2.31k
  model_dense->count = count;
1435
2.31k
  model_dense->no_bias = no_bias;
1436
2.31k
  model_dense->flags = flags;
1437
2.31k
  return (ccv_cnnp_model_t*)model_dense;
1438
2.31k
}
1439
1440
static ccv_cnnp_model_t* _ccv_cnnp_dense_copy(const ccv_cnnp_model_t* const super, void* const context)
1441
2.20k
{
1442
2.20k
  const ccv_cnnp_model_dense_t* const self = (const ccv_cnnp_model_dense_t*)super;
1443
2.20k
  return ccv_cnnp_dense(self->count, self->no_bias, self->flags, self->super.is_trainable, self->super.name);
1444
2.20k
}
1445
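// A minimal usage sketch (illustrative values). The weight is laid out as
// count x input_dim and GEMM is configured with transpose_b, so the layer
// computes y = x * W^T (+ b):
//   ccv_cnnp_model_t* const fc = ccv_cnnp_dense(10, 0, 0, 1, "fc");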
1446
// MARK - Pool Layers
1447
1448
typedef struct {
1449
  ccv_cnnp_model_t super;
1450
  ccv_nnc_tensor_symbol_t output;
1451
  int kdim[CCV_NNC_MAX_DIM_ALLOC];
1452
  ccv_nnc_hint_t hint;
1453
} ccv_cnnp_model_pool_t;
1454
1455
static void _ccv_cnnp_max_pool_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1456
18
{
1457
18
  ccv_cnnp_model_pool_t* const self = (ccv_cnnp_model_pool_t*)super;
1458
18
  PRINT(CCV_CLI_VERBOSE, "[cnnp_max_pool_build] -\n");
1459
18
  assert(input_size == 1);
1460
18
  assert(output_size == 1);
1461
18
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1462
18
  const int hw = ccv_nnc_tensor_hw(params, ccv_nnc_tensor_nd(params.dim));
1463
18
  ccv_nnc_cmd_t cmd;
1464
18
  if (hw >= 0 && self->kdim[0] == 0 && self->kdim[1] == 0)
1465
3
    cmd = CMD_MAX_POOL_FORWARD(params.dim[hw], params.dim[hw + 1]);
1466
15
  else
1467
15
    cmd = CMD_MAX_POOL_FORWARD(self->kdim[0], self->kdim[1]);
1468
18
  ccv_nnc_tensor_param_t output_params;
1469
18
  ccv_nnc_hint_tensor_auto(cmd, &params, 1, self->hint, &output_params, 1);
1470
18
  const ccv_nnc_tensor_symbol_t pool_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1471
18
  const ccv_nnc_graph_exec_symbol_t exec = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(pool_output), "max_pool");
1472
18
  ccv_nnc_graph_exec_symbol_set_hint(graph, exec, self->hint);
1473
18
  outputs[0] = pool_output;
1474
18
}
1475
1476
static ccv_cnnp_model_t* _ccv_cnnp_max_pool_copy(const ccv_cnnp_model_t* const super, void* const context);
1477
1478
static const ccv_cnnp_model_vtab_t ccv_cnnp_max_pool_isa = {
1479
  .build = _ccv_cnnp_max_pool_build,
1480
  .copy = _ccv_cnnp_max_pool_copy,
1481
};
1482
1483
ccv_cnnp_model_t* ccv_cnnp_max_pool(const int kdim[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_hint_t hint, const char* const name)
1484
24
{
1485
24
  ccv_cnnp_model_pool_t* const model_pool = (ccv_cnnp_model_pool_t*)cccalloc(1, sizeof(ccv_cnnp_model_pool_t));
1486
24
  model_pool->super.isa = &ccv_cnnp_max_pool_isa;
1487
24
  model_pool->super.input_size = 1;
1488
24
  model_pool->super.outputs = &model_pool->output;
1489
24
  model_pool->super.output_size = 1;
1490
24
  ccv_cnnp_model_copy_name(&model_pool->super, name);
1491
24
  memcpy(model_pool->kdim, kdim, sizeof(model_pool->kdim));
1492
24
  model_pool->hint = hint;
1493
24
  return (ccv_cnnp_model_t*)model_pool;
1494
24
}
1495
1496
static ccv_cnnp_model_t* _ccv_cnnp_max_pool_copy(const ccv_cnnp_model_t* const super, void* const context)
1497
6
{
1498
6
  const ccv_cnnp_model_pool_t* const self = (const ccv_cnnp_model_pool_t*)super;
1499
6
  return ccv_cnnp_max_pool(self->kdim, self->hint, self->super.name);
1500
6
}
1501
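// Per the build above, a kdim of all zeros pools over the full spatial extent
// (global max pooling). A hypothetical 2x2, stride-2 call, assuming the
// DIM_ALLOC and HINT macros from ccv_nnc_easy.h:
//   ccv_cnnp_model_t* const pool = ccv_cnnp_max_pool(DIM_ALLOC(2, 2), HINT((2, 2), (0, 0)), "pool");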
1502
static void _ccv_cnnp_average_pool_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1503
15
{
1504
15
  ccv_cnnp_model_pool_t* const self = (ccv_cnnp_model_pool_t*)super;
1505
15
  PRINT(CCV_CLI_VERBOSE, "[cnnp_average_pool_build] -\n");
1506
15
  assert(input_size == 1);
1507
15
  assert(output_size == 1);
1508
15
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1509
15
  const int hw = ccv_nnc_tensor_hw(params, ccv_nnc_tensor_nd(params.dim));
1510
15
  ccv_nnc_cmd_t cmd;
1511
15
  if (hw >= 0 && self->kdim[0] == 0 && self->kdim[1] == 0)
1512
2
    cmd = CMD_AVERAGE_POOL_FORWARD(params.dim[hw], params.dim[hw + 1]);
1513
13
  else
1514
13
    cmd = CMD_AVERAGE_POOL_FORWARD(self->kdim[0], self->kdim[1]);
1515
15
  ccv_nnc_tensor_param_t output_params;
1516
15
  ccv_nnc_hint_tensor_auto(cmd, &params, 1, self->hint, &output_params, 1);
1517
15
  const ccv_nnc_tensor_symbol_t pool_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1518
15
  const ccv_nnc_graph_exec_symbol_t exec = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(pool_output), "average_pool");
1519
15
  ccv_nnc_graph_exec_symbol_set_hint(graph, exec, self->hint);
1520
15
  outputs[0] = pool_output;
1521
15
}
1522
1523
static ccv_cnnp_model_t* _ccv_cnnp_average_pool_copy(const ccv_cnnp_model_t* const super, void* const context);
1524
1525
static const ccv_cnnp_model_vtab_t ccv_cnnp_average_pool_isa = {
1526
  .build = _ccv_cnnp_average_pool_build,
1527
  .copy = _ccv_cnnp_average_pool_copy,
1528
};
1529
1530
ccv_cnnp_model_t* ccv_cnnp_average_pool(const int kdim[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_hint_t hint, const char* const name)
1531
17
{
1532
17
  ccv_cnnp_model_pool_t* const model_pool = (ccv_cnnp_model_pool_t*)cccalloc(1, sizeof(ccv_cnnp_model_pool_t));
1533
17
  model_pool->super.isa = &ccv_cnnp_average_pool_isa;
1534
17
  model_pool->super.input_size = 1;
1535
17
  model_pool->super.outputs = &model_pool->output;
1536
17
  model_pool->super.output_size = 1;
1537
17
  ccv_cnnp_model_copy_name(&model_pool->super, name);
1538
17
  memcpy(model_pool->kdim, kdim, sizeof(model_pool->kdim));
1539
17
  model_pool->hint = hint;
1540
17
  return (ccv_cnnp_model_t*)model_pool;
1541
17
}
1542
1543
static ccv_cnnp_model_t* _ccv_cnnp_average_pool_copy(const ccv_cnnp_model_t* const super, void* const context)
1544
2
{
1545
2
  const ccv_cnnp_model_pool_t* const self = (const ccv_cnnp_model_pool_t*)super;
1546
2
  return ccv_cnnp_average_pool(self->kdim, self->hint, self->super.name);
1547
2
}
1548
1549
// MARK - ReLU Layer
1550
1551
typedef struct {
1552
  ccv_cnnp_model_t super;
1553
  ccv_nnc_tensor_symbol_t output;
1554
} ccv_cnnp_model_relu_t;
1555
1556
static void _ccv_cnnp_relu_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1557
103
{
1558
103
  PRINT(CCV_CLI_VERBOSE, "[cnnp_relu_build] -\n");
1559
103
  assert(input_size == 1);
1560
103
  assert(output_size == 1);
1561
103
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1562
103
  ccv_nnc_tensor_param_t output_params;
1563
103
  const ccv_nnc_cmd_t relu = CMD_RELU_FORWARD();
1564
103
  ccv_nnc_hint_tensor_auto(relu, (ccv_nnc_tensor_param_t []){
1565
103
      params,
1566
103
    }, 1, ccv_nnc_no_hint, &output_params, 1);
1567
103
  const ccv_nnc_tensor_symbol_t relu_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1568
103
  ccv_nnc_graph_exec_symbol_new(graph, relu, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(relu_output), "relu");
1569
103
  outputs[0] = relu_output;
1570
103
}
1571
1572
static ccv_cnnp_model_t* _ccv_cnnp_relu_copy(const ccv_cnnp_model_t* const self, void* const context);
1573
1574
static const ccv_cnnp_model_vtab_t ccv_cnnp_relu_isa = {
1575
  .build = _ccv_cnnp_relu_build,
1576
  .copy = _ccv_cnnp_relu_copy,
1577
};
1578
1579
ccv_cnnp_model_t* ccv_cnnp_relu(const char* const name)
1580
120
{
1581
120
  ccv_cnnp_model_relu_t* const model_relu = (ccv_cnnp_model_relu_t*)cccalloc(1, sizeof(ccv_cnnp_model_relu_t));
1582
120
  model_relu->super.isa = &ccv_cnnp_relu_isa;
1583
120
  model_relu->super.input_size = 1;
1584
120
  model_relu->super.outputs = &model_relu->output;
1585
120
  model_relu->super.output_size = 1;
1586
120
  ccv_cnnp_model_copy_name(&model_relu->super, name);
1587
120
  return (ccv_cnnp_model_t*)model_relu;
1588
120
}
1589
1590
static ccv_cnnp_model_t* _ccv_cnnp_relu_copy(const ccv_cnnp_model_t* const self, void* const context)
1591
17
{
1592
17
  return ccv_cnnp_relu(self->name);
1593
17
}
1594
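// ReLU establishes the pattern every stateless activation below (sigmoid, tanh,
// swish, GELU, leaky ReLU, softmax) repeats: read the input symbol's parameters,
// let ccv_nnc_hint_tensor_auto infer the output shape, then emit a single exec
// symbol wiring input to output.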
1595
// MARK - Sigmoid Layer
1596
1597
typedef struct {
1598
  ccv_cnnp_model_t super;
1599
  ccv_nnc_tensor_symbol_t output;
1600
} ccv_cnnp_model_sigmoid_t;
1601
1602
static void _ccv_cnnp_sigmoid_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1603
5
{
1604
5
  PRINT(CCV_CLI_VERBOSE, "[cnnp_sigmoid_build] -\n");
1605
5
  assert(input_size == 1);
1606
5
  assert(output_size == 1);
1607
5
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1608
5
  ccv_nnc_tensor_param_t output_params;
1609
5
  const ccv_nnc_cmd_t sigmoid = CMD_SIGMOID_FORWARD();
1610
5
  ccv_nnc_hint_tensor_auto(sigmoid, (ccv_nnc_tensor_param_t []){
1611
5
      params,
1612
5
    }, 1, ccv_nnc_no_hint, &output_params, 1);
1613
5
  const ccv_nnc_tensor_symbol_t sigmoid_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1614
5
  ccv_nnc_graph_exec_symbol_new(graph, sigmoid, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(sigmoid_output), "sigmoid");
1615
5
  outputs[0] = sigmoid_output;
1616
5
}
1617
1618
static ccv_cnnp_model_t* _ccv_cnnp_sigmoid_copy(const ccv_cnnp_model_t* const self, void* const context);
1619
1620
static const ccv_cnnp_model_vtab_t ccv_cnnp_sigmoid_isa = {
1621
  .build = _ccv_cnnp_sigmoid_build,
1622
  .copy = _ccv_cnnp_sigmoid_copy,
1623
};
1624
1625
ccv_cnnp_model_t* ccv_cnnp_sigmoid(const char* const name)
1626
5
{
1627
5
  ccv_cnnp_model_sigmoid_t* const model_sigmoid = (ccv_cnnp_model_sigmoid_t*)cccalloc(1, sizeof(ccv_cnnp_model_sigmoid_t));
1628
5
  model_sigmoid->super.isa = &ccv_cnnp_sigmoid_isa;
1629
5
  model_sigmoid->super.input_size = 1;
1630
5
  model_sigmoid->super.outputs = &model_sigmoid->output;
1631
5
  model_sigmoid->super.output_size = 1;
1632
5
  ccv_cnnp_model_copy_name(&model_sigmoid->super, name);
1633
5
  return (ccv_cnnp_model_t*)model_sigmoid;
1634
5
}
1635
1636
static ccv_cnnp_model_t* _ccv_cnnp_sigmoid_copy(const ccv_cnnp_model_t* const self, void* const context)
1637
0
{
1638
0
  return ccv_cnnp_sigmoid(self->name);
1639
0
}
1640
1641
// MARK - Tanh Layer
1642
1643
typedef struct {
1644
  ccv_cnnp_model_t super;
1645
  ccv_nnc_tensor_symbol_t output;
1646
} ccv_cnnp_model_tanh_t;
1647
1648
static void _ccv_cnnp_tanh_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1649
0
{
1650
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_tanh_build] -\n");
1651
0
  assert(input_size == 1);
1652
0
  assert(output_size == 1);
1653
0
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1654
0
  ccv_nnc_tensor_param_t output_params;
1655
0
  const ccv_nnc_cmd_t tanh = CMD_TANH_FORWARD();
1656
0
  ccv_nnc_hint_tensor_auto(tanh, (ccv_nnc_tensor_param_t []){
1657
0
      params,
1658
0
    }, 1, ccv_nnc_no_hint, &output_params, 1);
1659
0
  const ccv_nnc_tensor_symbol_t tanh_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1660
0
  ccv_nnc_graph_exec_symbol_new(graph, tanh, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(tanh_output), "tanh");
1661
0
  outputs[0] = tanh_output;
1662
0
}
1663
1664
static ccv_cnnp_model_t* _ccv_cnnp_tanh_copy(const ccv_cnnp_model_t* const self, void* const context);
1665
1666
static const ccv_cnnp_model_vtab_t ccv_cnnp_tanh_isa = {
1667
  .build = _ccv_cnnp_tanh_build,
1668
  .copy = _ccv_cnnp_tanh_copy,
1669
};
1670
1671
ccv_cnnp_model_t* ccv_cnnp_tanh(const char* const name)
1672
0
{
1673
0
  ccv_cnnp_model_tanh_t* const model_tanh = (ccv_cnnp_model_tanh_t*)cccalloc(1, sizeof(ccv_cnnp_model_tanh_t));
1674
0
  model_tanh->super.isa = &ccv_cnnp_tanh_isa;
1675
0
  model_tanh->super.input_size = 1;
1676
0
  model_tanh->super.outputs = &model_tanh->output;
1677
0
  model_tanh->super.output_size = 1;
1678
0
  ccv_cnnp_model_copy_name(&model_tanh->super, name);
1679
0
  return (ccv_cnnp_model_t*)model_tanh;
1680
0
}
1681
1682
static ccv_cnnp_model_t* _ccv_cnnp_tanh_copy(const ccv_cnnp_model_t* const self, void* const context)
1683
0
{
1684
0
  return ccv_cnnp_tanh(self->name);
1685
0
}
1686
1687
// MARK - Swish Layer
1688
1689
typedef struct {
1690
  ccv_cnnp_model_t super;
1691
  ccv_nnc_tensor_symbol_t output;
1692
} ccv_cnnp_model_swish_t;
1693
1694
static void _ccv_cnnp_swish_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1695
0
{
1696
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_swish_build] -\n");
1697
0
  assert(input_size == 1);
1698
0
  assert(output_size == 1);
1699
0
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1700
0
  ccv_nnc_tensor_param_t output_params;
1701
0
  const ccv_nnc_cmd_t swish = CMD_SWISH_FORWARD();
1702
0
  ccv_nnc_hint_tensor_auto(swish, (ccv_nnc_tensor_param_t []){
1703
0
      params,
1704
0
    }, 1, ccv_nnc_no_hint, &output_params, 1);
1705
0
  const ccv_nnc_tensor_symbol_t swish_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1706
0
  ccv_nnc_graph_exec_symbol_new(graph, swish, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(swish_output), "swish");
1707
0
  outputs[0] = swish_output;
1708
0
}
1709
1710
static ccv_cnnp_model_t* _ccv_cnnp_swish_copy(const ccv_cnnp_model_t* const self, void* const context);
1711
1712
static const ccv_cnnp_model_vtab_t ccv_cnnp_swish_isa = {
1713
  .build = _ccv_cnnp_swish_build,
1714
  .copy = _ccv_cnnp_swish_copy,
1715
};
1716
1717
ccv_cnnp_model_t* ccv_cnnp_swish(const char* const name)
1718
0
{
1719
0
  ccv_cnnp_model_swish_t* const model_swish = (ccv_cnnp_model_swish_t*)cccalloc(1, sizeof(ccv_cnnp_model_swish_t));
1720
0
  model_swish->super.isa = &ccv_cnnp_swish_isa;
1721
0
  model_swish->super.input_size = 1;
1722
0
  model_swish->super.outputs = &model_swish->output;
1723
0
  model_swish->super.output_size = 1;
1724
0
  ccv_cnnp_model_copy_name(&model_swish->super, name);
1725
0
  return (ccv_cnnp_model_t*)model_swish;
1726
0
}
1727
1728
static ccv_cnnp_model_t* _ccv_cnnp_swish_copy(const ccv_cnnp_model_t* const self, void* const context)
1729
0
{
1730
0
  return ccv_cnnp_swish(self->name);
1731
0
}
1732
1733
// MARK - GELU Layer
1734
1735
typedef struct {
1736
  ccv_cnnp_model_t super;
1737
  ccv_nnc_tensor_symbol_t output;
1738
  int tanh;
1739
} ccv_cnnp_model_gelu_t;
1740
1741
static void _ccv_cnnp_gelu_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1742
2
{
1743
2
  PRINT(CCV_CLI_VERBOSE, "[cnnp_gelu_build] -\n");
1744
2
  assert(input_size == 1);
1745
2
  assert(output_size == 1);
1746
2
  ccv_cnnp_model_gelu_t* const self = (ccv_cnnp_model_gelu_t*)super;
1747
2
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1748
2
  ccv_nnc_tensor_param_t output_params;
1749
2
  const ccv_nnc_cmd_t gelu = CMD_GELU_FORWARD(self->tanh);
1750
2
  ccv_nnc_hint_tensor_auto(gelu, (ccv_nnc_tensor_param_t []){
1751
2
      params,
1752
2
    }, 1, ccv_nnc_no_hint, &output_params, 1);
1753
2
  const ccv_nnc_tensor_symbol_t gelu_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1754
2
  ccv_nnc_graph_exec_symbol_new(graph, gelu, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(gelu_output), "gelu");
1755
2
  outputs[0] = gelu_output;
1756
2
}
1757
1758
static ccv_cnnp_model_t* _ccv_cnnp_gelu_copy(const ccv_cnnp_model_t* const self, void* const context);
1759
1760
static const ccv_cnnp_model_vtab_t ccv_cnnp_gelu_isa = {
1761
  .build = _ccv_cnnp_gelu_build,
1762
  .copy = _ccv_cnnp_gelu_copy,
1763
};
1764
1765
ccv_cnnp_model_t* ccv_cnnp_gelu(const int tanh, const char* const name)
1766
1
{
1767
1
  ccv_cnnp_model_gelu_t* const model_gelu = (ccv_cnnp_model_gelu_t*)cccalloc(1, sizeof(ccv_cnnp_model_gelu_t));
1768
1
  model_gelu->super.isa = &ccv_cnnp_gelu_isa;
1769
1
  model_gelu->super.input_size = 1;
1770
1
  model_gelu->super.outputs = &model_gelu->output;
1771
1
  model_gelu->super.output_size = 1;
1772
1
  model_gelu->tanh = tanh;
1773
1
  ccv_cnnp_model_copy_name(&model_gelu->super, name);
1774
1
  return (ccv_cnnp_model_t*)model_gelu;
1775
1
}
1776
1777
static ccv_cnnp_model_t* _ccv_cnnp_gelu_copy(const ccv_cnnp_model_t* const super, void* const context)
1778
0
{
1779
0
  ccv_cnnp_model_gelu_t* const self = (ccv_cnnp_model_gelu_t*)super;
1780
0
  return ccv_cnnp_gelu(self->tanh, self->super.name);
1781
0
}
1782
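// The tanh flag stored above is forwarded to CMD_GELU_FORWARD; a non-zero value
// presumably selects the tanh approximation of GELU rather than the exact erf
// form (a reading of the parameter name, not confirmed in this file):
//   ccv_cnnp_model_t* const gelu = ccv_cnnp_gelu(1, "gelu_tanh");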
1783
// MARK - Leaky ReLU Layer
1784
1785
typedef struct {
1786
  ccv_cnnp_model_t super;
1787
  ccv_nnc_tensor_symbol_t output;
1788
  float negative_slope;
1789
} ccv_cnnp_model_leaky_relu_t;
1790
1791
static void _ccv_cnnp_leaky_relu_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1792
0
{
1793
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_leaky_relu_build] -\n");
1794
0
  assert(input_size == 1);
1795
0
  assert(output_size == 1);
1796
0
  ccv_cnnp_model_leaky_relu_t* const self = (ccv_cnnp_model_leaky_relu_t*)super;
1797
0
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1798
0
  ccv_nnc_tensor_param_t output_params;
1799
0
  const ccv_nnc_cmd_t leaky_relu = CMD_LEAKY_RELU_FORWARD(self->negative_slope);
1800
0
  ccv_nnc_hint_tensor_auto(leaky_relu, (ccv_nnc_tensor_param_t []){
1801
0
      params,
1802
0
    }, 1, ccv_nnc_no_hint, &output_params, 1);
1803
0
  const ccv_nnc_tensor_symbol_t leaky_relu_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1804
0
  ccv_nnc_graph_exec_symbol_new(graph, leaky_relu, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(leaky_relu_output), "leaky_relu");
1805
0
  outputs[0] = leaky_relu_output;
1806
0
}
1807
1808
static ccv_cnnp_model_t* _ccv_cnnp_leaky_relu_copy(const ccv_cnnp_model_t* const self, void* const context);
1809
1810
static const ccv_cnnp_model_vtab_t ccv_cnnp_leaky_relu_isa = {
1811
  .build = _ccv_cnnp_leaky_relu_build,
1812
  .copy = _ccv_cnnp_leaky_relu_copy,
1813
};
1814
1815
ccv_cnnp_model_t* ccv_cnnp_leaky_relu(const float negative_slope, const char* const name)
1816
0
{
1817
0
  ccv_cnnp_model_leaky_relu_t* const model_leaky_relu = (ccv_cnnp_model_leaky_relu_t*)cccalloc(1, sizeof(ccv_cnnp_model_leaky_relu_t));
1818
0
  model_leaky_relu->super.isa = &ccv_cnnp_leaky_relu_isa;
1819
0
  model_leaky_relu->super.input_size = 1;
1820
0
  model_leaky_relu->super.outputs = &model_leaky_relu->output;
1821
0
  model_leaky_relu->super.output_size = 1;
1822
0
  model_leaky_relu->negative_slope = negative_slope;
1823
0
  ccv_cnnp_model_copy_name(&model_leaky_relu->super, name);
1824
0
  return (ccv_cnnp_model_t*)model_leaky_relu;
1825
0
}
1826
1827
static ccv_cnnp_model_t* _ccv_cnnp_leaky_relu_copy(const ccv_cnnp_model_t* const super, void* const context)
1828
0
{
1829
0
  ccv_cnnp_model_leaky_relu_t* const self = (ccv_cnnp_model_leaky_relu_t*)super;
1830
0
  return ccv_cnnp_leaky_relu(self->negative_slope, self->super.name);
1831
0
}
1832
1833
// MARK - Softmax Layer
1834
1835
typedef struct {
1836
  ccv_cnnp_model_t super;
1837
  ccv_nnc_tensor_symbol_t output;
1838
} ccv_cnnp_model_softmax_t;
1839
1840
static void _ccv_cnnp_softmax_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1841
8
{
1842
8
  PRINT(CCV_CLI_VERBOSE, "[cnnp_softmax_build] -\n");
1843
8
  assert(input_size == 1);
1844
8
  assert(output_size == 1);
1845
8
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1846
8
  ccv_nnc_tensor_param_t output_params;
1847
8
  const ccv_nnc_cmd_t softmax = CMD_SOFTMAX_FORWARD();
1848
8
  ccv_nnc_hint_tensor_auto(softmax, (ccv_nnc_tensor_param_t []){
1849
8
      params,
1850
8
    }, 1, ccv_nnc_no_hint, &output_params, 1);
1851
8
  const ccv_nnc_tensor_symbol_t softmax_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1852
8
  ccv_nnc_graph_exec_symbol_new(graph, softmax, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(softmax_output), "softmax");
1853
8
  outputs[0] = softmax_output;
1854
8
}
1855
1856
static ccv_cnnp_model_t* _ccv_cnnp_softmax_copy(const ccv_cnnp_model_t* const self, void* const context);
1857
1858
static const ccv_cnnp_model_vtab_t ccv_cnnp_softmax_isa = {
1859
  .build = _ccv_cnnp_softmax_build,
1860
  .copy = _ccv_cnnp_softmax_copy,
1861
};
1862
1863
ccv_cnnp_model_t* ccv_cnnp_softmax(const char* const name)
1864
9
{
1865
9
  ccv_cnnp_model_softmax_t* const model_softmax = (ccv_cnnp_model_softmax_t*)cccalloc(1, sizeof(ccv_cnnp_model_softmax_t));
1866
9
  model_softmax->super.isa = &ccv_cnnp_softmax_isa;
1867
9
  model_softmax->super.input_size = 1;
1868
9
  model_softmax->super.outputs = &model_softmax->output;
1869
9
  model_softmax->super.output_size = 1;
1870
9
  ccv_cnnp_model_copy_name(&model_softmax->super, name);
1871
9
  return (ccv_cnnp_model_t*)model_softmax;
1872
9
}
1873
1874
static ccv_cnnp_model_t* _ccv_cnnp_softmax_copy(const ccv_cnnp_model_t* const self, void* const context)
1875
1
{
1876
1
  return ccv_cnnp_softmax(self->name);
1877
1
}
1878
1879
// MARK - Add Layer
1880
1881
typedef struct {
1882
  ccv_cnnp_model_t super;
1883
  float p;
1884
  float q;
1885
  ccv_nnc_tensor_symbol_t output;
1886
} ccv_cnnp_model_add_t;
1887
1888
static void _ccv_cnnp_add_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1889
0
{
1890
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_add_build] -\n");
1891
0
  const ccv_cnnp_model_add_t* const self = (const ccv_cnnp_model_add_t*)super;
1892
0
  assert(input_size == 2);
1893
0
  assert(output_size == 1);
1894
0
  ccv_nnc_tensor_param_t input_params[2];
1895
0
  int i;
1896
0
  for (i = 0; i < 2; i++)
1897
0
    input_params[i] = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
1898
0
  ccv_nnc_tensor_param_t output_params;
1899
0
  const ccv_nnc_cmd_t add = CMD_ADD_FORWARD(self->p, self->q);
1900
0
  ccv_nnc_hint_tensor_auto(add, input_params, 2, ccv_nnc_no_hint, &output_params, 1);
1901
0
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1902
0
  ccv_nnc_graph_exec_symbol_new(graph, add, inputs, input_size, outputs, output_size, "add");
1903
0
}
1904
1905
static ccv_cnnp_model_t* _ccv_cnnp_add_copy(const ccv_cnnp_model_t* const self, void* const context);
1906
1907
static const ccv_cnnp_model_vtab_t ccv_cnnp_add_isa = {
1908
  .build = _ccv_cnnp_add_build,
1909
  .copy = _ccv_cnnp_add_copy,
1910
};
1911
1912
ccv_cnnp_model_t* ccv_cnnp_add(const float p, const float q, const char* const name)
1913
0
{
1914
0
  ccv_cnnp_model_add_t* const model_add = (ccv_cnnp_model_add_t*)cccalloc(1, sizeof(ccv_cnnp_model_add_t));
1915
0
  model_add->super.isa = &ccv_cnnp_add_isa;
1916
0
  model_add->super.input_size = 2;
1917
0
  model_add->super.outputs = &model_add->output;
1918
0
  model_add->super.output_size = 1;
1919
0
  model_add->p = p;
1920
0
  model_add->q = q;
1921
0
  ccv_cnnp_model_copy_name(&model_add->super, name);
1922
0
  return (ccv_cnnp_model_t*)model_add;
1923
0
}
1924
1925
static ccv_cnnp_model_t* _ccv_cnnp_add_copy(const ccv_cnnp_model_t* const super, void* const context)
1926
0
{
1927
0
  const ccv_cnnp_model_add_t* const self = (const ccv_cnnp_model_add_t*)super;
1928
0
  return ccv_cnnp_add(self->p, self->q, self->super.name);
1929
0
}
1930
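// p and q are forwarded to CMD_ADD_FORWARD, which by its parameters computes
// out = p * a + q * b elementwise (an assumption from the command signature;
// p = q = 1 would give a plain sum):
//   ccv_cnnp_model_t* const residual = ccv_cnnp_add(1, 1, "residual_add");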
1931
// MARK - Mul Layer
1932
1933
typedef struct {
1934
  ccv_cnnp_model_t super;
1935
  ccv_nnc_tensor_symbol_t output;
1936
  float p;
1937
} ccv_cnnp_model_mul_t;
1938
1939
static void _ccv_cnnp_mul_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1940
5
{
1941
5
  PRINT(CCV_CLI_VERBOSE, "[cnnp_mul_build] -\n");
1942
5
  const ccv_cnnp_model_mul_t* const self = (const ccv_cnnp_model_mul_t*)super;
1943
5
  assert(input_size == 2);
1944
5
  assert(output_size == 1);
1945
5
  ccv_nnc_tensor_param_t input_params[2];
1946
5
  int i;
1947
15
  for (i = 0; i < 2; i++)
1948
10
    input_params[i] = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
1949
5
  ccv_nnc_tensor_param_t output_params;
1950
5
  const ccv_nnc_cmd_t mul = CMD_MUL_FORWARD(self->p);
1951
5
  ccv_nnc_hint_tensor_auto(mul, input_params, 2, ccv_nnc_no_hint, &output_params, 1);
1952
5
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1953
5
  ccv_nnc_graph_exec_symbol_new(graph, mul, inputs, input_size, outputs, output_size, "mul");
1954
5
}
1955
1956
static ccv_cnnp_model_t* _ccv_cnnp_mul_copy(const ccv_cnnp_model_t* const self, void* const context);
1957
1958
static const ccv_cnnp_model_vtab_t ccv_cnnp_mul_isa = {
1959
  .build = _ccv_cnnp_mul_build,
1960
  .copy = _ccv_cnnp_mul_copy,
1961
};
1962
1963
ccv_cnnp_model_t* ccv_cnnp_mul(const float p, const char* const name)
1964
4
{
1965
4
  ccv_cnnp_model_mul_t* const model_mul = (ccv_cnnp_model_mul_t*)cccalloc(1, sizeof(ccv_cnnp_model_mul_t));
1966
4
  model_mul->super.isa = &ccv_cnnp_mul_isa;
1967
4
  model_mul->super.input_size = 2;
1968
4
  model_mul->super.outputs = &model_mul->output;
1969
4
  model_mul->super.output_size = 1;
1970
4
  model_mul->p = p;
1971
4
  ccv_cnnp_model_copy_name(&model_mul->super, name);
1972
4
  return (ccv_cnnp_model_t*)model_mul;
1973
4
}
1974
1975
static ccv_cnnp_model_t* _ccv_cnnp_mul_copy(const ccv_cnnp_model_t* const super, void* const context)
1976
0
{
1977
0
  const ccv_cnnp_model_mul_t* const self = (const ccv_cnnp_model_mul_t*)super;
1978
0
  return ccv_cnnp_mul(self->p, self->super.name);
1979
0
}
1980
1981
// MARK - Scalar Mul Layer
1982
1983
typedef struct {
1984
  ccv_cnnp_model_t super;
1985
  ccv_nnc_tensor_symbol_t output;
1986
  float a;
1987
} ccv_cnnp_model_scalar_mul_t;
1988
1989
static void _ccv_cnnp_scalar_mul_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1990
4
{
1991
4
  PRINT(CCV_CLI_VERBOSE, "[cnnp_scalar_mul_build] -\n");
1992
4
  assert(input_size == 1);
1993
4
  assert(output_size == 1);
1994
4
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1995
4
  ccv_nnc_tensor_param_t output_params;
1996
4
  ccv_cnnp_model_scalar_mul_t* const self = (ccv_cnnp_model_scalar_mul_t*)super;
1997
4
  const ccv_nnc_cmd_t scalar_mul = CMD_SCALAR_MUL_FORWARD(self->a);
1998
4
  ccv_nnc_hint_tensor_auto(scalar_mul, (ccv_nnc_tensor_param_t []){
1999
4
      params,
2000
4
    }, 1, ccv_nnc_no_hint, &output_params, 1);
2001
4
  const ccv_nnc_tensor_symbol_t scalar_mul_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2002
4
  ccv_nnc_graph_exec_symbol_new(graph, scalar_mul, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(scalar_mul_output), "scalar_mul");
2003
4
  outputs[0] = scalar_mul_output;
2004
4
}
2005
2006
static ccv_cnnp_model_t* _ccv_cnnp_scalar_mul_copy(const ccv_cnnp_model_t* const super, void* const context);
2007
2008
static const ccv_cnnp_model_vtab_t ccv_cnnp_scalar_mul_isa = {
2009
  .build = _ccv_cnnp_scalar_mul_build,
2010
  .copy = _ccv_cnnp_scalar_mul_copy,
2011
};
2012
2013
ccv_cnnp_model_t* ccv_cnnp_scalar_mul(const float a, const char* const name)
2014
4
{
2015
4
  ccv_cnnp_model_scalar_mul_t* const model_scalar_mul = (ccv_cnnp_model_scalar_mul_t*)cccalloc(1, sizeof(ccv_cnnp_model_scalar_mul_t));
2016
4
  model_scalar_mul->super.isa = &ccv_cnnp_scalar_mul_isa;
2017
4
  model_scalar_mul->super.input_size = 1;
2018
4
  model_scalar_mul->super.outputs = &model_scalar_mul->output;
2019
4
  model_scalar_mul->super.output_size = 1;
2020
4
  model_scalar_mul->a = a;
2021
4
  ccv_cnnp_model_copy_name(&model_scalar_mul->super, name);
2022
4
  return (ccv_cnnp_model_t*)model_scalar_mul;
2023
4
}
2024
2025
static ccv_cnnp_model_t* _ccv_cnnp_scalar_mul_copy(const ccv_cnnp_model_t* const super, void* const context)
2026
0
{
2027
0
  const ccv_cnnp_model_scalar_mul_t* const self = (const ccv_cnnp_model_scalar_mul_t*)super;
2028
0
  return ccv_cnnp_scalar_mul(self->a, self->super.name);
2029
0
}
2030
2031
// MARK - Div Layer
2032
2033
typedef struct {
2034
  ccv_cnnp_model_t super;
2035
  ccv_nnc_tensor_symbol_t output;
2036
  int reciprocal;
2037
} ccv_cnnp_model_div_t;
2038
2039
static void _ccv_cnnp_div_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2040
2
{
2041
2
  const ccv_cnnp_model_div_t* const self = (const ccv_cnnp_model_div_t*)super;
2042
2
  PRINT(CCV_CLI_VERBOSE, "[cnnp_div_build] -\n");
2043
2
  assert(output_size == 1);
2044
2
  ccv_nnc_tensor_param_t input_params[2];
2045
2
  int i;
2046
2
  ccv_nnc_tensor_param_t output_params;
2047
2
  const ccv_nnc_cmd_t div = CMD_EWDIV_FORWARD();
2048
2
  if (self->reciprocal)
2049
1
  {
2050
1
    assert(input_size == 1);
2051
1
    input_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2052
1
    input_params[1] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2053
1
    ccv_nnc_hint_tensor_auto(div, input_params, 2, ccv_nnc_no_hint, &output_params, 1);
2054
1
    outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2055
1
    ccv_nnc_graph_exec_symbol_new(graph, div, TENSOR_SYMBOL_LIST(NO_TENSOR_SYMBOL, inputs[0]), outputs, output_size, "div");
2056
1
  } else {
2057
1
    assert(input_size == 2);
2058
3
    for (i = 0; i < 2; i++)
2059
2
      input_params[i] = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
2060
1
    ccv_nnc_hint_tensor_auto(div, input_params, input_size, ccv_nnc_no_hint, &output_params, 1);
2061
1
    outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2062
1
    ccv_nnc_graph_exec_symbol_new(graph, div, inputs, input_size, outputs, output_size, "div");
2063
1
  }
2064
2
}
2065
2066
static ccv_cnnp_model_t* _ccv_cnnp_div_copy(const ccv_cnnp_model_t* const self, void* const context);
2067
2068
static const ccv_cnnp_model_vtab_t ccv_cnnp_div_isa = {
2069
  .build = _ccv_cnnp_div_build,
2070
  .copy = _ccv_cnnp_div_copy,
2071
};
2072
2073
ccv_cnnp_model_t* ccv_cnnp_div(const int reciprocal, const char* const name)
2074
2
{
2075
2
  ccv_cnnp_model_div_t* const model_div = (ccv_cnnp_model_div_t*)cccalloc(1, sizeof(ccv_cnnp_model_div_t));
2076
2
  model_div->super.isa = &ccv_cnnp_div_isa;
2077
2
  model_div->super.input_size = reciprocal ? 1 : 2;
2078
2
  model_div->super.outputs = &model_div->output;
2079
2
  model_div->super.output_size = 1;
2080
2
  model_div->reciprocal = reciprocal;
2081
2
  ccv_cnnp_model_copy_name(&model_div->super, name);
2082
2
  return (ccv_cnnp_model_t*)model_div;
2083
2
}
2084
2085
static ccv_cnnp_model_t* _ccv_cnnp_div_copy(const ccv_cnnp_model_t* const super, void* const context)
2086
0
{
2087
0
  const ccv_cnnp_model_div_t* const self = (const ccv_cnnp_model_div_t*)super;
2088
0
  return ccv_cnnp_div(self->reciprocal, self->super.name);
2089
0
}
2090
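// In reciprocal mode the build above leaves the numerator slot as
// NO_TENSOR_SYMBOL, so EWDIV computes the elementwise reciprocal of its single
// input; with reciprocal = 0 it is a plain two-input elementwise division:
//   ccv_cnnp_model_t* const recip = ccv_cnnp_div(1, "reciprocal");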
2091
// MARK - Sqrt Layer
2092
2093
typedef struct {
2094
  ccv_cnnp_model_t super;
2095
  ccv_nnc_tensor_symbol_t output;
2096
} ccv_cnnp_model_sqrt_t;
2097
2098
static void _ccv_cnnp_sqrt_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2099
0
{
2100
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_sqrt_build] -\n");
2101
0
  assert(output_size == 1);
2102
0
  ccv_nnc_tensor_param_t input_params[1];
2103
0
  ccv_nnc_tensor_param_t output_params;
2104
0
  const ccv_nnc_cmd_t sqrt = CMD_EWSQRT_FORWARD();
2105
0
  assert(input_size == 1);
2106
0
  input_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2107
0
  ccv_nnc_hint_tensor_auto(sqrt, input_params, 1, ccv_nnc_no_hint, &output_params, 1);
2108
0
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2109
0
  ccv_nnc_graph_exec_symbol_new(graph, sqrt, inputs, 1, outputs, output_size, "sqrt");
2110
0
}
2111
2112
static ccv_cnnp_model_t* _ccv_cnnp_sqrt_copy(const ccv_cnnp_model_t* const self, void* const context);
2113
2114
static const ccv_cnnp_model_vtab_t ccv_cnnp_sqrt_isa = {
2115
  .build = _ccv_cnnp_sqrt_build,
2116
  .copy = _ccv_cnnp_sqrt_copy,
2117
};
2118
2119
ccv_cnnp_model_t* ccv_cnnp_sqrt(const char* const name)
2120
0
{
2121
0
  ccv_cnnp_model_sqrt_t* const model_sqrt = (ccv_cnnp_model_sqrt_t*)cccalloc(1, sizeof(ccv_cnnp_model_sqrt_t));
2122
0
  model_sqrt->super.isa = &ccv_cnnp_sqrt_isa;
2123
0
  model_sqrt->super.input_size = 1;
2124
0
  model_sqrt->super.outputs = &model_sqrt->output;
2125
0
  model_sqrt->super.output_size = 1;
2126
0
  ccv_cnnp_model_copy_name(&model_sqrt->super, name);
2127
0
  return (ccv_cnnp_model_t*)model_sqrt;
2128
0
}
2129
2130
static ccv_cnnp_model_t* _ccv_cnnp_sqrt_copy(const ccv_cnnp_model_t* const super, void* const context)
2131
0
{
2132
0
  const ccv_cnnp_model_sqrt_t* const self = (const ccv_cnnp_model_sqrt_t*)super;
2133
0
  return ccv_cnnp_sqrt(self->super.name);
2134
0
}
2135
2136
// MARK - Cmul Layer
2137
2138
typedef struct {
2139
  ccv_cnnp_model_t super;
2140
  ccv_nnc_tensor_symbol_t output;
2141
} ccv_cnnp_model_cmul_t;
2142
2143
static void _ccv_cnnp_cmul_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2144
0
{
2145
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_cmul_build] -\n");
2146
0
  assert(input_size == 2);
2147
0
  assert(output_size == 1);
2148
0
  ccv_nnc_tensor_param_t input_params[2];
2149
0
  int i;
2150
0
  for (i = 0; i < 2; i++)
2151
0
    input_params[i] = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
2152
0
  ccv_nnc_tensor_param_t output_params;
2153
0
  const ccv_nnc_cmd_t mul = CMD_CMUL_FORWARD();
2154
0
  ccv_nnc_hint_tensor_auto(mul, input_params, 2, ccv_nnc_no_hint, &output_params, 1);
2155
0
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2156
0
  ccv_nnc_graph_exec_symbol_new(graph, mul, inputs, input_size, outputs, output_size, "cmul");
2157
0
}
2158
2159
static ccv_cnnp_model_t* _ccv_cnnp_cmul_copy(const ccv_cnnp_model_t* const self, void* const context);
2160
2161
static const ccv_cnnp_model_vtab_t ccv_cnnp_cmul_isa = {
2162
  .build = _ccv_cnnp_cmul_build,
2163
  .copy = _ccv_cnnp_cmul_copy,
2164
};
2165
2166
ccv_cnnp_model_t* ccv_cnnp_cmul(const char* const name)
2167
0
{
2168
0
  ccv_cnnp_model_cmul_t* const model_cmul = (ccv_cnnp_model_cmul_t*)cccalloc(1, sizeof(ccv_cnnp_model_cmul_t));
2169
0
  model_cmul->super.isa = &ccv_cnnp_cmul_isa;
2170
0
  model_cmul->super.input_size = 2;
2171
0
  model_cmul->super.outputs = &model_cmul->output;
2172
0
  model_cmul->super.output_size = 1;
2173
0
  ccv_cnnp_model_copy_name(&model_cmul->super, name);
2174
0
  return (ccv_cnnp_model_t*)model_cmul;
2175
0
}
2176
2177
static ccv_cnnp_model_t* _ccv_cnnp_cmul_copy(const ccv_cnnp_model_t* const super, void* const context)
2178
0
{
2179
0
  return ccv_cnnp_cmul(super->name);
2180
0
}
2181
2182
// MARK - Transpose Layer

// Coverage note: exercised 22 times in this report; the equal-axes no-op path and the copy path were not.

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t output;
  int transpose[2];
} ccv_cnnp_model_transpose_t;

static void _ccv_cnnp_transpose_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_transpose_t* const self = (ccv_cnnp_model_transpose_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_transpose_build] (%d, %d)\n", self->transpose[0], self->transpose[1]);
  assert(input_size == 1);
  assert(output_size == 1);
  if (self->transpose[0] == self->transpose[1])
  {
    outputs[0] = inputs[0];
    return;
  }
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_nnc_tensor_param_t output_params;
  const ccv_nnc_cmd_t transpose = CMD_TRANSPOSE_FORWARD(self->transpose[0], self->transpose[1]);
  ccv_nnc_hint_tensor_auto(transpose, (ccv_nnc_tensor_param_t []){
      params,
    }, 1, ccv_nnc_no_hint, &output_params, 1);
  const ccv_nnc_tensor_symbol_t transpose_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, transpose, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(transpose_output), "transpose");
  outputs[0] = transpose_output;
}

static ccv_cnnp_model_t* _ccv_cnnp_transpose_copy(const ccv_cnnp_model_t* const super, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_transpose_isa = {
  .build = _ccv_cnnp_transpose_build,
  .copy = _ccv_cnnp_transpose_copy,
};

ccv_cnnp_model_t* ccv_cnnp_transpose(const int axis_a, const int axis_b, const char* const name)
{
  ccv_cnnp_model_transpose_t* const model_transpose = (ccv_cnnp_model_transpose_t*)cccalloc(1, sizeof(ccv_cnnp_model_transpose_t));
  model_transpose->super.isa = &ccv_cnnp_transpose_isa;
  model_transpose->super.input_size = 1;
  model_transpose->super.outputs = &model_transpose->output;
  model_transpose->super.output_size = 1;
  model_transpose->transpose[0] = axis_a;
  model_transpose->transpose[1] = axis_b;
  ccv_cnnp_model_copy_name(&model_transpose->super, name);
  return (ccv_cnnp_model_t*)model_transpose;
}

static ccv_cnnp_model_t* _ccv_cnnp_transpose_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_transpose_t* const self = (const ccv_cnnp_model_transpose_t*)super;
  return ccv_cnnp_transpose(self->transpose[0], self->transpose[1], self->super.name);
}

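// Editorial usage sketch (assumption: only the constructor above plus
// ccv_cnnp_model_free() are used): a transpose layer that swaps two axes.
static void _example_transpose_usage(void)
{
  // Swap axes 1 and 2 of the input (for example, NHWC -> NWHC). When the two
  // axes are equal, the build above short-circuits to a no-op.
  ccv_cnnp_model_t* const transpose = ccv_cnnp_transpose(1, 2, "transpose");
  ccv_cnnp_model_free(transpose);
}
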
// MARK - Layer Norm Layer

// Coverage note: exercised 8 times in this report, always with elementwise_affine; the non-affine branch and the copy path were not.

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t output;
  ccv_nnc_tensor_symbol_t bias;
  ccv_nnc_tensor_symbol_t scale;
  ccv_nnc_cmd_param_t params;
} ccv_cnnp_model_layer_norm_t;

static void _ccv_cnnp_layer_norm_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_layer_norm_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_cnnp_model_layer_norm_t* const self = (ccv_cnnp_model_layer_norm_t*)super;
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_nnc_tensor_param_t bias_params = params;
  const int nd = ccv_nnc_tensor_nd(params.dim);
  int i;
  for (i = 0; i < nd; i++)
    bias_params.dim[i] = 1;
  for (i = 0; i < self->params.lnorm.count; i++)
    bias_params.dim[self->params.lnorm.axis[i]] = params.dim[self->params.lnorm.axis[i]];
  if (self->params.lnorm.elementwise_affine)
  {
    // Both scale and bias are shared if this model is reused.
    if (!self->scale.graph)
      self->scale = ccv_nnc_tensor_symbol_new(graph, bias_params, "scale");
    if (!self->bias.graph)
      self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
  }
  const ccv_nnc_cmd_t layer_norm = ccv_nnc_cmd(CCV_NNC_LAYER_NORM_FORWARD, 0, self->params, 0);
  ccv_nnc_tensor_param_t output_params[3];
  if (self->params.lnorm.elementwise_affine)
    ccv_nnc_hint_tensor_auto(layer_norm, (ccv_nnc_tensor_param_t []){
        params,
        bias_params,
        bias_params,
      }, 3, ccv_nnc_no_hint, output_params, 3);
  else
    ccv_nnc_hint_tensor_auto(layer_norm, (ccv_nnc_tensor_param_t []){
        params,
      }, 1, ccv_nnc_no_hint, output_params, 3);
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
  const ccv_nnc_tensor_symbol_t saved_mean = ccv_nnc_tensor_symbol_new(graph, output_params[1], "saved_mean");
  const ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(graph, output_params[2], "saved_inv_std");
  if (self->params.lnorm.elementwise_affine)
    ccv_nnc_graph_exec_symbol_new(graph, layer_norm, TENSOR_SYMBOL_LIST(inputs[0], self->scale, self->bias), TENSOR_SYMBOL_LIST(output, saved_mean, saved_inv_std), "layer_norm");
  else
    ccv_nnc_graph_exec_symbol_new(graph, layer_norm, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(output, saved_mean, saved_inv_std), "layer_norm");
  outputs[0] = output;
}

static void _ccv_cnnp_layer_norm_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
{
  ccv_cnnp_model_layer_norm_t* const self = (ccv_cnnp_model_layer_norm_t*)super;
  if (self->scale.graph)
    initializer(context, CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, self->scale);
  if (self->bias.graph)
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
}

static void _ccv_cnnp_layer_norm_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
{
  ccv_cnnp_model_layer_norm_t* const self = (ccv_cnnp_model_layer_norm_t*)super;
  if (self->scale.graph)
    add_to_array(parameters, self->scale, is_trainable);
  if (self->bias.graph)
    add_to_array(parameters, self->bias, is_trainable);
}

static ccv_cnnp_model_t* _ccv_cnnp_layer_norm_copy(const ccv_cnnp_model_t* const super, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_layer_norm_isa = {
  .build = _ccv_cnnp_layer_norm_build,
  .init_states = _ccv_cnnp_layer_norm_init_states,
  .add_to_parameter = _ccv_cnnp_layer_norm_add_to_parameter,
  .copy = _ccv_cnnp_layer_norm_copy,
};

ccv_cnnp_model_t* ccv_cnnp_layer_norm(const float epsilon, const int axis[CCV_NNC_MAX_DIM_ALLOC], const int axis_count, const int elementwise_affine, const int is_trainable, const char* const name)
{
  ccv_cnnp_model_layer_norm_t* const model_layer_norm = (ccv_cnnp_model_layer_norm_t*)cccalloc(1, sizeof(ccv_cnnp_model_layer_norm_t));
  model_layer_norm->super.isa = &ccv_cnnp_layer_norm_isa;
  model_layer_norm->super.input_size = 1;
  model_layer_norm->super.outputs = &model_layer_norm->output;
  model_layer_norm->super.output_size = 1;
  model_layer_norm->super.is_trainable = is_trainable;
  ccv_cnnp_model_copy_name(&model_layer_norm->super, name);
  model_layer_norm->scale.d = CCV_NNC_NO_TENSOR_SYMBOL;
  model_layer_norm->scale.graph = 0;
  model_layer_norm->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
  model_layer_norm->bias.graph = 0;
  model_layer_norm->params.lnorm.epsilon = epsilon;
  model_layer_norm->params.lnorm.count = axis_count;
  model_layer_norm->params.lnorm.elementwise_affine = elementwise_affine;
  memcpy(model_layer_norm->params.lnorm.axis, axis, sizeof(int) * axis_count);
  return (ccv_cnnp_model_t*)model_layer_norm;
}

static ccv_cnnp_model_t* _ccv_cnnp_layer_norm_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_layer_norm_t* const self = (const ccv_cnnp_model_layer_norm_t*)super;
  return ccv_cnnp_layer_norm(self->params.lnorm.epsilon, self->params.lnorm.axis, self->params.lnorm.count, self->params.lnorm.elementwise_affine, self->super.is_trainable, self->super.name);
}

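// Editorial usage sketch: layer normalization over the last axis of a 3-D
// input, with learned scale and bias (elementwise_affine = 1) marked
// trainable. Assumes only the constructor above and ccv_cnnp_model_free().
static void _example_layer_norm_usage(void)
{
  const int axis[] = {2}; // normalize over axis 2; scale/bias keep that dim
  ccv_cnnp_model_t* const norm = ccv_cnnp_layer_norm(1e-5, axis, 1, 1, 1, "layer_norm");
  ccv_cnnp_model_free(norm);
}
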
// MARK - Group Norm Layer

// Coverage note: no group norm code path was exercised in this report.

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t output;
  ccv_nnc_tensor_symbol_t bias;
  ccv_nnc_tensor_symbol_t scale;
  ccv_nnc_cmd_param_t params;
} ccv_cnnp_model_group_norm_t;

static void _ccv_cnnp_group_norm_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_group_norm_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_cnnp_model_group_norm_t* const self = (ccv_cnnp_model_group_norm_t*)super;
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_nnc_tensor_param_t bias_params = params;
  const int nd = ccv_nnc_tensor_nd(params.dim);
  int i;
  for (i = 0; i < nd; i++)
    bias_params.dim[i] = 1;
  bias_params.dim[self->params.gnorm.group_axis] = params.dim[self->params.gnorm.group_axis];
  if (self->params.gnorm.elementwise_affine)
  {
    // Both scale and bias are shared if this model is reused.
    if (!self->scale.graph)
      self->scale = ccv_nnc_tensor_symbol_new(graph, bias_params, "scale");
    if (!self->bias.graph)
      self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
  }
  const ccv_nnc_cmd_t group_norm = ccv_nnc_cmd(CCV_NNC_GROUP_NORM_FORWARD, 0, self->params, 0);
  ccv_nnc_tensor_param_t output_params[3];
  if (self->params.gnorm.elementwise_affine)
    ccv_nnc_hint_tensor_auto(group_norm, (ccv_nnc_tensor_param_t []){
        params,
        bias_params,
        bias_params,
      }, 3, ccv_nnc_no_hint, output_params, 3);
  else
    ccv_nnc_hint_tensor_auto(group_norm, (ccv_nnc_tensor_param_t []){
        params,
      }, 1, ccv_nnc_no_hint, output_params, 3);
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
  const ccv_nnc_tensor_symbol_t saved_mean = ccv_nnc_tensor_symbol_new(graph, output_params[1], "saved_mean");
  const ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(graph, output_params[2], "saved_inv_std");
  if (self->params.gnorm.elementwise_affine)
    ccv_nnc_graph_exec_symbol_new(graph, group_norm, TENSOR_SYMBOL_LIST(inputs[0], self->scale, self->bias), TENSOR_SYMBOL_LIST(output, saved_mean, saved_inv_std), "group_norm");
  else
    ccv_nnc_graph_exec_symbol_new(graph, group_norm, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(output, saved_mean, saved_inv_std), "group_norm");
  outputs[0] = output;
}

static void _ccv_cnnp_group_norm_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
{
  ccv_cnnp_model_group_norm_t* const self = (ccv_cnnp_model_group_norm_t*)super;
  if (self->scale.graph)
    initializer(context, CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, self->scale);
  if (self->bias.graph)
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
}

static void _ccv_cnnp_group_norm_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
{
  ccv_cnnp_model_group_norm_t* const self = (ccv_cnnp_model_group_norm_t*)super;
  if (self->scale.graph)
    add_to_array(parameters, self->scale, is_trainable);
  if (self->bias.graph)
    add_to_array(parameters, self->bias, is_trainable);
}

static ccv_cnnp_model_t* _ccv_cnnp_group_norm_copy(const ccv_cnnp_model_t* const super, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_group_norm_isa = {
  .build = _ccv_cnnp_group_norm_build,
  .init_states = _ccv_cnnp_group_norm_init_states,
  .add_to_parameter = _ccv_cnnp_group_norm_add_to_parameter,
  .copy = _ccv_cnnp_group_norm_copy,
};

ccv_cnnp_model_t* ccv_cnnp_group_norm(const int group_axis, const int groups, const float epsilon, const int reduce_axis[CCV_NNC_MAX_DIM_ALLOC], const int axis_count, const int elementwise_affine, const int is_trainable, const char* const name)
{
  ccv_cnnp_model_group_norm_t* const model_group_norm = (ccv_cnnp_model_group_norm_t*)cccalloc(1, sizeof(ccv_cnnp_model_group_norm_t));
  model_group_norm->super.isa = &ccv_cnnp_group_norm_isa;
  model_group_norm->super.input_size = 1;
  model_group_norm->super.outputs = &model_group_norm->output;
  model_group_norm->super.output_size = 1;
  model_group_norm->super.is_trainable = is_trainable;
  ccv_cnnp_model_copy_name(&model_group_norm->super, name);
  model_group_norm->scale.d = CCV_NNC_NO_TENSOR_SYMBOL;
  model_group_norm->scale.graph = 0;
  model_group_norm->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
  model_group_norm->bias.graph = 0;
  model_group_norm->params.gnorm.group_axis = group_axis;
  model_group_norm->params.gnorm.groups = groups;
  model_group_norm->params.gnorm.epsilon = epsilon;
  model_group_norm->params.gnorm.reduce_count = axis_count;
  model_group_norm->params.gnorm.elementwise_affine = elementwise_affine;
  memcpy(model_group_norm->params.gnorm.reduce_axis, reduce_axis, sizeof(int) * axis_count);
  return (ccv_cnnp_model_t*)model_group_norm;
}

static ccv_cnnp_model_t* _ccv_cnnp_group_norm_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_group_norm_t* const self = (const ccv_cnnp_model_group_norm_t*)super;
  return ccv_cnnp_group_norm(self->params.gnorm.group_axis, self->params.gnorm.groups, self->params.gnorm.epsilon, self->params.gnorm.reduce_axis, self->params.gnorm.reduce_count, self->params.gnorm.elementwise_affine, self->super.is_trainable, self->super.name);
}

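// Editorial usage sketch: group normalization on an NCHW input, grouping the
// channel axis (axis 1) into 32 groups and reducing over the spatial axes.
// Assumes only the constructor above and ccv_cnnp_model_free().
static void _example_group_norm_usage(void)
{
  const int reduce_axis[] = {2, 3};
  ccv_cnnp_model_t* const norm = ccv_cnnp_group_norm(1, 32, 1e-5, reduce_axis, 2, 1, 1, "group_norm");
  ccv_cnnp_model_free(norm);
}
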
// MARK - RMSNorm Layer

// Coverage note: no RMSNorm code path was exercised in this report.

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t output;
  ccv_nnc_tensor_symbol_t scale;
  ccv_nnc_cmd_param_t params;
} ccv_cnnp_model_rmsnorm_t;

static void _ccv_cnnp_rmsnorm_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_rmsnorm_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_cnnp_model_rmsnorm_t* const self = (ccv_cnnp_model_rmsnorm_t*)super;
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_nnc_tensor_param_t scale_params = params;
  const int nd = ccv_nnc_tensor_nd(params.dim);
  int i;
  for (i = 0; i < nd; i++)
    scale_params.dim[i] = 1;
  for (i = 0; i < self->params.rmsnorm.count; i++)
    scale_params.dim[self->params.rmsnorm.axis[i]] = params.dim[self->params.rmsnorm.axis[i]];
  // The scale is shared if this model is reused.
  if (!self->scale.graph)
    self->scale = ccv_nnc_tensor_symbol_new(graph, scale_params, "scale");
  const ccv_nnc_cmd_t rmsnorm = ccv_nnc_cmd(CCV_NNC_RMSNORM_FORWARD, 0, self->params, 0);
  ccv_nnc_tensor_param_t output_params[2];
  ccv_nnc_hint_tensor_auto(rmsnorm, (ccv_nnc_tensor_param_t []){
      params,
      scale_params,
    }, 2, ccv_nnc_no_hint, output_params, 2);
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
  const ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(graph, output_params[1], "saved_inv_std");
  ccv_nnc_graph_exec_symbol_new(graph, rmsnorm, TENSOR_SYMBOL_LIST(inputs[0], self->scale), TENSOR_SYMBOL_LIST(output, saved_inv_std), "rmsnorm");
  outputs[0] = output;
}

static void _ccv_cnnp_rmsnorm_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
{
  ccv_cnnp_model_rmsnorm_t* const self = (ccv_cnnp_model_rmsnorm_t*)super;
  if (self->scale.graph)
    initializer(context, CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, self->scale);
}

static void _ccv_cnnp_rmsnorm_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
{
  ccv_cnnp_model_rmsnorm_t* const self = (ccv_cnnp_model_rmsnorm_t*)super;
  if (self->scale.graph)
    add_to_array(parameters, self->scale, is_trainable);
}

static ccv_cnnp_model_t* _ccv_cnnp_rmsnorm_copy(const ccv_cnnp_model_t* const super, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_rmsnorm_isa = {
  .build = _ccv_cnnp_rmsnorm_build,
  .init_states = _ccv_cnnp_rmsnorm_init_states,
  .add_to_parameter = _ccv_cnnp_rmsnorm_add_to_parameter,
  .copy = _ccv_cnnp_rmsnorm_copy,
};

ccv_cnnp_model_t* ccv_cnnp_rmsnorm(const float epsilon, const int axis[CCV_NNC_MAX_DIM_ALLOC], const int axis_count, const int is_trainable, const char* const name)
{
  ccv_cnnp_model_rmsnorm_t* const model_rmsnorm = (ccv_cnnp_model_rmsnorm_t*)cccalloc(1, sizeof(ccv_cnnp_model_rmsnorm_t));
  model_rmsnorm->super.isa = &ccv_cnnp_rmsnorm_isa;
  model_rmsnorm->super.input_size = 1;
  model_rmsnorm->super.outputs = &model_rmsnorm->output;
  model_rmsnorm->super.output_size = 1;
  model_rmsnorm->super.is_trainable = is_trainable;
  ccv_cnnp_model_copy_name(&model_rmsnorm->super, name);
  model_rmsnorm->scale.d = CCV_NNC_NO_TENSOR_SYMBOL;
  model_rmsnorm->scale.graph = 0;
  model_rmsnorm->params.rmsnorm.epsilon = epsilon;
  model_rmsnorm->params.rmsnorm.count = axis_count;
  memcpy(model_rmsnorm->params.rmsnorm.axis, axis, sizeof(int) * axis_count);
  return (ccv_cnnp_model_t*)model_rmsnorm;
}

static ccv_cnnp_model_t* _ccv_cnnp_rmsnorm_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_rmsnorm_t* const self = (const ccv_cnnp_model_rmsnorm_t*)super;
  return ccv_cnnp_rmsnorm(self->params.rmsnorm.epsilon, self->params.rmsnorm.axis, self->params.rmsnorm.count, self->super.is_trainable, self->super.name);
}

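// Editorial usage sketch: RMSNorm over the last axis, as commonly used in
// transformer blocks; unlike layer norm it learns only a scale, no bias.
// Assumes only the constructor above and ccv_cnnp_model_free().
static void _example_rmsnorm_usage(void)
{
  const int axis[] = {2};
  ccv_cnnp_model_t* const norm = ccv_cnnp_rmsnorm(1e-6, axis, 1, 1, "rmsnorm");
  ccv_cnnp_model_free(norm);
}
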
// MARK - Batched Matrix Mul Layer

// Coverage note: build and constructor ran 10 times in this report; the copy path ran once.

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t output;
  int transpose_a[2];
  int transpose_b[2];
  int flags;
} ccv_cnnp_model_matmul_t;

static void _ccv_cnnp_matmul_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_matmul_build] -\n");
  assert(input_size == 2);
  assert(output_size == 1);
  ccv_cnnp_model_matmul_t* const self = (ccv_cnnp_model_matmul_t*)super;
  ccv_nnc_tensor_param_t a_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_nnc_tensor_param_t b_params = ccv_nnc_tensor_symbol_params(graph, inputs[1]);
  ccv_nnc_tensor_param_t output_params;
  ccv_nnc_cmd_t matmul = CMD_GEMM_FORWARD(self->transpose_a, self->transpose_b);
  matmul.info.blas.flags = self->flags;
  ccv_nnc_hint_tensor_auto(matmul, (ccv_nnc_tensor_param_t []){
      a_params,
      b_params,
    }, 2, ccv_nnc_no_hint, &output_params, 1);
  const ccv_nnc_tensor_symbol_t matmul_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, matmul, inputs, input_size, TENSOR_SYMBOL_LIST(matmul_output), "matmul");
  outputs[0] = matmul_output;
}

static ccv_cnnp_model_t* _ccv_cnnp_matmul_copy(const ccv_cnnp_model_t* const super, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_matmul_isa = {
  .build = _ccv_cnnp_matmul_build,
  .copy = _ccv_cnnp_matmul_copy,
};

ccv_cnnp_model_t* ccv_cnnp_matmul(const int transpose_a[2], const int transpose_b[2], const int flags, const char* const name)
{
  ccv_cnnp_model_matmul_t* const model_matmul = (ccv_cnnp_model_matmul_t*)cccalloc(1, sizeof(ccv_cnnp_model_matmul_t));
  model_matmul->super.isa = &ccv_cnnp_matmul_isa;
  model_matmul->super.input_size = 2;
  model_matmul->super.outputs = &model_matmul->output;
  model_matmul->super.output_size = 1;
  model_matmul->transpose_a[0] = transpose_a[0];
  model_matmul->transpose_a[1] = transpose_a[1];
  model_matmul->transpose_b[0] = transpose_b[0];
  model_matmul->transpose_b[1] = transpose_b[1];
  model_matmul->flags = flags;
  ccv_cnnp_model_copy_name(&model_matmul->super, name);
  return (ccv_cnnp_model_t*)model_matmul;
}

static ccv_cnnp_model_t* _ccv_cnnp_matmul_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_matmul_t* const self = (const ccv_cnnp_model_matmul_t*)super;
  return ccv_cnnp_matmul(self->transpose_a, self->transpose_b, self->flags, self->super.name);
}

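// Editorial usage sketch: a batched matmul computing a * b^T, expressed by
// transposing the last two axes of b. An equal pair (here {0, 0}) means "no
// transpose", mirroring the transpose layer's equal-axes no-op. Assumes only
// the constructor above and ccv_cnnp_model_free().
static void _example_matmul_usage(void)
{
  const int no_transpose[2] = {0, 0};
  const int transpose_b[2] = {1, 2}; // swap axes 1 and 2 of b
  ccv_cnnp_model_t* const matmul = ccv_cnnp_matmul(no_transpose, transpose_b, 0, "matmul");
  ccv_cnnp_model_free(matmul);
}
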
// MARK - Dropout Layer

// Coverage note: build and constructor ran 12 times in this report; set_is_test ran 24 times (12 test, 12 train); the copy path was not exercised.

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t output;
  ccv_nnc_graph_exec_symbol_t dropout;
  float p;
  int entirety;
} ccv_cnnp_model_dropout_t;

static void _ccv_cnnp_dropout_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_dropout_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_nnc_tensor_param_t output_params[2];
  ccv_cnnp_model_dropout_t* const self = (ccv_cnnp_model_dropout_t*)super;
  const ccv_nnc_cmd_t dropout = CMD_DROPOUT_FORWARD(self->p, self->entirety);
  ccv_nnc_hint_tensor_auto(dropout, (ccv_nnc_tensor_param_t []){
      params,
    }, 1, ccv_nnc_no_hint, output_params, 2);
  const ccv_nnc_tensor_symbol_t dropout_output = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
  const ccv_nnc_tensor_symbol_t mask = ccv_nnc_tensor_symbol_new(graph, output_params[1], "mask");
  self->dropout = ccv_nnc_graph_exec_symbol_new(graph, dropout, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(dropout_output, mask), "dropout");
  outputs[0] = dropout_output;
}

static void _ccv_cnnp_dropout_set_is_test(ccv_cnnp_model_t* const super, const int is_test, const ccv_cnnp_cmd_updater_f updater, void* const context)
{
  ccv_cnnp_model_dropout_t* const self = (ccv_cnnp_model_dropout_t*)super;
  if (self->dropout.graph)
  {
    if (is_test)
      // At test time, dropout is not applied. Substituting a data transfer is safe because the transfer is skipped when input and output are the same tensor.
      updater(context, self->dropout, CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint);
    else
      updater(context, self->dropout, CMD_DROPOUT_FORWARD(self->p, self->entirety), ccv_nnc_no_hint);
  }
}

static ccv_cnnp_model_t* _ccv_cnnp_dropout_copy(const ccv_cnnp_model_t* const super, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_dropout_isa = {
  .build = _ccv_cnnp_dropout_build,
  .set_is_test = _ccv_cnnp_dropout_set_is_test,
  .copy = _ccv_cnnp_dropout_copy,
};

ccv_cnnp_model_t* ccv_cnnp_dropout(const float p, const int entirety, const char* const name)
{
  ccv_cnnp_model_dropout_t* const model_dropout = (ccv_cnnp_model_dropout_t*)cccalloc(1, sizeof(ccv_cnnp_model_dropout_t));
  model_dropout->super.isa = &ccv_cnnp_dropout_isa;
  model_dropout->super.input_size = 1;
  model_dropout->super.outputs = &model_dropout->output;
  model_dropout->super.output_size = 1;
  model_dropout->p = p;
  model_dropout->entirety = entirety;
  ccv_cnnp_model_copy_name(&model_dropout->super, name);
  return (ccv_cnnp_model_t*)model_dropout;
}

static ccv_cnnp_model_t* _ccv_cnnp_dropout_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_dropout_t* const self = (const ccv_cnnp_model_dropout_t*)super;
  return ccv_cnnp_dropout(self->p, self->entirety, self->super.name);
}

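// Editorial usage sketch: dropout with a 10% drop probability applied per
// element (entirety = 0; entirety = 1 would drop the whole tensor at once).
// The set_is_test hook above swaps the command for a plain data transfer at
// test time. Assumes only the constructor above and ccv_cnnp_model_free().
static void _example_dropout_usage(void)
{
  ccv_cnnp_model_t* const dropout = ccv_cnnp_dropout(0.1, 0, "dropout");
  ccv_cnnp_model_free(dropout);
}
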
// MARK - Masked Fill Layer

// Coverage note: build and constructor ran 4 times in this report; the copy path was not exercised.

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t output;
  float eq;
  float fill;
} ccv_cnnp_model_masked_fill_t;

static void _ccv_cnnp_masked_fill_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_masked_fill_build] -\n");
  assert(input_size == 2);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_cnnp_model_masked_fill_t* const self = (ccv_cnnp_model_masked_fill_t*)super;
  const ccv_nnc_tensor_symbol_t masked_fill_output = ccv_nnc_tensor_symbol_new(graph, params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, CMD_MASKED_FILL_FORWARD(self->eq, self->fill), TENSOR_SYMBOL_LIST(inputs[0], inputs[1]), TENSOR_SYMBOL_LIST(masked_fill_output), "masked_fill");
  outputs[0] = masked_fill_output;
}

static ccv_cnnp_model_t* _ccv_cnnp_masked_fill_copy(const ccv_cnnp_model_t* const super, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_masked_fill_isa = {
  .build = _ccv_cnnp_masked_fill_build,
  .copy = _ccv_cnnp_masked_fill_copy,
};

ccv_cnnp_model_t* ccv_cnnp_masked_fill(const float eq, const float fill, const char* const name)
{
  ccv_cnnp_model_masked_fill_t* const model_masked_fill = (ccv_cnnp_model_masked_fill_t*)cccalloc(1, sizeof(ccv_cnnp_model_masked_fill_t));
  model_masked_fill->super.isa = &ccv_cnnp_masked_fill_isa;
  model_masked_fill->super.input_size = 2;
  model_masked_fill->super.outputs = &model_masked_fill->output;
  model_masked_fill->super.output_size = 1;
  model_masked_fill->eq = eq;
  model_masked_fill->fill = fill;
  ccv_cnnp_model_copy_name(&model_masked_fill->super, name);
  return (ccv_cnnp_model_t*)model_masked_fill;
}

static ccv_cnnp_model_t* _ccv_cnnp_masked_fill_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_masked_fill_t* const self = (const ccv_cnnp_model_masked_fill_t*)super;
  return ccv_cnnp_masked_fill(self->eq, self->fill, self->super.name);
}

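// Editorial usage sketch: the attention-masking idiom, replacing values with
// a large negative number wherever the mask (the second input) equals `eq`.
// The eq/fill interpretation is inferred from the build above; only the
// constructor and ccv_cnnp_model_free() are assumed.
static void _example_masked_fill_usage(void)
{
  ccv_cnnp_model_t* const masked_fill = ccv_cnnp_masked_fill(0, -1e9, "masked_fill");
  ccv_cnnp_model_free(masked_fill);
}
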
// MARK - Index Select Layer

// Coverage note: build and constructor ran twice in this report; the copy path was not exercised.

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t output;
} ccv_cnnp_model_index_select_t;

static void _ccv_cnnp_index_select_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_index_select_build] -\n");
  assert(input_size == 2);
  assert(output_size == 1);
  const ccv_nnc_tensor_param_t vocab_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  const ccv_nnc_tensor_param_t index_params = ccv_nnc_tensor_symbol_params(graph, inputs[1]);
  ccv_nnc_tensor_param_t output_params;
  const ccv_nnc_cmd_t index_select = CMD_INDEX_SELECT_FORWARD();
  ccv_nnc_hint_tensor_auto(index_select, (ccv_nnc_tensor_param_t []){
      vocab_params,
      index_params,
    }, 2, ccv_nnc_no_hint, &output_params, 1);
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, index_select, TENSOR_SYMBOL_LIST(inputs[0], inputs[1]), TENSOR_SYMBOL_LIST(output), "index_select");
  outputs[0] = output;
}

static ccv_cnnp_model_t* _ccv_cnnp_index_select_copy(const ccv_cnnp_model_t* const super, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_index_select_isa = {
  .build = _ccv_cnnp_index_select_build,
  .copy = _ccv_cnnp_index_select_copy,
};

ccv_cnnp_model_t* ccv_cnnp_index_select(const char* const name)
{
  ccv_cnnp_model_index_select_t* const model_index_select = (ccv_cnnp_model_index_select_t*)cccalloc(1, sizeof(ccv_cnnp_model_index_select_t));
  model_index_select->super.isa = &ccv_cnnp_index_select_isa;
  model_index_select->super.input_size = 2;
  model_index_select->super.outputs = &model_index_select->output;
  model_index_select->super.output_size = 1;
  ccv_cnnp_model_copy_name(&model_index_select->super, name);
  return (ccv_cnnp_model_t*)model_index_select;
}

static ccv_cnnp_model_t* _ccv_cnnp_index_select_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  ccv_cnnp_model_index_select_t* const self = (ccv_cnnp_model_index_select_t*)super;
  return ccv_cnnp_index_select(self->super.name);
}

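// Editorial usage sketch: index_select takes two inputs, a table tensor and
// an index tensor, and gathers rows by index; the embedding layer below
// reuses the same command with a learned table. Assumes only the constructor
// above and ccv_cnnp_model_free().
static void _example_index_select_usage(void)
{
  ccv_cnnp_model_t* const select = ccv_cnnp_index_select("index_select");
  ccv_cnnp_model_free(select);
}
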
// MARK - Embedding Layer

// Coverage note: build, init_states, parameter registration, and constructor ran once each in this report; the copy path was not exercised.

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t output;
  ccv_nnc_tensor_symbol_t vocab;
  int datatype;
  int vocab_size;
  int embed_size;
} ccv_cnnp_model_embedding_t;

static void _ccv_cnnp_embedding_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_embedding_t* const self = (ccv_cnnp_model_embedding_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_embedding_build] vocab_size: %d, embed_size: %d\n", self->vocab_size, self->embed_size);
  assert(input_size == 1);
  assert(output_size == 1);
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_nnc_tensor_param_t vocab_params = params;
  memset(vocab_params.dim, 0, sizeof(vocab_params.dim));
  vocab_params.datatype = self->datatype;
  vocab_params.dim[0] = self->vocab_size;
  vocab_params.dim[1] = self->embed_size;
  if (!self->vocab.graph)
    self->vocab = ccv_nnc_tensor_symbol_new(graph, vocab_params, "vocab");
  assert(self->vocab.graph == graph);
  ccv_nnc_tensor_param_t output_params;
  const ccv_nnc_cmd_t embedding = CMD_INDEX_SELECT_FORWARD();
  ccv_nnc_hint_tensor_auto(embedding, (ccv_nnc_tensor_param_t []){
      vocab_params,
      params,
    }, 2, ccv_nnc_no_hint, &output_params, 1);
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, embedding, TENSOR_SYMBOL_LIST(self->vocab, inputs[0]), TENSOR_SYMBOL_LIST(output), "embedding");
  outputs[0] = output;
}

static void _ccv_cnnp_embedding_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
{
  ccv_cnnp_model_embedding_t* const self = (ccv_cnnp_model_embedding_t*)super;
  const float std = sqrtf(2) / sqrtf(self->vocab_size + self->embed_size);
  const float bound = sqrtf(3) * std;
  initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-bound, bound), ccv_nnc_no_hint, 0, 0, self->vocab);
}

static void _ccv_cnnp_embedding_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
{
  ccv_cnnp_model_embedding_t* const self = (ccv_cnnp_model_embedding_t*)super;
  add_to_array(parameters, self->vocab, is_trainable);
}

static ccv_cnnp_model_t* _ccv_cnnp_embedding_copy(const ccv_cnnp_model_t* const super, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_embedding_isa = {
  .build = _ccv_cnnp_embedding_build,
  .init_states = _ccv_cnnp_embedding_init_states,
  .add_to_parameter = _ccv_cnnp_embedding_add_to_parameter,
  .copy = _ccv_cnnp_embedding_copy,
};

ccv_cnnp_model_t* ccv_cnnp_embedding(const int datatype, const int vocab_size, const int embed_size, const int is_trainable, const char* const name)
{
  ccv_cnnp_model_embedding_t* const model_embedding = (ccv_cnnp_model_embedding_t*)cccalloc(1, sizeof(ccv_cnnp_model_embedding_t));
  model_embedding->super.isa = &ccv_cnnp_embedding_isa;
  model_embedding->super.input_size = 1;
  model_embedding->super.outputs = &model_embedding->output;
  model_embedding->super.output_size = 1;
  model_embedding->super.is_trainable = is_trainable;
  ccv_cnnp_model_copy_name(&model_embedding->super, name);
  model_embedding->vocab.d = CCV_NNC_NO_TENSOR_SYMBOL;
  model_embedding->vocab.graph = 0;
  assert(datatype == CCV_32F || datatype == CCV_16F);
  model_embedding->datatype = datatype;
  assert(vocab_size > 0);
  model_embedding->vocab_size = vocab_size;
  assert(embed_size > 0);
  model_embedding->embed_size = embed_size;
  return (ccv_cnnp_model_t*)model_embedding;
}

static ccv_cnnp_model_t* _ccv_cnnp_embedding_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  ccv_cnnp_model_embedding_t* const self = (ccv_cnnp_model_embedding_t*)super;
  return ccv_cnnp_embedding(self->datatype, self->vocab_size, self->embed_size, self->super.is_trainable, self->super.name);
}

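// Editorial usage sketch: a trainable 32-bit embedding table of 10,000
// entries, each 128 wide. Per init_states above, the table is initialized
// uniformly in [-b, b] with b = sqrt(3) * sqrt(2 / (vocab_size + embed_size)),
// a Glorot-style bound. Assumes only the constructor above and
// ccv_cnnp_model_free().
static void _example_embedding_usage(void)
{
  ccv_cnnp_model_t* const embedding = ccv_cnnp_embedding(CCV_32F, 10000, 128, 1, "embedding");
  ccv_cnnp_model_free(embedding);
}
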
// MARK - Upsample Layer

// Coverage note: build and constructor ran 3 times in this report; the copy path was not exercised.

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t output;
  int type;
  float width_scale;
  float height_scale;
  int align_corners;
} ccv_cnnp_model_upsample_t;

static void _ccv_cnnp_upsample_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_upsample_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_cnnp_model_upsample_t* const self = (ccv_cnnp_model_upsample_t*)super;
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_nnc_cmd_t cmd = CMD_UPSAMPLE_FORWARD(self->type, self->width_scale, self->height_scale, self->align_corners);
  ccv_nnc_tensor_param_t output_params;
  ccv_nnc_hint_tensor_auto(cmd, &params, 1, ccv_nnc_no_hint, &output_params, 1);
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(output), "upsample");
  outputs[0] = output;
}

static ccv_cnnp_model_t* _ccv_cnnp_upsample_copy(const ccv_cnnp_model_t* const super, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_upsample_isa = {
  .build = _ccv_cnnp_upsample_build,
  .copy = _ccv_cnnp_upsample_copy,
};

ccv_cnnp_model_t* ccv_cnnp_upsample(const int type, const float width_scale, const float height_scale, const int align_corners, const char* const name)
{
  ccv_cnnp_model_upsample_t* const model_upsample = (ccv_cnnp_model_upsample_t*)cccalloc(1, sizeof(ccv_cnnp_model_upsample_t));
  model_upsample->super.isa = &ccv_cnnp_upsample_isa;
  model_upsample->super.input_size = 1;
  model_upsample->super.outputs = &model_upsample->output;
  model_upsample->super.output_size = 1;
  ccv_cnnp_model_copy_name(&model_upsample->super, name);
  assert(type == CCV_NNC_UPSAMPLE_NEAREST || type == CCV_NNC_UPSAMPLE_BILINEAR);
  model_upsample->type = type;
  model_upsample->width_scale = width_scale;
  model_upsample->height_scale = height_scale;
  model_upsample->align_corners = align_corners;
  return (ccv_cnnp_model_t*)model_upsample;
}

static ccv_cnnp_model_t* _ccv_cnnp_upsample_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_upsample_t* const self = (const ccv_cnnp_model_upsample_t*)super;
  return ccv_cnnp_upsample(self->type, self->width_scale, self->height_scale, self->align_corners, self->super.name);
}

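// Editorial usage sketch: bilinear 2x upsampling without corner alignment.
// The type must be CCV_NNC_UPSAMPLE_NEAREST or CCV_NNC_UPSAMPLE_BILINEAR, per
// the assertion above. Assumes only the constructor and ccv_cnnp_model_free().
static void _example_upsample_usage(void)
{
  ccv_cnnp_model_t* const upsample = ccv_cnnp_upsample(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0, "upsample");
  ccv_cnnp_model_free(upsample);
}
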
// MARK - Reduce Sum Layer

// Coverage note: build and constructor ran once each in this report; the copy path was not exercised.

typedef struct {
  ccv_cnnp_model_t super;
  int axis[CCV_NNC_MAX_DIM_ALLOC];
  int count;
  ccv_nnc_tensor_symbol_t output;
} ccv_cnnp_model_reduce_sum_t;

static void _ccv_cnnp_reduce_sum_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_reduce_sum_build] -\n");
  const ccv_cnnp_model_reduce_sum_t* const self = (const ccv_cnnp_model_reduce_sum_t*)super;
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_nnc_tensor_param_t output_params;
  ccv_nnc_cmd_t reduce_sum = CMD_REDUCE_SUM_FORWARD();
  int i;
  for (i = 0; i < self->count; i++)
    reduce_sum.info.reduce.axis[i] = self->axis[i];
  reduce_sum.info.reduce.count = self->count;
  ccv_nnc_hint_tensor_auto(reduce_sum, &input_params, 1, ccv_nnc_no_hint, &output_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, reduce_sum, inputs, input_size, outputs, output_size, "reduce_sum");
}

static ccv_cnnp_model_t* _ccv_cnnp_reduce_sum_copy(const ccv_cnnp_model_t* const self, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_reduce_sum_isa = {
  .build = _ccv_cnnp_reduce_sum_build,
  .copy = _ccv_cnnp_reduce_sum_copy,
};

ccv_cnnp_model_t* ccv_cnnp_reduce_sum(const int* const axis, const int axis_count, const char* const name)
{
  ccv_cnnp_model_reduce_sum_t* const model_reduce_sum = (ccv_cnnp_model_reduce_sum_t*)cccalloc(1, sizeof(ccv_cnnp_model_reduce_sum_t));
  model_reduce_sum->super.isa = &ccv_cnnp_reduce_sum_isa;
  model_reduce_sum->super.input_size = 1;
  model_reduce_sum->super.outputs = &model_reduce_sum->output;
  model_reduce_sum->super.output_size = 1;
  ccv_cnnp_model_copy_name(&model_reduce_sum->super, name);
  assert(axis_count <= CCV_NNC_MAX_DIM_ALLOC);
  int i;
  for (i = 0; i < axis_count; i++)
    model_reduce_sum->axis[i] = axis[i];
  model_reduce_sum->count = axis_count;
  return (ccv_cnnp_model_t*)model_reduce_sum;
}

static ccv_cnnp_model_t* _ccv_cnnp_reduce_sum_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_reduce_sum_t* const self = (const ccv_cnnp_model_reduce_sum_t*)super;
  return ccv_cnnp_reduce_sum(self->axis, self->count, self->super.name);
}

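// Editorial usage sketch: sum-reduce over axis 1. The reduce mean/max/min/
// norm2 layers below follow exactly the same constructor shape. Assumes only
// the constructor above and ccv_cnnp_model_free().
static void _example_reduce_sum_usage(void)
{
  const int axis[] = {1};
  ccv_cnnp_model_t* const reduce = ccv_cnnp_reduce_sum(axis, 1, "reduce_sum");
  ccv_cnnp_model_free(reduce);
}
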
// MARK - Reduce Mean Layer

// Coverage note: build and constructor ran once each in this report; the copy path was not exercised.

typedef struct {
  ccv_cnnp_model_t super;
  int axis[CCV_NNC_MAX_DIM_ALLOC];
  int count;
  ccv_nnc_tensor_symbol_t output;
} ccv_cnnp_model_reduce_mean_t;

static void _ccv_cnnp_reduce_mean_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_reduce_mean_build] -\n");
  const ccv_cnnp_model_reduce_mean_t* const self = (const ccv_cnnp_model_reduce_mean_t*)super;
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_nnc_tensor_param_t output_params;
  ccv_nnc_cmd_t reduce_mean = CMD_REDUCE_MEAN_FORWARD();
  int i;
  for (i = 0; i < self->count; i++)
    reduce_mean.info.reduce.axis[i] = self->axis[i];
  reduce_mean.info.reduce.count = self->count;
  ccv_nnc_hint_tensor_auto(reduce_mean, &input_params, 1, ccv_nnc_no_hint, &output_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, reduce_mean, inputs, input_size, outputs, output_size, "reduce_mean");
}

static ccv_cnnp_model_t* _ccv_cnnp_reduce_mean_copy(const ccv_cnnp_model_t* const self, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_reduce_mean_isa = {
  .build = _ccv_cnnp_reduce_mean_build,
  .copy = _ccv_cnnp_reduce_mean_copy,
};

ccv_cnnp_model_t* ccv_cnnp_reduce_mean(const int* const axis, const int axis_count, const char* const name)
{
  ccv_cnnp_model_reduce_mean_t* const model_reduce_mean = (ccv_cnnp_model_reduce_mean_t*)cccalloc(1, sizeof(ccv_cnnp_model_reduce_mean_t));
  model_reduce_mean->super.isa = &ccv_cnnp_reduce_mean_isa;
  model_reduce_mean->super.input_size = 1;
  model_reduce_mean->super.outputs = &model_reduce_mean->output;
  model_reduce_mean->super.output_size = 1;
  ccv_cnnp_model_copy_name(&model_reduce_mean->super, name);
  assert(axis_count <= CCV_NNC_MAX_DIM_ALLOC);
  int i;
  for (i = 0; i < axis_count; i++)
    model_reduce_mean->axis[i] = axis[i];
  model_reduce_mean->count = axis_count;
  return (ccv_cnnp_model_t*)model_reduce_mean;
}

static ccv_cnnp_model_t* _ccv_cnnp_reduce_mean_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_reduce_mean_t* const self = (const ccv_cnnp_model_reduce_mean_t*)super;
  return ccv_cnnp_reduce_mean(self->axis, self->count, self->super.name);
}

// MARK - Reduce Max Layer

// Coverage note: build and constructor ran once each in this report; the copy path was not exercised.

typedef struct {
  ccv_cnnp_model_t super;
  int axis[CCV_NNC_MAX_DIM_ALLOC];
  int count;
  ccv_nnc_tensor_symbol_t output;
} ccv_cnnp_model_reduce_max_t;

static void _ccv_cnnp_reduce_max_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_reduce_max_build] -\n");
  const ccv_cnnp_model_reduce_max_t* const self = (const ccv_cnnp_model_reduce_max_t*)super;
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_nnc_tensor_param_t output_params;
  ccv_nnc_cmd_t reduce_max = CMD_REDUCE_MAX_FORWARD();
  int i;
  for (i = 0; i < self->count; i++)
    reduce_max.info.reduce.axis[i] = self->axis[i];
  reduce_max.info.reduce.count = self->count;
  ccv_nnc_hint_tensor_auto(reduce_max, &input_params, 1, ccv_nnc_no_hint, &output_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, reduce_max, inputs, input_size, outputs, output_size, "reduce_max");
}

static ccv_cnnp_model_t* _ccv_cnnp_reduce_max_copy(const ccv_cnnp_model_t* const self, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_reduce_max_isa = {
  .build = _ccv_cnnp_reduce_max_build,
  .copy = _ccv_cnnp_reduce_max_copy,
};

ccv_cnnp_model_t* ccv_cnnp_reduce_max(const int* const axis, const int axis_count, const char* const name)
{
  ccv_cnnp_model_reduce_max_t* const model_reduce_max = (ccv_cnnp_model_reduce_max_t*)cccalloc(1, sizeof(ccv_cnnp_model_reduce_max_t));
  model_reduce_max->super.isa = &ccv_cnnp_reduce_max_isa;
  model_reduce_max->super.input_size = 1;
  model_reduce_max->super.outputs = &model_reduce_max->output;
  model_reduce_max->super.output_size = 1;
  ccv_cnnp_model_copy_name(&model_reduce_max->super, name);
  assert(axis_count <= CCV_NNC_MAX_DIM_ALLOC);
  int i;
  for (i = 0; i < axis_count; i++)
    model_reduce_max->axis[i] = axis[i];
  model_reduce_max->count = axis_count;
  return (ccv_cnnp_model_t*)model_reduce_max;
}

static ccv_cnnp_model_t* _ccv_cnnp_reduce_max_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_reduce_max_t* const self = (const ccv_cnnp_model_reduce_max_t*)super;
  return ccv_cnnp_reduce_max(self->axis, self->count, self->super.name);
}

// MARK - Reduce Min Layer

// Coverage note: build and constructor ran once each in this report; the copy path was not exercised.

typedef struct {
  ccv_cnnp_model_t super;
  int axis[CCV_NNC_MAX_DIM_ALLOC];
  int count;
  ccv_nnc_tensor_symbol_t output;
} ccv_cnnp_model_reduce_min_t;

static void _ccv_cnnp_reduce_min_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_reduce_min_build] -\n");
  const ccv_cnnp_model_reduce_min_t* const self = (const ccv_cnnp_model_reduce_min_t*)super;
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_nnc_tensor_param_t output_params;
  ccv_nnc_cmd_t reduce_min = CMD_REDUCE_MIN_FORWARD();
  int i;
  for (i = 0; i < self->count; i++)
    reduce_min.info.reduce.axis[i] = self->axis[i];
  reduce_min.info.reduce.count = self->count;
  ccv_nnc_hint_tensor_auto(reduce_min, &input_params, 1, ccv_nnc_no_hint, &output_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, reduce_min, inputs, input_size, outputs, output_size, "reduce_min");
}

static ccv_cnnp_model_t* _ccv_cnnp_reduce_min_copy(const ccv_cnnp_model_t* const self, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_reduce_min_isa = {
  .build = _ccv_cnnp_reduce_min_build,
  .copy = _ccv_cnnp_reduce_min_copy,
};

ccv_cnnp_model_t* ccv_cnnp_reduce_min(const int* const axis, const int axis_count, const char* const name)
{
  ccv_cnnp_model_reduce_min_t* const model_reduce_min = (ccv_cnnp_model_reduce_min_t*)cccalloc(1, sizeof(ccv_cnnp_model_reduce_min_t));
  model_reduce_min->super.isa = &ccv_cnnp_reduce_min_isa;
  model_reduce_min->super.input_size = 1;
  model_reduce_min->super.outputs = &model_reduce_min->output;
  model_reduce_min->super.output_size = 1;
  ccv_cnnp_model_copy_name(&model_reduce_min->super, name);
  assert(axis_count <= CCV_NNC_MAX_DIM_ALLOC);
  int i;
  for (i = 0; i < axis_count; i++)
    model_reduce_min->axis[i] = axis[i];
  model_reduce_min->count = axis_count;
  return (ccv_cnnp_model_t*)model_reduce_min;
}

static ccv_cnnp_model_t* _ccv_cnnp_reduce_min_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_reduce_min_t* const self = (const ccv_cnnp_model_reduce_min_t*)super;
  return ccv_cnnp_reduce_min(self->axis, self->count, self->super.name);
}

// MARK - Reduce Norm2 Layer

// Coverage note: build and constructor ran once each in this report; the copy path was not exercised.

typedef struct {
  ccv_cnnp_model_t super;
  int axis[CCV_NNC_MAX_DIM_ALLOC];
  int count;
  ccv_nnc_tensor_symbol_t output;
} ccv_cnnp_model_reduce_norm2_t;

static void _ccv_cnnp_reduce_norm2_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  const ccv_cnnp_model_reduce_norm2_t* const self = (const ccv_cnnp_model_reduce_norm2_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_reduce_norm2_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_nnc_tensor_param_t output_params;
  ccv_nnc_cmd_t reduce_norm2 = CMD_REDUCE_NORM2_FORWARD();
  int i;
  for (i = 0; i < self->count; i++)
    reduce_norm2.info.reduce.axis[i] = self->axis[i];
  reduce_norm2.info.reduce.count = self->count;
  ccv_nnc_hint_tensor_auto(reduce_norm2, &input_params, 1, ccv_nnc_no_hint, &output_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, reduce_norm2, inputs, input_size, outputs, output_size, "reduce_norm2");
}

static ccv_cnnp_model_t* _ccv_cnnp_reduce_norm2_copy(const ccv_cnnp_model_t* const self, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_reduce_norm2_isa = {
  .build = _ccv_cnnp_reduce_norm2_build,
  .copy = _ccv_cnnp_reduce_norm2_copy,
};

ccv_cnnp_model_t* ccv_cnnp_reduce_norm2(const int* const axis, const int axis_count, const char* const name)
{
  ccv_cnnp_model_reduce_norm2_t* const model_reduce_norm2 = (ccv_cnnp_model_reduce_norm2_t*)cccalloc(1, sizeof(ccv_cnnp_model_reduce_norm2_t));
  model_reduce_norm2->super.isa = &ccv_cnnp_reduce_norm2_isa;
  model_reduce_norm2->super.input_size = 1;
  model_reduce_norm2->super.outputs = &model_reduce_norm2->output;
  model_reduce_norm2->super.output_size = 1;
  ccv_cnnp_model_copy_name(&model_reduce_norm2->super, name);
  assert(axis_count <= CCV_NNC_MAX_DIM_ALLOC);
  int i;
  for (i = 0; i < axis_count; i++)
    model_reduce_norm2->axis[i] = axis[i];
  model_reduce_norm2->count = axis_count;
  return (ccv_cnnp_model_t*)model_reduce_norm2;
}

static ccv_cnnp_model_t* _ccv_cnnp_reduce_norm2_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_reduce_norm2_t* const self = (const ccv_cnnp_model_reduce_norm2_t*)super;
  return ccv_cnnp_reduce_norm2(self->axis, self->count, self->super.name);
}

// MARK - Argmax Layer

// Coverage note: build and constructor ran once each in this report; the copy path was not exercised.

typedef struct {
  ccv_cnnp_model_t super;
  int axis;
  ccv_nnc_tensor_symbol_t output;
} ccv_cnnp_model_argmax_t;

static void _ccv_cnnp_argmax_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  const ccv_cnnp_model_argmax_t* const self = (const ccv_cnnp_model_argmax_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_argmax_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_nnc_tensor_param_t output_params;
  ccv_nnc_cmd_t argmax = CMD_ARGMAX_FORWARD();
  argmax.info.reduce.axis[0] = self->axis;
  argmax.info.reduce.count = 1;
  ccv_nnc_hint_tensor_auto(argmax, &input_params, 1, ccv_nnc_no_hint, &output_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, argmax, inputs, input_size, outputs, output_size, "argmax");
}

static ccv_cnnp_model_t* _ccv_cnnp_argmax_copy(const ccv_cnnp_model_t* const self, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_argmax_isa = {
  .build = _ccv_cnnp_argmax_build,
  .copy = _ccv_cnnp_argmax_copy,
};

ccv_cnnp_model_t* ccv_cnnp_argmax(const int axis, const char* const name)
{
  ccv_cnnp_model_argmax_t* const model_argmax = (ccv_cnnp_model_argmax_t*)cccalloc(1, sizeof(ccv_cnnp_model_argmax_t));
  model_argmax->super.isa = &ccv_cnnp_argmax_isa;
  model_argmax->super.input_size = 1;
  model_argmax->super.outputs = &model_argmax->output;
  model_argmax->super.output_size = 1;
  ccv_cnnp_model_copy_name(&model_argmax->super, name);
  model_argmax->axis = axis;
  return (ccv_cnnp_model_t*)model_argmax;
}

static ccv_cnnp_model_t* _ccv_cnnp_argmax_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_argmax_t* const self = (const ccv_cnnp_model_argmax_t*)super;
  return ccv_cnnp_argmax(self->axis, self->super.name);
}

// MARK - Argmin Layer

// Coverage note: build and constructor ran once each in this report; the copy path was not exercised.

typedef struct {
  ccv_cnnp_model_t super;
  int axis;
  ccv_nnc_tensor_symbol_t output;
} ccv_cnnp_model_argmin_t;

static void _ccv_cnnp_argmin_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  const ccv_cnnp_model_argmin_t* const self = (const ccv_cnnp_model_argmin_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_argmin_build] -\n");
  assert(input_size == 1);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  ccv_nnc_tensor_param_t output_params;
  ccv_nnc_cmd_t argmin = CMD_ARGMIN_FORWARD();
  argmin.info.reduce.axis[0] = self->axis;
  argmin.info.reduce.count = 1;
  ccv_nnc_hint_tensor_auto(argmin, &input_params, 1, ccv_nnc_no_hint, &output_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, argmin, inputs, input_size, outputs, output_size, "argmin");
}

static ccv_cnnp_model_t* _ccv_cnnp_argmin_copy(const ccv_cnnp_model_t* const self, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_argmin_isa = {
  .build = _ccv_cnnp_argmin_build,
  .copy = _ccv_cnnp_argmin_copy,
};

ccv_cnnp_model_t* ccv_cnnp_argmin(const int axis, const char* const name)
{
  ccv_cnnp_model_argmin_t* const model_argmin = (ccv_cnnp_model_argmin_t*)cccalloc(1, sizeof(ccv_cnnp_model_argmin_t));
  model_argmin->super.isa = &ccv_cnnp_argmin_isa;
  model_argmin->super.input_size = 1;
  model_argmin->super.outputs = &model_argmin->output;
  model_argmin->super.output_size = 1;
  ccv_cnnp_model_copy_name(&model_argmin->super, name);
  model_argmin->axis = axis;
  return (ccv_cnnp_model_t*)model_argmin;
}

static ccv_cnnp_model_t* _ccv_cnnp_argmin_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_argmin_t* const self = (const ccv_cnnp_model_argmin_t*)super;
  return ccv_cnnp_argmin(self->axis, self->super.name);
}

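// Editorial usage sketch: argmax/argmin take a single axis rather than an
// axis list, since each reduces exactly one dimension. Assumes only the
// constructors above and ccv_cnnp_model_free().
static void _example_argmax_argmin_usage(void)
{
  ccv_cnnp_model_t* const argmax = ccv_cnnp_argmax(1, "argmax");
  ccv_cnnp_model_t* const argmin = ccv_cnnp_argmin(1, "argmin");
  ccv_cnnp_model_free(argmax);
  ccv_cnnp_model_free(argmin);
}
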
// MARK - Min Layer
3280
3281
typedef struct {
3282
  ccv_cnnp_model_t super;
3283
  ccv_nnc_tensor_symbol_t output;
3284
} ccv_cnnp_model_min_t;
3285
3286
static void _ccv_cnnp_min_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3287
1
{
3288
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_min_build] -\n");
3289
1
  assert(input_size == 2);
3290
1
  assert(output_size == 1);
3291
1
  ccv_nnc_tensor_param_t input_params[2];
3292
1
  int i;
3293
3
  for (i = 0; i < 2; 
i++2
)
3294
2
    input_params[i] = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
3295
1
  ccv_nnc_tensor_param_t output_params;
3296
1
  const ccv_nnc_cmd_t min = CMD_MIN_FORWARD();
3297
1
  ccv_nnc_hint_tensor_auto(min, input_params, 2, ccv_nnc_no_hint, &output_params, 1);
3298
1
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
3299
1
  ccv_nnc_graph_exec_symbol_new(graph, min, inputs, input_size, outputs, output_size, "min");
3300
1
}
3301
3302
static ccv_cnnp_model_t* _ccv_cnnp_min_copy(const ccv_cnnp_model_t* const self, void* const context);
3303
3304
static const ccv_cnnp_model_vtab_t ccv_cnnp_min_isa = {
3305
  .build = _ccv_cnnp_min_build,
3306
  .copy = _ccv_cnnp_min_copy,
3307
};
3308
3309
ccv_cnnp_model_t* ccv_cnnp_min(const char* const name)
3310
1
{
3311
1
  ccv_cnnp_model_min_t* const model_min = (ccv_cnnp_model_min_t*)cccalloc(1, sizeof(ccv_cnnp_model_min_t));
3312
1
  model_min->super.isa = &ccv_cnnp_min_isa;
3313
1
  model_min->super.input_size = 2;
3314
1
  model_min->super.outputs = &model_min->output;
3315
1
  model_min->super.output_size = 1;
3316
1
  ccv_cnnp_model_copy_name(&model_min->super, name);
3317
1
  return (ccv_cnnp_model_t*)model_min;
3318
1
}
3319
3320
static ccv_cnnp_model_t* _ccv_cnnp_min_copy(const ccv_cnnp_model_t* const super, void* const context)
3321
0
{
3322
0
  const ccv_cnnp_model_min_t* const self = (const ccv_cnnp_model_min_t*)super;
3323
0
  return ccv_cnnp_min(self->super.name);
3324
0
}
3325
3326
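The min layer pairs two inputs of identical shape; a sketch under the same test-suite assumptions as the argmin example above:

// Elementwise minimum of two 2x2 tensors.
ccv_cnnp_model_t* const min = ccv_cnnp_min("min");
const ccv_nnc_tensor_param_t p = CPU_TENSOR_NHWC(32F, 2, 2);
ccv_cnnp_model_compile(min, TENSOR_PARAM_LIST(p, p), CMD_NOOP(), CMD_NOOP());
ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, p, 0);
ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, p, 0);
ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, p, 0);
// ... fill a and b; after evaluation c holds the elementwise minimum ...
ccv_cnnp_model_evaluate(min, (ccv_cnnp_evaluate_param_t){}, TENSOR_LIST(a, b), TENSOR_LIST(c), 0, 0);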
// MARK - Max Layer
3327
3328
typedef struct {
3329
  ccv_cnnp_model_t super;
3330
  ccv_nnc_tensor_symbol_t output;
3331
} ccv_cnnp_model_max_t;
3332
3333
static void _ccv_cnnp_max_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3334
1
{
3335
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_max_build] -\n");
3336
1
  assert(input_size == 2);
3337
1
  assert(output_size == 1);
3338
1
  ccv_nnc_tensor_param_t input_params[2];
3339
1
  int i;
3340
3
  for (i = 0; i < 2; i++)
3341
2
    input_params[i] = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
3342
1
  ccv_nnc_tensor_param_t output_params;
3343
1
  const ccv_nnc_cmd_t max = CMD_MAX_FORWARD();
3344
1
  ccv_nnc_hint_tensor_auto(max, input_params, 2, ccv_nnc_no_hint, &output_params, 1);
3345
1
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
3346
1
  ccv_nnc_graph_exec_symbol_new(graph, max, inputs, input_size, outputs, output_size, "max");
3347
1
}
3348
3349
static ccv_cnnp_model_t* _ccv_cnnp_max_copy(const ccv_cnnp_model_t* const self, void* const context);
3350
3351
static const ccv_cnnp_model_vtab_t ccv_cnnp_max_isa = {
3352
  .build = _ccv_cnnp_max_build,
3353
  .copy = _ccv_cnnp_max_copy,
3354
};
3355
3356
ccv_cnnp_model_t* ccv_cnnp_max(const char* const name)
3357
1
{
3358
1
  ccv_cnnp_model_max_t* const model_max = (ccv_cnnp_model_max_t*)cccalloc(1, sizeof(ccv_cnnp_model_max_t));
3359
1
  model_max->super.isa = &ccv_cnnp_max_isa;
3360
1
  model_max->super.input_size = 2;
3361
1
  model_max->super.outputs = &model_max->output;
3362
1
  model_max->super.output_size = 1;
3363
1
  ccv_cnnp_model_copy_name(&model_max->super, name);
3364
1
  return (ccv_cnnp_model_t*)model_max;
3365
1
}
3366
3367
static ccv_cnnp_model_t* _ccv_cnnp_max_copy(const ccv_cnnp_model_t* const super, void* const context)
3368
0
{
3369
0
  const ccv_cnnp_model_max_t* const self = (const ccv_cnnp_model_max_t*)super;
3370
0
  return ccv_cnnp_max(self->super.name);
3371
0
}
3372
3373
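The max layer is exercised exactly like the min sketch above: construct with ccv_cnnp_max("max"), compile against two identical tensor parameters, and evaluate with two inputs and one elementwise-maximum output.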
// MARK - LSTM Layer
3374
3375
typedef struct {
3376
  ccv_cnnp_model_t super;
3377
  int masked;
3378
  ccv_nnc_tensor_symbol_t output;
3379
  ccv_nnc_tensor_symbol_t weights;
3380
  ccv_nnc_tensor_symbol_t reserves;
3381
  ccv_nnc_cmd_param_t params;
3382
  ccv_nnc_graph_exec_symbol_t lstm;
3383
} ccv_cnnp_model_lstm_t;
3384
3385
static int _ccv_cnnp_lstm_weight_dim(int bidirectional, int num_layers, int input_size, int hidden_size, int proj_size, int bias)
3386
1
{
3387
1
  const int D = !!bidirectional + 1;
3388
1
  if (hidden_size == proj_size)
3389
1
    return (num_layers * (bias ? 8 : 0) + (num_layers - 1) * (hidden_size * 4 * D + hidden_size * 4) + input_size * 4 + hidden_size * 4) * D;
3390
0
  else
3391
0
    return (num_layers * (bias ? 8 : 0) + (num_layers - 1) * (proj_size * 4 * D + proj_size * 4) + (proj_size * 4 + input_size * 4) + num_layers * proj_size) * D;
3392
1
}
3393
3394
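To make the weight-dimension formula above concrete: with bidirectional = 0 (so D = 1), num_layers = 1, input_size = 2, hidden_size = proj_size = 4, and bias = 1, the first branch evaluates to (1 * 8 + 0 + 2 * 4 + 4 * 4) * 1 = 8 + 8 + 16 = 32, i.e. 8 bias rows, plus 4 gates times 2 input rows, plus 4 gates times 4 recurrent rows. The packed weight tensor created in _ccv_cnnp_lstm_build below is therefore 32 x hidden_size for this configuration.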
static void _ccv_cnnp_lstm_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3395
1
{
3396
1
  ccv_cnnp_model_lstm_t* const self = (ccv_cnnp_model_lstm_t*)super;
3397
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_lstm_build] -\n");
3398
1
  assert(input_size == self->super.input_size);
3399
1
  assert(output_size == 1);
3400
1
  const int proj_size = self->params.rnn.proj_size == 0 ? self->params.rnn.hidden_size : self->params.rnn.proj_size;
3401
1
  ccv_nnc_tensor_param_t input_params[5];
3402
1
  input_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
3403
1
  if (input_size == 2)
3404
1
    input_params[1] = ccv_nnc_tensor_symbol_params(graph, inputs[1]);
3405
1
  input_params[4] = input_params[0];
3406
1
  memset(input_params[4].dim, 0, sizeof(input_params[4].dim));
3407
1
  const int x_nd = ccv_nnc_tensor_nd(input_params[0].dim);
3408
1
  const int feature_count = input_params[0].dim[x_nd - 1];
3409
1
  input_params[4].dim[0] = _ccv_cnnp_lstm_weight_dim(self->params.rnn.bidirectional, self->params.rnn.num_layers, feature_count, self->params.rnn.hidden_size, proj_size, self->params.rnn.bias);
3410
1
  input_params[4].dim[1] = self->params.rnn.hidden_size;
3411
1
  const ccv_nnc_cmd_t lstm = ccv_nnc_cmd(CCV_NNC_LSTM_FORWARD, 0, self->params, 0);
3412
1
  ccv_nnc_tensor_param_t output_params[4];
3413
1
  ccv_nnc_hint_tensor_auto(lstm, input_params, 5, ccv_nnc_no_hint, output_params, 4);
3414
1
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
3415
1
  if (!self->weights.graph)
3416
1
    self->weights = ccv_nnc_tensor_symbol_new(graph, input_params[4], "weights");
3417
1
  if (!self->reserves.graph)
3418
1
    self->reserves = ccv_nnc_tensor_symbol_new(graph, output_params[3], "reserves");
3419
1
  const ccv_nnc_tensor_symbol_t mask = input_size == 2 ? inputs[1] : NO_TENSOR_SYMBOL;
3420
1
  self->lstm = ccv_nnc_graph_exec_symbol_new(graph, lstm, TENSOR_SYMBOL_LIST(inputs[0], mask, NO_TENSOR_SYMBOL, NO_TENSOR_SYMBOL, self->weights), TENSOR_SYMBOL_LIST(outputs[0], NO_TENSOR_SYMBOL, NO_TENSOR_SYMBOL, self->reserves), "lstm");
3421
1
}
3422
3423
static void _ccv_cnnp_lstm_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
3424
1
{
3425
1
  ccv_cnnp_model_lstm_t* const self = (ccv_cnnp_model_lstm_t*)super;
3426
1
  if (self->weights.graph)
3427
1
  {
3428
1
    const float stdv = 1.0 / sqrt(self->params.rnn.hidden_size);
3429
1
    initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-stdv, stdv), ccv_nnc_no_hint, 0, 0, self->weights);
3430
1
  }
3431
1
}
3432
3433
static void _ccv_cnnp_lstm_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
3434
1
{
3435
1
  ccv_cnnp_model_lstm_t* const self = (ccv_cnnp_model_lstm_t*)super;
3436
1
  if (self->weights.graph)
3437
1
    add_to_array(parameters, self->weights, is_trainable);
3438
1
}
3439
3440
static void _ccv_cnnp_lstm_set_is_test(ccv_cnnp_model_t* const super, const int is_test, const ccv_cnnp_cmd_updater_f updater, void* const context)
3441
2
{
3442
2
  ccv_cnnp_model_lstm_t* const self = (ccv_cnnp_model_lstm_t*)super;
3443
2
  if (self->lstm.graph)
3444
2
  {
3445
2
    self->params.rnn.is_test = is_test;
3446
2
    updater(context, self->lstm, ccv_nnc_cmd(CCV_NNC_LSTM_FORWARD, 0, self->params, 0), ccv_nnc_no_hint);
3447
2
  }
3448
2
}
3449
3450
static ccv_cnnp_model_t* _ccv_cnnp_lstm_copy(const ccv_cnnp_model_t* const self, void* const context);
3451
3452
static const ccv_cnnp_model_vtab_t ccv_cnnp_lstm_isa = {
3453
  .build = _ccv_cnnp_lstm_build,
3454
  .init_states = _ccv_cnnp_lstm_init_states,
3455
  .add_to_parameter = _ccv_cnnp_lstm_add_to_parameter,
3456
  .copy = _ccv_cnnp_lstm_copy,
3457
  .set_is_test = _ccv_cnnp_lstm_set_is_test,
3458
};
3459
3460
ccv_cnnp_model_t* ccv_cnnp_lstm(const int masked, const int hidden_size, const int proj_size, const int num_layers, const int bias, const int batch_first, const int bidirectional, const float dropout, const int is_trainable, const char* const name)
3461
1
{
3462
1
  ccv_cnnp_model_lstm_t* const model_lstm = (ccv_cnnp_model_lstm_t*)cccalloc(1, sizeof(ccv_cnnp_model_lstm_t));
3463
1
  model_lstm->super.isa = &ccv_cnnp_lstm_isa;
3464
1
  model_lstm->super.input_size = masked ? 2 : 1;
3465
1
  model_lstm->super.outputs = &model_lstm->output;
3466
1
  model_lstm->super.output_size = 1;
3467
1
  model_lstm->super.is_trainable = is_trainable;
3468
1
  ccv_cnnp_model_copy_name(&model_lstm->super, name);
3469
1
  model_lstm->masked = masked;
3470
1
  model_lstm->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
3471
1
  model_lstm->weights.graph = 0;
3472
1
  model_lstm->params.rnn.hidden_size = hidden_size;
3473
1
  model_lstm->params.rnn.proj_size = proj_size;
3474
1
  model_lstm->params.rnn.num_layers = num_layers;
3475
1
  model_lstm->params.rnn.bias = bias;
3476
1
  model_lstm->params.rnn.batch_first = batch_first;
3477
1
  model_lstm->params.rnn.bidirectional = bidirectional;
3478
1
  model_lstm->params.rnn.dropout = dropout;
3479
1
  return (ccv_cnnp_model_t*)model_lstm;
3480
1
}
3481
3482
static ccv_cnnp_model_t* _ccv_cnnp_lstm_copy(const ccv_cnnp_model_t* const super, void* const context)
3483
0
{
3484
0
  const ccv_cnnp_model_lstm_t* const self = (const ccv_cnnp_model_lstm_t*)super;
3485
0
  return ccv_cnnp_lstm(self->masked, self->params.rnn.hidden_size, self->params.rnn.proj_size, self->params.rnn.num_layers, self->params.rnn.bias, self->params.rnn.batch_first, self->params.rnn.bidirectional, self->params.rnn.dropout, self->super.is_trainable, self->super.name);
3486
0
}
3487
3488
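A sketch of a typical constructor call (parameter values are illustrative):

// 2-layer unidirectional LSTM, 256 hidden units, batch-first, no projection.
ccv_cnnp_model_t* const lstm = ccv_cnnp_lstm(
  0 /* masked */, 256 /* hidden_size */, 0 /* proj_size: 0 keeps hidden_size */,
  2 /* num_layers */, 1 /* bias */, 1 /* batch_first */, 0 /* bidirectional */,
  0 /* dropout */, 1 /* is_trainable */, "lstm");

With masked = 1 the model expects a second input carrying the sequence mask, which _ccv_cnnp_lstm_build wires into the LSTM command as inputs[1].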
// MARK - Datatype Conversion Layer
3489
3490
typedef struct {
3491
  ccv_cnnp_model_t super;
3492
  ccv_nnc_tensor_symbol_t output;
3493
  int datatype;
3494
  int ref_to_last;
3495
} ccv_cnnp_model_datatype_conversion_t;
3496
3497
static void _ccv_cnnp_datatype_conversion_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3498
2
{
3499
2
  ccv_cnnp_model_datatype_conversion_t* const self = (ccv_cnnp_model_datatype_conversion_t*)super;
3500
2
  PRINT(CCV_CLI_VERBOSE, "[cnnp_datatype_conversion_build] -\n");
3501
2
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
3502
2
  if (self->ref_to_last)
3503
1
  {
3504
1
    assert(input_size > 1);
3505
1
    const ccv_nnc_tensor_param_t last_params = ccv_nnc_tensor_symbol_params(graph, inputs[input_size - 1]);
3506
1
    params.datatype = last_params.datatype;
3507
1
  } else
3508
1
    params.datatype = self->datatype;
3509
2
  assert(output_size == 1);
3510
2
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, params, 0);
3511
2
  ccv_nnc_graph_exec_symbol_new(graph, CMD_DATATYPE_CONVERSION_FORWARD(), inputs, output_size, outputs, output_size, 0);
3512
2
}
3513
3514
static ccv_cnnp_model_t* _ccv_cnnp_datatype_conversion_copy(const ccv_cnnp_model_t* const self, void* const context);
3515
3516
static const ccv_cnnp_model_vtab_t ccv_cnnp_datatype_conversion_isa = {
3517
  .build = _ccv_cnnp_datatype_conversion_build,
3518
  .copy = _ccv_cnnp_datatype_conversion_copy,
3519
};
3520
3521
ccv_cnnp_model_t* ccv_cnnp_datatype_conversion(const int datatype, const int ref_to_last, const char* const name)
3522
2
{
3523
2
  ccv_cnnp_model_datatype_conversion_t* const model_datatype_conversion = (ccv_cnnp_model_datatype_conversion_t*)cccalloc(1, sizeof(ccv_cnnp_model_datatype_conversion_t));
3524
2
  model_datatype_conversion->super.isa = &ccv_cnnp_datatype_conversion_isa;
3525
2
  model_datatype_conversion->super.input_size = 0;
3526
2
  model_datatype_conversion->super.outputs = &model_datatype_conversion->output;
3527
2
  model_datatype_conversion->super.output_size = 1;
3528
2
  model_datatype_conversion->datatype = datatype;
3529
2
  model_datatype_conversion->ref_to_last = ref_to_last;
3530
2
  ccv_cnnp_model_copy_name(&model_datatype_conversion->super, name);
3531
2
  return (ccv_cnnp_model_t*)model_datatype_conversion;
3532
2
}
3533
3534
static ccv_cnnp_model_t* _ccv_cnnp_datatype_conversion_copy(const ccv_cnnp_model_t* const super, void* const context)
3535
0
{
3536
0
  ccv_cnnp_model_datatype_conversion_t* const self = (ccv_cnnp_model_datatype_conversion_t*)super;
3537
0
  return ccv_cnnp_datatype_conversion(self->datatype, self->ref_to_last, self->super.name);
3538
0
}
3539
3540
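Two illustrative constructor calls (a sketch; CCV_16F is ccv's half-precision datatype constant):

// Fixed target: cast the input to half precision.
ccv_cnnp_model_t* const to_fp16 = ccv_cnnp_datatype_conversion(CCV_16F, 0, "to_fp16");
// ref_to_last: ignore the datatype argument, match the last input's datatype.
ccv_cnnp_model_t* const match_last = ccv_cnnp_datatype_conversion(0, 1, "match_last");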
// MARK - Clamp Layer
3541
3542
typedef struct {
3543
  ccv_cnnp_model_t super;
3544
  ccv_nnc_tensor_symbol_t output;
3545
  float min;
3546
  float max;
3547
} ccv_cnnp_model_clamp_t;
3548
3549
static void _ccv_cnnp_clamp_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3550
0
{
3551
0
  ccv_cnnp_model_clamp_t* const self = (ccv_cnnp_model_clamp_t*)super;
3552
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_clamp_build] -\n");
3553
0
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
3554
0
  assert(output_size == 1);
3555
0
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, params, 0);
3556
0
  ccv_nnc_graph_exec_symbol_new(graph, CMD_CLAMP_FORWARD(self->min, self->max), inputs, output_size, outputs, output_size, 0);
3557
0
}
3558
3559
static ccv_cnnp_model_t* _ccv_cnnp_clamp_copy(const ccv_cnnp_model_t* const self, void* const context);
3560
3561
static const ccv_cnnp_model_vtab_t ccv_cnnp_clamp_isa = {
3562
  .build = _ccv_cnnp_clamp_build,
3563
  .copy = _ccv_cnnp_clamp_copy,
3564
};
3565
3566
ccv_cnnp_model_t* ccv_cnnp_clamp(const float min, const float max, const char* const name)
3567
0
{
3568
0
  ccv_cnnp_model_clamp_t* const model_clamp = (ccv_cnnp_model_clamp_t*)cccalloc(1, sizeof(ccv_cnnp_model_clamp_t));
3569
0
  model_clamp->super.isa = &ccv_cnnp_clamp_isa;
3570
0
  model_clamp->super.input_size = 0;
3571
0
  model_clamp->super.outputs = &model_clamp->output;
3572
0
  model_clamp->super.output_size = 1;
3573
0
  model_clamp->min = min;
3574
0
  model_clamp->max = max;
3575
0
  ccv_cnnp_model_copy_name(&model_clamp->super, name);
3576
0
  return (ccv_cnnp_model_t*)model_clamp;
3577
0
}
3578
3579
static ccv_cnnp_model_t* _ccv_cnnp_clamp_copy(const ccv_cnnp_model_t* const super, void* const context)
3580
0
{
3581
0
  ccv_cnnp_model_clamp_t* const self = (ccv_cnnp_model_clamp_t*)super;
3582
0
  return ccv_cnnp_clamp(self->min, self->max, self->super.name);
3583
0
}
3584
3585
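An illustrative sketch: clamping to [0, 6] gives a ReLU6. Passing NAN for a bound is assumed here to leave that side open, following the convention of the underlying CMD_CLAMP command rather than anything this file enforces:

ccv_cnnp_model_t* const relu6 = ccv_cnnp_clamp(0, 6, "relu6");
ccv_cnnp_model_t* const relu = ccv_cnnp_clamp(0, NAN, "relu"); // min-only clamp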
// MARK - Parameter Layer
3586
3587
typedef struct {
3588
  ccv_cnnp_model_t super;
3589
  float init_bound;
3590
  ccv_nnc_tensor_symbol_t weights;
3591
  ccv_nnc_tensor_param_t weights_params;
3592
  ccv_nnc_tensor_symbol_t output;
3593
} ccv_cnnp_model_parameter_t;
3594
3595
static void _ccv_cnnp_parameter_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3596
1
{
3597
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_parameter_build] -\n");
3598
1
  assert(output_size == 1);
3599
1
  ccv_cnnp_model_parameter_t* const self = (ccv_cnnp_model_parameter_t*)super;
3600
1
  if (!self->weights.graph)
3601
1
    self->weights = ccv_nnc_tensor_symbol_new(graph, self->weights_params, "weights");
3602
1
  assert(self->weights.graph == graph);
3603
1
  outputs[0] = self->weights;
3604
1
}
3605
3606
static void _ccv_cnnp_parameter_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
3607
0
{
3608
0
  ccv_cnnp_model_parameter_t* const self = (ccv_cnnp_model_parameter_t*)super;
3609
0
  if (self->init_bound > 0)
3610
0
    initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-self->init_bound, self->init_bound), ccv_nnc_no_hint, 0, 0, self->weights);
3611
0
  else
3612
0
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->weights);
3613
0
}
3614
3615
static void _ccv_cnnp_parameter_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
3616
1
{
3617
1
  ccv_cnnp_model_parameter_t* const self = (ccv_cnnp_model_parameter_t*)super;
3618
1
  add_to_array(parameters, self->weights, is_trainable);
3619
1
}
3620
3621
static ccv_cnnp_model_t* _ccv_cnnp_parameter_copy(const ccv_cnnp_model_t* const super, void* const context);
3622
3623
static const ccv_cnnp_model_vtab_t ccv_cnnp_parameter_isa = {
3624
  .build = _ccv_cnnp_parameter_build,
3625
  .init_states = _ccv_cnnp_parameter_init_states,
3626
  .add_to_parameter = _ccv_cnnp_parameter_add_to_parameter,
3627
  .copy = _ccv_cnnp_parameter_copy,
3628
};
3629
3630
ccv_cnnp_model_t* ccv_cnnp_parameter(const ccv_nnc_tensor_param_t params, const float init_bound, const int is_trainable, const char* const name)
3631
1
{
3632
1
  ccv_cnnp_model_parameter_t* const model_parameter = (ccv_cnnp_model_parameter_t*)cccalloc(1, sizeof(ccv_cnnp_model_parameter_t));
3633
1
  model_parameter->super.isa = &ccv_cnnp_parameter_isa;
3634
1
  model_parameter->super.input_size = 0;
3635
1
  model_parameter->super.outputs = &model_parameter->output;
3636
1
  model_parameter->super.output_size = 1;
3637
1
  model_parameter->super.is_trainable = is_trainable;
3638
1
  ccv_cnnp_model_copy_name(&model_parameter->super, name);
3639
1
  model_parameter->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
3640
1
  model_parameter->weights.graph = 0;
3641
1
  model_parameter->weights_params = params;
3642
1
  return (ccv_cnnp_model_t*)model_parameter;
3643
1
}
3644
3645
static ccv_cnnp_model_t* _ccv_cnnp_parameter_copy(const ccv_cnnp_model_t* const super, void* const context)
3646
0
{
3647
0
  const ccv_cnnp_model_parameter_t* const self = (const ccv_cnnp_model_parameter_t*)super;
3648
0
  return ccv_cnnp_parameter(self->weights_params, self->init_bound, self->super.is_trainable, self->super.name);
3649
0
}
3650
3651
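A sketch: a standalone trainable 4x4 weight, uniformly initialized in [-0.5, 0.5]. Per _ccv_cnnp_parameter_init_states above, an init_bound of zero or below would zero-initialize instead:

ccv_cnnp_model_t* const w = ccv_cnnp_parameter(CPU_TENSOR_NHWC(32F, 4, 4), 0.5, 1 /* is_trainable */, "w");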
// MARK - Scalar Layer
3652
3653
typedef struct {
3654
  ccv_cnnp_model_t super;
3655
  int type;
3656
  int format;
3657
  int datatype;
3658
  float value;
3659
  ccv_nnc_tensor_symbol_t output;
3660
} ccv_cnnp_model_scalar_t;
3661
3662
static void _ccv_cnnp_scalar_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3663
2
{
3664
2
  PRINT(CCV_CLI_VERBOSE, "[cnnp_scalar_build] -\n");
3665
2
  assert(output_size == 1);
3666
2
  ccv_cnnp_model_scalar_t* const self = (ccv_cnnp_model_scalar_t*)super;
3667
2
  ccv_nnc_tensor_param_t params = {
3668
2
    .type = self->type,
3669
2
    .format = self->format,
3670
2
    .datatype = self->datatype,
3671
2
    .dim = {
3672
2
      1
3673
2
    }
3674
2
  };
3675
2
  if (input_size > 0)
3676
1
  {
3677
1
    ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
3678
1
    params.type = input_params.type;
3679
1
    params.format = input_params.format;
3680
1
    params.datatype = input_params.datatype;
3681
1
  }
3682
2
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, params, 0);
3683
2
  ccv_nnc_graph_exec_symbol_new(graph, CMD_SET_FORWARD(self->value), 0, 0, outputs, 1, 0);
3684
2
}
3685
3686
static ccv_cnnp_model_t* _ccv_cnnp_scalar_copy(const ccv_cnnp_model_t* const super, void* const context);
3687
3688
static const ccv_cnnp_model_vtab_t ccv_cnnp_scalar_isa = {
3689
  .build = _ccv_cnnp_scalar_build,
3690
  .copy = _ccv_cnnp_scalar_copy,
3691
};
3692
3693
ccv_cnnp_model_t* ccv_cnnp_scalar(const int type, const int format, const int datatype, const float value, const char* const name)
3694
2
{
3695
2
  ccv_cnnp_model_scalar_t* const model_scalar = (ccv_cnnp_model_scalar_t*)cccalloc(1, sizeof(ccv_cnnp_model_scalar_t));
3696
2
  model_scalar->super.isa = &ccv_cnnp_scalar_isa;
3697
2
  model_scalar->super.input_size = 0;
3698
2
  model_scalar->super.outputs = &model_scalar->output;
3699
2
  model_scalar->super.output_size = 1;
3700
2
  ccv_cnnp_model_copy_name(&model_scalar->super, name);
3701
2
  model_scalar->type = type;
3702
2
  model_scalar->format = format;
3703
2
  model_scalar->datatype = datatype;
3704
2
  model_scalar->value = value;
3705
2
  return (ccv_cnnp_model_t*)model_scalar;
3706
2
}
3707
3708
static ccv_cnnp_model_t* _ccv_cnnp_scalar_copy(const ccv_cnnp_model_t* const super, void* const context)
3709
0
{
3710
0
  const ccv_cnnp_model_scalar_t* const self = (const ccv_cnnp_model_scalar_t*)super;
3711
0
  return ccv_cnnp_scalar(self->type, self->format, self->datatype, self->value, self->super.name);
3712
0
}
3713
3714
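A sketch of a constant-one CPU scalar. Note from _ccv_cnnp_scalar_build above that when the model is wired with an input, the type, format, and datatype are inherited from that input, overriding these explicit values:

ccv_cnnp_model_t* const one = ccv_cnnp_scalar(CCV_TENSOR_CPU_MEMORY, CCV_TENSOR_FORMAT_NHWC, CCV_32F, 1, "one");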
// MARK - Variable Layer
3715
3716
typedef struct {
3717
  ccv_cnnp_model_t super;
3718
  ccv_nnc_tensor_param_t params;
3719
  ccv_nnc_tensor_symbol_t output;
3720
} ccv_cnnp_model_variable_t;
3721
3722
static void _ccv_cnnp_variable_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3723
1
{
3724
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_variable_build] -\n");
3725
1
  assert(input_size == 0);
3726
1
  assert(output_size == 1);
3727
1
  ccv_cnnp_model_variable_t* const self = (ccv_cnnp_model_variable_t*)super;
3728
1
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, self->params, 0);
3729
1
}
3730
3731
static ccv_cnnp_model_t* _ccv_cnnp_variable_copy(const ccv_cnnp_model_t* const super, void* const context);
3732
3733
static const ccv_cnnp_model_vtab_t ccv_cnnp_variable_isa = {
3734
  .build = _ccv_cnnp_variable_build,
3735
  .copy = _ccv_cnnp_variable_copy,
3736
};
3737
3738
ccv_cnnp_model_t* ccv_cnnp_variable(const ccv_nnc_tensor_param_t params, const char* const name)
3739
1
{
3740
1
  ccv_cnnp_model_variable_t* const model_variable = (ccv_cnnp_model_variable_t*)cccalloc(1, sizeof(ccv_cnnp_model_variable_t));
3741
1
  model_variable->super.isa = &ccv_cnnp_variable_isa;
3742
1
  model_variable->super.input_size = 0;
3743
1
  model_variable->super.outputs = &model_variable->output;
3744
1
  model_variable->super.output_size = 1;
3745
1
  ccv_cnnp_model_copy_name(&model_variable->super, name);
3746
1
  model_variable->params = params;
3747
1
  return (ccv_cnnp_model_t*)model_variable;
3748
1
}
3749
3750
static ccv_cnnp_model_t* _ccv_cnnp_variable_copy(const ccv_cnnp_model_t* const super, void* const context)
3751
0
{
3752
0
  const ccv_cnnp_model_variable_t* const self = (const ccv_cnnp_model_variable_t*)super;
3753
0
  return ccv_cnnp_variable(self->params, self->super.name);
3754
0
}
3755
3756
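A sketch: an uninitialized graph-local 2x2 tensor, useful as a scratch destination (for example, for the move layer below):

ccv_cnnp_model_t* const var = ccv_cnnp_variable(CPU_TENSOR_NHWC(32F, 2, 2), "var");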
// MARK - Move Layer
3757
3758
typedef struct {
3759
  ccv_cnnp_model_t super;
3760
  ccv_nnc_tensor_symbol_t output;
3761
} ccv_cnnp_model_move_t;
3762
3763
static void _ccv_cnnp_move_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3764
3
{
3765
3
  PRINT(CCV_CLI_VERBOSE, "[cnnp_move_build] -\n");
3766
3
  assert(input_size == 2);
3767
3
  assert(output_size == 1);
3768
3
  outputs[0] = inputs[1];
3769
3
  ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), inputs, 1, outputs, 1, "move");
3770
3
}
3771
3772
static ccv_cnnp_model_t* _ccv_cnnp_move_copy(const ccv_cnnp_model_t* const super, void* const context);
3773
3774
static const ccv_cnnp_model_vtab_t ccv_cnnp_move_isa = {
3775
  .build = _ccv_cnnp_move_build,
3776
  .copy = _ccv_cnnp_move_copy,
3777
};
3778
3779
ccv_cnnp_model_t* ccv_cnnp_move(const char* const name)
3780
3
{
3781
3
  ccv_cnnp_model_move_t* const model_move = (ccv_cnnp_model_move_t*)cccalloc(1, sizeof(ccv_cnnp_model_move_t));
3782
3
  model_move->super.isa = &ccv_cnnp_move_isa;
3783
3
  model_move->super.input_size = 2;
3784
3
  model_move->super.outputs = &model_move->output;
3785
3
  model_move->super.output_size = 1;
3786
3
  ccv_cnnp_model_copy_name(&model_move->super, name);
3787
3
  return (ccv_cnnp_model_t*)model_move;
3788
3
}
3789
3790
static ccv_cnnp_model_t* _ccv_cnnp_move_copy(const ccv_cnnp_model_t* const super, void* const context)
3791
0
{
3792
0
  const ccv_cnnp_model_move_t* const self = (const ccv_cnnp_model_move_t*)super;
3793
0
  return ccv_cnnp_move(self->super.name);
3794
0
}
3795
3796
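The move layer copies its first input into the storage of its second input and yields the second symbol as its output. A sketch of how it is typically wired inside a functional model (src and dst are hypothetical ccv_cnnp_model_io_t handles defined elsewhere in that model):

ccv_cnnp_model_io_t const moved = ccv_cnnp_model_apply(ccv_cnnp_move("move"), MODEL_IO_LIST(src, dst));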
// MARK - "Making" Contiguous Layer
3797
3798
typedef struct {
3799
  ccv_cnnp_model_t super;
3800
  ccv_nnc_tensor_symbol_t output;
3801
} ccv_cnnp_model_contiguous_t;
3802
3803
static void _ccv_cnnp_contiguous_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3804
5
{
3805
5
  PRINT(CCV_CLI_VERBOSE, "[cnnp_contiguous_build] -\n");
3806
5
  assert(input_size == 1);
3807
5
  assert(output_size == 1);
3808
5
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
3809
5
  ccv_nnc_tensor_symbol_t to = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
3810
5
  if (to.d == CCV_NNC_NO_TENSOR_SYMBOL) // If the input is not an alias, this is straightforward.
3811
0
  {
3812
0
    outputs[0] = inputs[0];
3813
0
    return;
3814
0
  }
3815
  // Otherwise, we need to check its stride to know if it is contiguous.
3816
5
  int old_stride[CCV_NNC_MAX_DIM_ALLOC];
3817
5
  ccv_nnc_tensor_symbol_alias_params(graph, inputs[0], 0, old_stride);
3818
  // We identify permute by checking if the stride is not in descending order.
3819
  // This also covers "permute" done through reshape, rather than using ccv_cnnp_permute directly.
3820
5
  if (ccv_nnc_is_tensor_stride_packed(old_stride, params.dim))
3821
2
  {
3822
2
    outputs[0] = inputs[0];
3823
2
    return;
3824
2
  }
3825
3
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, params, 0);
3826
3
  ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), inputs, 1, outputs, 1, "contiguous");
3827
3
  ccv_nnc_graph_exec_symbol_set_flags(graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
3828
3
}
3829
3830
static ccv_cnnp_model_t* _ccv_cnnp_contiguous_copy(const ccv_cnnp_model_t* const super, void* const context);
3831
3832
static const ccv_cnnp_model_vtab_t ccv_cnnp_contiguous_isa = {
3833
  .build = _ccv_cnnp_contiguous_build,
3834
  .copy = _ccv_cnnp_contiguous_copy,
3835
};
3836
3837
ccv_cnnp_model_t* ccv_cnnp_contiguous(const char* const name)
3838
5
{
3839
5
  ccv_cnnp_model_contiguous_t* const model_contiguous = (ccv_cnnp_model_contiguous_t*)cccalloc(1, sizeof(ccv_cnnp_model_contiguous_t));
3840
5
  model_contiguous->super.isa = &ccv_cnnp_contiguous_isa;
3841
5
  model_contiguous->super.input_size = 1;
3842
5
  model_contiguous->super.outputs = &model_contiguous->output;
3843
5
  model_contiguous->super.output_size = 1;
3844
5
  ccv_cnnp_model_copy_name(&model_contiguous->super, name);
3845
5
  return (ccv_cnnp_model_t*)model_contiguous;
3846
5
}
3847
3848
static ccv_cnnp_model_t* _ccv_cnnp_contiguous_copy(const ccv_cnnp_model_t* const super, void* const context)
3849
0
{
3850
0
  const ccv_cnnp_model_contiguous_t* const self = (const ccv_cnnp_model_contiguous_t*)super;
3851
0
  return ccv_cnnp_contiguous(self->super.name);
3852
0
}
3853
3854
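A sketch pairing a permute with a contiguous copy; per the build function above, the copy is only materialized when the incoming alias is not already packed (the is_trainable argument of ccv_cnnp_sequential_new is assumed from the current ccv_cnnp headers):

const int index[CCV_NNC_MAX_DIM_ALLOC] = {0, 3, 1, 2}; // NHWC -> NCHW
ccv_cnnp_model_t* const pipeline = ccv_cnnp_sequential_new(MODEL_LIST(
  ccv_cnnp_permute(index, "permute"),
  ccv_cnnp_contiguous("contiguous")), 0 /* is_trainable */, "pipeline");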
// MARK - Scaled-Dot Product Attention Layer
3855
3856
typedef struct {
3857
  ccv_cnnp_model_t super;
3858
  ccv_nnc_tensor_symbol_t output;
3859
  ccv_nnc_tensor_symbol_t weights;
3860
  ccv_nnc_tensor_symbol_t bias;
3861
  float scale;
3862
  int is_causal;
3863
  int has_attn_mask;
3864
  int flags;
3865
  int fused_unify_head_weights;
3866
  int no_bias;
3867
} ccv_cnnp_model_scaled_dot_product_attention_t;
3868
3869
static void _ccv_cnnp_scaled_dot_product_attention_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3870
3
{
3871
3
  PRINT(CCV_CLI_VERBOSE, "[cnnp_scaled_dot_product_attention_build] -\n");
3872
3
  assert(input_size == 3 || input_size == 4);
3873
3
  assert(output_size == 1);
3874
3
  ccv_cnnp_model_scaled_dot_product_attention_t* const self = (ccv_cnnp_model_scaled_dot_product_attention_t*)super;
3875
3
  const ccv_nnc_tensor_param_t q_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
3876
3
  const ccv_nnc_tensor_param_t k_params = ccv_nnc_tensor_symbol_params(graph, inputs[1]);
3877
3
  const ccv_nnc_tensor_param_t v_params = ccv_nnc_tensor_symbol_params(graph, inputs[2]);
3878
3
  const int v_nd = ccv_nnc_tensor_nd(v_params.dim);
3879
3
  assert(v_nd == 3 || v_nd == 4);
3880
3
  const int hEv = (v_nd == 3 ? 1 : v_params.dim[2]) * v_params.dim[v_nd - 1];
3881
3
  ccv_nnc_tensor_param_t weights_params = q_params;
3882
3
  memset(weights_params.dim, 0, sizeof(weights_params.dim));
3883
3
  weights_params.dim[0] = hEv;
3884
3
  weights_params.dim[1] = hEv;
3885
3
  ccv_nnc_tensor_param_t bias_params = q_params;
3886
3
  memset(bias_params.dim, 0, sizeof(bias_params.dim));
3887
3
  bias_params.dim[0] = hEv;
3888
3
  ccv_nnc_cmd_t cmd = {0};
3889
3
  cmd.cmd = CCV_NNC_SCALED_DOT_PRODUCT_ATTENTION_FORWARD;
3890
3
  cmd.info.scaled_dot_product_attention.scale = self->scale;
3891
3
  cmd.info.scaled_dot_product_attention.is_causal = self->is_causal;
3892
3
  cmd.info.scaled_dot_product_attention.flags = self->flags;
3893
3
  ccv_nnc_tensor_param_t output_params[3];
3894
3
  ccv_nnc_tensor_symbol_t output;
3895
3
  ccv_nnc_tensor_symbol_t saved_softmax_lse;
3896
3
  ccv_nnc_tensor_symbol_t saved_v_proj = NO_TENSOR_SYMBOL;
3897
3
  ccv_nnc_tensor_symbol_t attn_mask = NO_TENSOR_SYMBOL;
3898
3
  ccv_nnc_tensor_symbol_t weights = NO_TENSOR_SYMBOL;
3899
3
  ccv_nnc_tensor_symbol_t bias = NO_TENSOR_SYMBOL;
3900
3
  if (self->has_attn_mask)
3901
1
    attn_mask = inputs[3];
3902
3
  if (self->fused_unify_head_weights)
3903
1
  {
3904
1
    if (!self->weights.graph)
3905
1
      self->weights = ccv_nnc_tensor_symbol_new(graph, weights_params, "weights");
3906
1
    weights = self->weights;
3907
1
    if (!self->no_bias)
3908
1
    {
3909
1
      if (!self->bias.graph)
3910
1
        self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
3911
1
      bias = self->bias;
3912
1
    }
3913
1
    ccv_nnc_hint_tensor_auto(cmd, (ccv_nnc_tensor_param_t []){
3914
1
        q_params,
3915
1
        k_params,
3916
1
        v_params,
3917
1
        (ccv_nnc_tensor_param_t){},
3918
1
        weights_params,
3919
1
        bias_params,
3920
1
      }, 6, ccv_nnc_no_hint, output_params, 3);
3921
1
    output = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
3922
1
    saved_softmax_lse = ccv_nnc_tensor_symbol_new(graph, output_params[1], 0);
3923
1
    saved_v_proj = ccv_nnc_tensor_symbol_new(graph, output_params[2], 0);
3924
2
  } else {
3925
2
    ccv_nnc_hint_tensor_auto(cmd, (ccv_nnc_tensor_param_t []){
3926
2
        q_params,
3927
2
        k_params,
3928
2
        v_params,
3929
2
      }, 3, ccv_nnc_no_hint, output_params, 2);
3930
2
    output = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
3931
2
    saved_softmax_lse = ccv_nnc_tensor_symbol_new(graph, output_params[1], 0);
3932
2
  }
3933
3
  ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], inputs[1], inputs[2], attn_mask, weights, bias), TENSOR_SYMBOL_LIST(output, saved_softmax_lse, saved_v_proj), "scaled_dot_product_attention");
3934
3
  outputs[0] = output;
3935
3
}
3936
3937
static void _ccv_cnnp_scaled_dot_product_attention_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
3938
0
{
3939
0
  ccv_cnnp_model_scaled_dot_product_attention_t* const self = (ccv_cnnp_model_scaled_dot_product_attention_t*)super;
3940
0
  if (self->weights.graph)
3941
0
  {
3942
0
    assert(self->fused_unify_head_weights);
3943
0
    const ccv_nnc_tensor_param_t weight_params = ccv_nnc_tensor_symbol_params(graph, self->weights);
3944
0
    const int c = weight_params.dim[1];
3945
0
    const float std = sqrtf(2) / sqrtf(c);
3946
0
    const float bound = sqrtf(3) * std;
3947
0
    initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-bound, bound), ccv_nnc_no_hint, 0, 0, self->weights);
3948
0
    if (self->bias.graph)
3949
0
      initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
3950
0
  }
3951
0
}
3952
3953
static void _ccv_cnnp_scaled_dot_product_attention_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
3954
1
{
3955
1
  ccv_cnnp_model_scaled_dot_product_attention_t* const self = (ccv_cnnp_model_scaled_dot_product_attention_t*)super;
3956
1
  if (self->weights.graph)
3957
1
  {
3958
1
    assert(self->fused_unify_head_weights);
3959
1
    add_to_array(parameters, self->weights, is_trainable);
3960
1
    if (self->bias.graph)
3961
1
      add_to_array(parameters, self->bias, is_trainable);
3962
1
  }
3963
1
}
3964
3965
static ccv_cnnp_model_t* _ccv_cnnp_scaled_dot_product_attention_copy(const ccv_cnnp_model_t* const super, void* const context);
3966
3967
static const ccv_cnnp_model_vtab_t ccv_cnnp_scaled_dot_product_attention_isa = {
3968
  .build = _ccv_cnnp_scaled_dot_product_attention_build,
3969
  .copy = _ccv_cnnp_scaled_dot_product_attention_copy,
3970
};
3971
3972
static const ccv_cnnp_model_vtab_t ccv_cnnp_scaled_dot_product_attention_fused_isa = {
3973
  .build = _ccv_cnnp_scaled_dot_product_attention_build,
3974
  .init_states = _ccv_cnnp_scaled_dot_product_attention_init_states,
3975
  .add_to_parameter = _ccv_cnnp_scaled_dot_product_attention_add_to_parameter,
3976
  .copy = _ccv_cnnp_scaled_dot_product_attention_copy,
3977
};
3978
3979
ccv_cnnp_model_t* ccv_cnnp_scaled_dot_product_attention(const float scale, const int is_causal, const int has_attn_mask, const int flags, const int fused_unify_head_weights, const int no_bias, const int is_trainable, const char* const name)
3980
3
{
3981
3
  ccv_cnnp_model_scaled_dot_product_attention_t* const model_scaled_dot_product_attention = (ccv_cnnp_model_scaled_dot_product_attention_t*)cccalloc(1, sizeof(ccv_cnnp_model_scaled_dot_product_attention_t));
3982
3
  model_scaled_dot_product_attention->super.isa = fused_unify_head_weights ? &ccv_cnnp_scaled_dot_product_attention_fused_isa : &ccv_cnnp_scaled_dot_product_attention_isa;
3983
3
  model_scaled_dot_product_attention->super.input_size = has_attn_mask ? 4 : 3;
3984
3
  model_scaled_dot_product_attention->super.outputs = &model_scaled_dot_product_attention->output;
3985
3
  model_scaled_dot_product_attention->super.output_size = 1;
3986
3
  model_scaled_dot_product_attention->super.is_trainable = is_trainable;
3987
3
  ccv_cnnp_model_copy_name(&model_scaled_dot_product_attention->super, name);
3988
3
  model_scaled_dot_product_attention->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
3989
3
  model_scaled_dot_product_attention->weights.graph = 0;
3990
3
  model_scaled_dot_product_attention->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
3991
3
  model_scaled_dot_product_attention->bias.graph = 0;
3992
3
  model_scaled_dot_product_attention->scale = scale;
3993
3
  model_scaled_dot_product_attention->is_causal = is_causal;
3994
3
  model_scaled_dot_product_attention->has_attn_mask = has_attn_mask;
3995
3
  model_scaled_dot_product_attention->flags = flags;
3996
3
  model_scaled_dot_product_attention->fused_unify_head_weights = fused_unify_head_weights;
3997
3
  model_scaled_dot_product_attention->no_bias = no_bias;
3998
3
  return (ccv_cnnp_model_t*)model_scaled_dot_product_attention;
3999
3
}
4000
4001
static ccv_cnnp_model_t* _ccv_cnnp_scaled_dot_product_attention_copy(const ccv_cnnp_model_t* const super, void* const context)
4002
0
{
4003
0
  const ccv_cnnp_model_scaled_dot_product_attention_t* const self = (const ccv_cnnp_model_scaled_dot_product_attention_t*)super;
4004
0
  return ccv_cnnp_scaled_dot_product_attention(self->scale, self->is_causal, self->has_attn_mask, self->flags, self->fused_unify_head_weights, self->no_bias, self->super.is_trainable, self->super.name);
4005
0
}
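A closing sketch of the constructor above, configured like a standard causal attention block with 64-dimensional heads (values illustrative):

// scale = 1/sqrt(64) = 0.125; no mask input, no fused output projection.
ccv_cnnp_model_t* const attn = ccv_cnnp_scaled_dot_product_attention(
  0.125 /* scale */, 1 /* is_causal */, 0 /* has_attn_mask */, 0 /* flags */,
  0 /* fused_unify_head_weights */, 1 /* no_bias */, 0 /* is_trainable */, "attention");

With fused_unify_head_weights set, the fused isa additionally registers the hEv x hEv output-projection weights (and optional bias) created in _ccv_cnnp_scaled_dot_product_attention_build.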